Commit 3b197eb1 authored by Liuyuxinict

Commit files

<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (pytorch1.7)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (pytorch1.7)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/PMG-Progressive-Multi-Granularity-Training-master.iml" filepath="$PROJECT_DIR$/.idea/PMG-Progressive-Multi-Granularity-Training-master.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="1de14600-5bec-46d2-972f-11687490a303" name="Default Changelist" comment="" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="ProjectId" id="1hp1mgftlnNubdit1AM27vnxPWs" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showExcludedFiles" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="D:\PMG" />
</key>
</component>
<component name="RunManager" selected="Python.train">
<configuration name="1" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="PMG-Progressive-Multi-Granularity-Training-master" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/1.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="train" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="PMG-Progressive-Multi-Granularity-Training-master" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/train.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="visualization" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="PMG-Progressive-Multi-Granularity-Training-master" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/visualization.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.train" />
<item itemvalue="Python.visualization" />
<item itemvalue="Python.1" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="1de14600-5bec-46d2-972f-11687490a303" name="Default Changelist" comment="" />
<created>1600693379983</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1600693379983</updated>
</task>
<servers />
</component>
<component name="WindowStateProjectService">
<state x="549" y="171" key="FileChooserDialogImpl" timestamp="1658372379078">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state x="549" y="171" key="FileChooserDialogImpl/0.0.1536.824@0.0.1536.824" timestamp="1658372379078" />
<state width="1515" height="290" key="GridCell.Tab.0.bottom" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.0.bottom/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state width="1515" height="290" key="GridCell.Tab.0.center" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.0.center/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state width="1515" height="290" key="GridCell.Tab.0.left" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.0.left/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state width="1515" height="290" key="GridCell.Tab.0.right" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.0.right/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state width="1515" height="290" key="GridCell.Tab.1.bottom" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.1.bottom/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state width="1515" height="290" key="GridCell.Tab.1.center" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.1.center/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state width="1515" height="290" key="GridCell.Tab.1.left" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.1.left/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state width="1515" height="290" key="GridCell.Tab.1.right" timestamp="1658494944285">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state width="1515" height="290" key="GridCell.Tab.1.right/0.0.1536.824@0.0.1536.824" timestamp="1658494944285" />
<state x="272" y="58" key="SettingsEditor" timestamp="1658493360383">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state x="272" y="58" key="SettingsEditor/0.0.1536.824@0.0.1536.824" timestamp="1658493360383" />
<state x="461" y="241" key="com.intellij.ide.util.TipDialog" timestamp="1658372307680">
<screen x="0" y="0" width="1536" height="824" />
</state>
<state x="461" y="241" key="com.intellij.ide.util.TipDialog/0.0.1536.824@0.0.1536.824" timestamp="1658372307680" />
</component>
</project>
\ No newline at end of file
import numpy as np
'''
A= np.array([[1,1,1,1,1,1,1,1,1,1,1],
[-5,-4,-3,-2,-1,0,1,2,3,4,5],
]).transpose()
b= np.array([2,7,9,12,13,14,14,13,10,8,4])
AA = np.dot(A.transpose(),A)
print(AA)
AB = np.dot(A.transpose(),b)
print(AB)
x = np.dot(np.linalg.inv(AA),AB)
print(x)
print(np.dot(b-np.dot(A,x),(b-np.dot(A,x).transpose())))
#[25, 16, 9, 4, 1, 0, 1, 4, 9, 16, 25]
A = np.array([[1,3,1,-4],
[-1,-3,1,0],
[2,6,2,-8]])
AA = np.dot(A.transpose(),A)
print(AA)
print(np.linalg.matrix_rank(A))
'''
import matplotlib.pyplot as plt
import numpy as np
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
x = np.random.rand(10000)
t = np.arange(len(x))
# plt.plot(t, x, 'g.', label=u'uniform distribution')  # scatter plot
plt.hist(x, 1000, color='m', alpha=0.6, label=u'uniform distribution', density=True)
plt.legend(loc='upper right')
plt.grid(True, ls=':')
plt.show()
MIT License
Copyright (c) 2020 PRIS-CV: Computer Vision Group
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Progressive Region Enhancement Network (PRENet)
Code release for the paper *Large Scale Visual Food Recognition*.
### Requirement
- python 3.6
- PyTorch >= 1.3.1
- torchvision >= 0.4.2
- dropblock
### Training
1. Prepare the train/test list files (each line: ``image_path label``) and set ``DIR_TRAIN_IMAGES``, ``DIR_TEST_IMAGES``, ``Image_path``, and ``WEIGHT_PATH`` at the top of ``train.py``.
2. Train from scratch with ``train.py`` (see the sketch below).
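A minimal sketch of how ``train.py`` assembles the model and optimizer before calling ``train`` (the class count and learning rate below are the Food-101 defaults hard-coded in this repository, and a CUDA device is assumed):

```python
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from utils import load_model

# Build the PMG network on top of the modified ResNet-50 backbone (no pretrained weights here).
net = load_model('resnet50_pmg', pretrain=False, require_grad=True, num_class=101)
net = nn.DataParallel(net.cuda())

# As in train.py, backbone parameters get a 10x smaller learning rate than the new PMG layers.
backbone_ids = set(map(id, net.module.features.parameters()))
new_params = [p for p in net.module.parameters() if id(p) not in backbone_ids]
optimizer = optim.SGD([
    {'params': net.module.features.parameters(), 'lr': 0.0001 * 0.1},
    {'params': new_params, 'lr': 0.0001},
], momentum=0.9, weight_decay=5e-4)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)
# train(...) from train.py then runs the progressive training loop with these objects.
```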
### Inference
1. Download the model pretrained on Food2k [here](https://pan.baidu.com/s/1HMvBf0F-FpMIMPtuQtUE8Q) (Code: o0nj), then load and run it as in the sketch below.
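``train.py`` saves the best model with ``torch.save(net, ...)``, so a checkpoint can be reloaded as a whole object and evaluated directly. A minimal sketch (the checkpoint and image paths are placeholders, and a CUDA device is assumed):

```python
import torch
from PIL import Image
from torchvision import transforms

normalize = transforms.Normalize(mean=[0.5457954, 0.44430383, 0.34424934],
                                 std=[0.23273608, 0.24383051, 0.24237761])
test_transforms = transforms.Compose([
    transforms.Resize((550, 550)),
    transforms.CenterCrop((448, 448)),
    transforms.ToTensor(),
    normalize,
])

net = torch.load('model.pth')  # placeholder path to a checkpoint saved by train.py
net.eval()

img = test_transforms(Image.open('test.jpg').convert('RGB')).unsqueeze(0).cuda()
with torch.no_grad():
    # The second argument switches on the self-attention branch, as in utils.test().
    _, _, _, output_concat, out1, out2, out3 = net(img, True)
pred = (output_concat + out1 + out2 + out3).argmax(dim=1)
print(pred.item())
```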
### Pretrained model
| CNN | link |
| ---- | ---- |
| resnet50 | [here](https://pan.baidu.com/s/1WY7VsCBTJt2mL9n3Gdl8Mg)(Code: 5eay) |
| resnet101 | [here](https://pan.baidu.com/s/1mEO7KyJFHrkpB5G0Aj6oWw)(Code: yv1o) |
| resnet152 | [here](https://pan.baidu.com/s/1-3LikXkDEvbxQur6n-FUJw)(Code: 22zw) |
| densenet161 | [here](https://pan.baidu.com/s/1UllqjTJMAQEnGFVgzf6-nQ)(Code: bew5) |
| inception_resnet_v2 | [here](https://pan.baidu.com/s/1_974E4eZRzKubemLIQlOHA)(Code: xa8r) |
| senet154 | [here](https://pan.baidu.com/s/1tHpFFSm2AySRjDZ4BTtboQ)(Code: kwzf) |
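``train.py`` loads the ResNet-50 checkpoint above with ``torch.load`` and copies it into the PMG model by matching keys under the ``features.`` prefix, reusing the ``layer3``/``layer4`` weights for the extra ``layer3xx2``/``layer4xx2`` branches. A condensed sketch of that remapping (the local file name is a placeholder):

```python
import re
import torch
from utils import load_model

net = load_model('resnet50_pmg', pretrain=False, require_grad=True, num_class=101)
pretrained = torch.load('food2k_resnet50.pth')  # placeholder path to a downloaded backbone checkpoint

state_dict = {}
for k, v in net.state_dict().items():
    key = k[9:]  # strip the leading 'features.' prefix used by the PMG wrapper
    if key in pretrained and 'fc' not in k:
        state_dict[k] = pretrained[key]
    elif 'xx' in k and re.sub(r'xx[0-9]\.?', '.', key) in pretrained:
        state_dict[k] = pretrained[re.sub(r'xx[0-9]\.?', '.', key)]  # reuse layer3/layer4 weights
    else:
        state_dict[k] = v  # keep the randomly initialized tensor for everything else
net.load_state_dict(state_dict)
```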
## Contact
import torch.nn as nn
import torch
from torch.utils.model_zoo import load_url as load_state_dict_from_url
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU6(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.inplanes = 512
self.layer3xx2 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4xx2 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x1 = self.maxpool(x)
x2 = self.layer1(x1)
x3 = self.layer2(x2)
x4 = self.layer3(x3)
x5 = self.layer4(x4)
x4_1 = self.layer3xx2(x3)
x5_1 = self.layer4xx2(x4_1)
x = self.avgpool(x5_1)
x = x.reshape(x.size(0), -1)
return x1, x2, x3, x4, x5, x
def _resnet(arch, block, layers, pretrained, progress, path="", **kwargs):
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = torch.load(path)
model.load_state_dict(state_dict)
print("load the pretrained_model")
return model
def resnet18(pretrained=False, progress=True, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained=False, progress=True, path="", **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, path,
**kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(**kwargs):
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained=False, progress=True, **kwargs)
def resnext101_32x8d(**kwargs):
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained=False, progress=True, **kwargs)
import torch.nn as nn
import torch
import torch.nn.functional as F
class layer_self_attention(nn.Module):
def __init__(self, in_channels, out_channels, dk, dq, dv, Nh):
super(layer_self_attention, self).__init__()
self.Cin = in_channels
self.Cout = out_channels
self.dq = dq
self.dk = dk
self.dv = dv
self.Nh = Nh
self.k = int(self.dk * self.Cin)
self.q = int(self.dq * self.Cin)
self.v = int(self.dv * self.Cin)
self.q_conv = nn.Sequential(
nn.Conv2d(self.Cin, self.q, kernel_size=1, stride=1, padding=0),
#nn.BatchNorm2d(self.q,self.q)
)
self.kv_conv = nn.Sequential(
nn.Conv2d(self.Cin, self.k + self.v, kernel_size=1, stride=1, padding=0),
#nn.BatchNorm2d(self.k + self.v, self.k + self.v)
)
self.attn = nn.Conv2d(self.v, self.Cout, kernel_size=1, stride=1)
def split_heads_2d(self, x, Nh):
batch, channels, height, width = x.size()
ret_shape = (batch, Nh, channels // Nh, height, width)
split = torch.reshape(x, ret_shape)
return split
#shape of flat_q: (N, Nh, dq//Nh, H*W)
#shape of q: (N, Nh, dq//Nh, H, W)
def layer_compute_flat_qkv(self, x, x_st, dq, dk, dv, Nh):
q = self.q_conv(x)
N, _, Hq, Wq = q.shape
kv = self.kv_conv(x_st)
N, _, H, W = kv.shape
k,v = torch.split(kv, [dk, dv], dim=1)
q = self.split_heads_2d(q, Nh)
k = self.split_heads_2d(k, Nh)
v = self.split_heads_2d(v, Nh)
dkh = dk // Nh
q *= dkh ** -0.5
flat_q = torch.reshape(q, (N, Nh, dq // Nh, Hq * Wq))
flat_k = torch.reshape(k, (N, Nh, dk // Nh, H * W))
flat_v = torch.reshape(v, (N, Nh, dv // Nh, H * W))
return flat_q, flat_k, flat_v, q, k, v
# use inputs_st (keys and values) to strengthen inputs (queries)
def forward(self, inputs, inputs_st):
batch, N, H, W = inputs.shape
#print(inputs.shape)
flat_q, flat_k, flat_v, q, k, v = self.layer_compute_flat_qkv(inputs, inputs_st,self.q, self.k,self.v,self.Nh)
#print(flat_q.shape)
logits = torch.matmul(flat_q.transpose(2, 3), flat_k)
weights = F.softmax(logits, dim=1)
#print(weights.shape)
attn_out = torch.matmul(weights, flat_v.transpose(2, 3))
attn_out = torch.reshape(attn_out, (batch, self.Nh, self.v // self.Nh, H, W))
#print(attn_out.shape)
attn_out = torch.reshape(attn_out, (batch, self.Nh * (self.v // self.Nh), H, W))
#print(attn_out.shape)
attn_out = self.attn(attn_out)
#print(attn_out.shape)
return attn_out
import torch.nn as nn
import torch
import torch.nn.functional as F
from self_attention import self_attention
from layer_self_attention import layer_self_attention
from dropblock import DropBlock2D
import numpy as np
class PMG(nn.Module):
def __init__(self, model, feature_size, classes_num):
super(PMG, self).__init__()
self.features = model
self.num_ftrs = 2048 * 1 * 1
self.elu = nn.ELU(inplace=True)
self.dk = 0.5
self.dq = 0.5
self.dv = 0.5
self.Nh = 8
self.classifier_concat = nn.Sequential(
nn.BatchNorm1d(1024 * 5),
nn.Linear(1024 * 5, feature_size),
nn.BatchNorm1d(feature_size),
nn.ELU(inplace=True),
nn.Linear(feature_size, classes_num),
)
self.conv_block0 = nn.Sequential(
BasicConv(self.num_ftrs // 8, feature_size, kernel_size=1, stride=1, padding=0, relu=True),
BasicConv(feature_size, self.num_ftrs // 2, kernel_size=3, stride=1, padding=1, relu=True)
)
self.classifier0 = nn.Sequential(
nn.BatchNorm1d(self.num_ftrs // 2),
nn.Linear(self.num_ftrs // 2, feature_size),
nn.BatchNorm1d(feature_size),
nn.ELU(inplace=True),
nn.Linear(feature_size, classes_num),
)
self.conv_block1 = nn.Sequential(
BasicConv(self.num_ftrs//4, feature_size, kernel_size=1, stride=1, padding=0, relu=True),
BasicConv(feature_size, self.num_ftrs//2, kernel_size=3, stride=1, padding=1, relu=True)
)
self.classifier1 = nn.Sequential(
nn.BatchNorm1d(self.num_ftrs//2),
nn.Linear(self.num_ftrs//2, feature_size),
nn.BatchNorm1d(feature_size),
nn.ELU(inplace=True),
nn.Linear(feature_size, classes_num),
)
self.conv_block2 = nn.Sequential(
BasicConv(self.num_ftrs//2, feature_size, kernel_size=1, stride=1, padding=0, relu=True),
BasicConv(feature_size, self.num_ftrs//2, kernel_size=3, stride=1, padding=1, relu=True)
)
self.classifier2 = nn.Sequential(
nn.BatchNorm1d(self.num_ftrs//2),
nn.Linear(self.num_ftrs//2, feature_size),
nn.BatchNorm1d(feature_size),
nn.ELU(inplace=True),
nn.Linear(feature_size, classes_num),
)
self.conv_block3 = nn.Sequential(
BasicConv(self.num_ftrs, feature_size, kernel_size=1, stride=1, padding=0, relu=True),
BasicConv(feature_size, self.num_ftrs//2, kernel_size=3, stride=1, padding=1, relu=True)
)
self.classifier3 = nn.Sequential(
nn.BatchNorm1d(self.num_ftrs//2),
nn.Linear(self.num_ftrs//2, feature_size),
nn.BatchNorm1d(feature_size),
nn.ELU(inplace=True),
nn.Linear(feature_size, classes_num),
)
self.Avgmax = nn.AdaptiveMaxPool2d(output_size=(1,1))
self.attn1_1 = self_attention(self.num_ftrs // 2,self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
self.attn2_2 = self_attention(self.num_ftrs // 2,self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
self.attn3_3 = self_attention(self.num_ftrs // 2,self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
'''
self.attn1_2 = layer_self_attention(self.num_ftrs // 2,self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
self.attn1_3 = layer_self_attention(self.num_ftrs // 2,self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
self.attn2_3 = layer_self_attention(self.num_ftrs // 2,self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
self.attn2_1 = layer_self_attention(self.num_ftrs // 2, self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
self.attn3_1 = layer_self_attention(self.num_ftrs // 2, self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
self.attn3_2 = layer_self_attention(self.num_ftrs // 2, self.num_ftrs // 2, self.dk, self.dq, self.dv, self.Nh)
'''
self.sconv1 = nn.Conv2d((self.num_ftrs // 2), self.num_ftrs // 2, kernel_size= 3, padding= 1)
self.sconv2 = nn.Conv2d((self.num_ftrs // 2), self.num_ftrs // 2, kernel_size= 3, padding= 1)
self.sconv3 = nn.Conv2d((self.num_ftrs // 2), self.num_ftrs // 2, kernel_size= 3, padding= 1)
self.drop_block = DropBlock2D(block_size=3, drop_prob=0.5)
def forward(self, x, label):
xf1, xf2, xf3, xf4, xf5, xn = self.features(x)
batch_size, _, _, _ = x.shape
#get feature pyramid
xl1 = self.conv_block1(xf3)
xl2 = self.conv_block2(xf4)
xl3 = self.conv_block3(xf5)
xk1 = self.Avgmax(xl1)
xk1 = xk1.view(xk1.size(0), -1)
xc1 = self.classifier1(xk1)
xk2 = self.Avgmax(xl2)
xk2 = xk2.view(xk2.size(0), -1)
xc2 = self.classifier2(xk2)
xk3 = self.Avgmax(xl3)
xk3 = xk3.view(xk3.size(0), -1)
xc3 = self.classifier3(xk3)
if label:  # `label` acts as a switch: when True, refine the pyramid features with self-attention (useAttn in train.py)
# xs1_2 means using x2 to strengthen x1
#(batch, 1024, 56, 56)
xs1 = self.attn1_1(xl1)
#xs1_2 = self.attn1_2(xl1, xl2)
#xs1_3 = self.attn1_3(xl1, xl3)
# (batch, 1024, 28, 28)
xs2 = self.attn1_1(xl2)
#xs2_3 = self.attn2_3(xl2, xl3)
#xs2_1 = self.attn2_1(xl2, xl1)
# (batch, 1024, 14, 14)
xs3 = self.attn1_1(xl3)
#xs3_1 = self.attn2_1(xl3, xl1)
#xs3_2 = self.attn2_1(xl3, xl2)
#xr1 = self.drop_block(self.sconv1(torch.cat([xs1,xs1_2,xs1_3], dim=1)))
#xr2 = self.drop_block(self.sconv2(torch.cat([xs2,xs2_3,xs2_1], dim=1)))
#xr3 = self.drop_block(self.sconv3(torch.cat([xs3,xs3_1,xs3_2], dim=1)))
xr1 = self.drop_block(self.sconv1(xs1))
xr2 = self.drop_block(self.sconv2(xs2))
xr3 = self.drop_block(self.sconv3(xs3))
xm1 = self.Avgmax(xr1)
xm1 = xm1.view(xm1.size(0), -1)
#print(np.argmax(F.softmax(xm1, dim=1).cpu().detach().numpy(),axis=1))
#input()
xm2 = self.Avgmax(xr2)
xm2 = xm2.view(xm2.size(0), -1)
#print(np.argmax(F.softmax(xm2, dim=1).cpu().detach().numpy(),axis=1))
#input()
xm3 = self.Avgmax(xr3)
xm3 = xm3.view(xm3.size(0), -1)
#print(np.argmax(F.softmax(xm3, dim=1).cpu().detach().numpy(),axis=1))
#input()
x_concat = torch.cat((xm1, xm2, xm3, xn), dim=1)
x_concat = self.classifier_concat(x_concat)
else:
x_concat = torch.cat((xk1, xk2, xk3, xn), dim=1)
x_concat = self.classifier_concat(x_concat)
# get the original feature vector
return xk1, xk2, xk3, x_concat, xc1, xc2, xc3
class BasicConv(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
super(BasicConv, self).__init__()
self.out_channels = out_planes
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,
stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
self.bn = nn.BatchNorm2d(out_planes, eps=1e-5,
momentum=0.01, affine=True) if bn else None
self.relu = nn.ReLU() if relu else None
def forward(self, x):
x = self.conv(x)
if self.bn is not None:
x = self.bn(x)
if self.relu is not None:
x = self.relu(x)
return x
import torch.nn as nn
import torch
import torch.nn.functional as F
import numpy as np
class self_attention(nn.Module):
def __init__(self, in_channels, out_channels, dk, dq, dv, Nh):
super(self_attention, self).__init__()
self.Cin = in_channels
self.Cout = out_channels
self.dq = dq
self.dk = dk
self.dv = dv
self.Nh = Nh
self.k = int(self.dk * self.Cin)
self.q = int(self.dq * self.Cin)
self.v = int(self.dv * self.Cin)
self.kqv_conv = nn.Sequential(
nn.Conv2d(self.Cin, self.k+self.q+self.v, kernel_size=1, stride=1, padding=0),
#nn.BatchNorm2d(self.k+self.q+self.v,self.k+self.q+self.v)
)
self.attn = nn.Conv2d(self.v, self.Cout, kernel_size=1, stride=1)
def split_heads_2d(self, x, Nh):
batch, channels, height, width = x.size()
ret_shape = (batch, Nh, channels // Nh, height, width)
split = torch.reshape(x, ret_shape)
return split
#shape of flat_q: (N, Nh, dq//Nh, H*W)
#shape of q: (N, Nh, dq//Nh, H, W)
def compute_flat_qkv(self, x, dq, dk, dv, Nh):
qkv = self.kqv_conv(x)
N, _, H, W = qkv.size()
q, k, v = torch.split(qkv, [dq, dk, dv], dim=1)
q = self.split_heads_2d(q, Nh)
k = self.split_heads_2d(k, Nh)
v = self.split_heads_2d(v, Nh)
dkh = dk // Nh
q *= dkh ** -0.5
flat_q = torch.reshape(q, (N, Nh, dq // Nh, H * W))
flat_k = torch.reshape(k, (N, Nh, dk // Nh, H * W))
flat_v = torch.reshape(v, (N, Nh, dv // Nh, H * W))
return flat_q, flat_k, flat_v, q, k, v
def forward(self, inputs):
batch, N, H, W = inputs.shape
#print(inputs.shape)
flat_q, flat_k, flat_v, q, k, v = self.compute_flat_qkv(inputs, self.q, self.k,self.v,self.Nh)
#print(flat_q.shape)
logits = torch.matmul(flat_q.transpose(2, 3), flat_k)
weights = F.softmax(logits, dim=1)
#print(weights.shape)
#result = weights.cpu().detach().numpy()
#np.save("visual/matrix"+str(H), result)
#print(weights.shape)
attn_out = torch.matmul(weights, flat_v.transpose(2, 3))
attn_out = torch.reshape(attn_out, (batch, self.Nh, self.v // self.Nh, H, W))
#print(attn_out.shape)
attn_out = torch.reshape(attn_out, (batch, self.Nh * (self.v // self.Nh), H, W))
#print(attn_out.shape)
attn_out = self.attn(attn_out)
#print(attn_out.shape)
return attn_out
#coding=utf-8
"""
ResNet code gently borrowed from
https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
"""
from __future__ import print_function, division, absolute_import
from collections import OrderedDict
import math
import torch
import torch.nn as nn
from thop import clever_format
from tensorboardX import SummaryWriter
from thop import profile
from torch.utils import model_zoo
__all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152',
'se_resnext50_32x4d', 'se_resnext101_32x4d']
pretrained_settings = {
'senet154': {
'imagenet': {
'url': 'D:/DCL/senet154.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnet50': {
'imagenet': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnet101': {
'imagenet': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnet152': {
'imagenet': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnext50_32x4d': {
'imagenet': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnext101_32x4d': {
'imagenet': {
'url': 'D:/DCL/resnext.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
}
class SEModule(nn.Module):
def __init__(self, channels, reduction):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
padding=0)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
padding=0)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
module_input = x
x = self.avg_pool(x)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.sigmoid(x)
return module_input * x
class Bottleneck(nn.Module):
"""
Base class for bottlenecks that implements `forward()` method.
"""
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out = self.se_module(out) + residual
out = self.relu(out)
return out
class SEBottleneck(Bottleneck):
"""
Bottleneck for SENet154.
"""
expansion = 4
def __init__(self, inplanes, planes, groups, reduction, stride=1,
downsample=None):
super(SEBottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes * 2)
self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3,
stride=stride, padding=1, groups=groups,
bias=False)
self.bn2 = nn.BatchNorm2d(planes * 4)
self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNetBottleneck(Bottleneck):
"""
ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe
implementation and uses `stride=stride` in `conv1` and not in `conv2`
(the latter is used in the torchvision implementation of ResNet).
"""
expansion = 4
def __init__(self, inplanes, planes, groups, reduction, stride=1,
downsample=None):
super(SEResNetBottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
stride=stride)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
groups=groups, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNeXtBottleneck(Bottleneck):
"""
ResNeXt bottleneck type C with a Squeeze-and-Excitation module.
"""
expansion = 4
def __init__(self, inplanes, planes, groups, reduction, stride=1,
downsample=None, base_width=4):
super(SEResNeXtBottleneck, self).__init__()
# width = math.floor(planes * (base_width / 64)) * groups
width = int(planes * (base_width / 64)) * groups
self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,
stride=1)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
padding=1, groups=groups, bias=False)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SENet(nn.Module):
def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
inplanes=128, input_3x3=True, downsample_kernel_size=3,
downsample_padding=1, num_classes=1000):
"""
Parameters
----------
block (nn.Module): Bottleneck class.
- For SENet154: SEBottleneck
- For SE-ResNet models: SEResNetBottleneck
- For SE-ResNeXt models: SEResNeXtBottleneck
layers (list of ints): Number of residual blocks for 4 layers of the
network (layer1...layer4).
groups (int): Number of groups for the 3x3 convolution in each
bottleneck block.
- For SENet154: 64
- For SE-ResNet models: 1
- For SE-ResNeXt models: 32
reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
- For all models: 16
dropout_p (float or None): Drop probability for the Dropout layer.
If `None` the Dropout layer is not used.
- For SENet154: 0.2
- For SE-ResNet models: None
- For SE-ResNeXt models: None
inplanes (int): Number of input channels for layer1.
- For SENet154: 128
- For SE-ResNet models: 64
- For SE-ResNeXt models: 64
input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
a single 7x7 convolution in layer0.
- For SENet154: True
- For SE-ResNet models: False
- For SE-ResNeXt models: False
downsample_kernel_size (int): Kernel size for downsampling convolutions
in layer2, layer3 and layer4.
- For SENet154: 3
- For SE-ResNet models: 1
- For SE-ResNeXt models: 1
downsample_padding (int): Padding for downsampling convolutions in
layer2, layer3 and layer4.
- For SENet154: 1
- For SE-ResNet models: 0
- For SE-ResNeXt models: 0
num_classes (int): Number of outputs in `last_linear` layer.
- For all models: 1000
"""
super(SENet, self).__init__()
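# For reference (illustrative only), the constructors at the bottom of this file map these
# parameters onto the published configurations, e.g.
#   senet154    -> SENet(SEBottleneck,       [3, 8, 36, 3], groups=64, reduction=16, dropout_p=0.2)
#   se_resnet50 -> SENet(SEResNetBottleneck, [3, 4, 6, 3],  groups=1,  reduction=16, dropout_p=None,
#                        inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0)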
self.inplanes = inplanes
if input_3x3:
layer0_modules = [
('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
bias=False)),
('bn1', nn.BatchNorm2d(64)),
('relu1', nn.ReLU(inplace=True)),  # from here: 224 -> 112 (stride = 2)
('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
bias=False)),
('bn2', nn.BatchNorm2d(64)),
('relu2', nn.ReLU(inplace=True)),
('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
bias=False)),
('bn3', nn.BatchNorm2d(inplanes)),
('relu3', nn.ReLU(inplace=True)),  # output is 128 x 112 x 112
]
else:
layer0_modules = [
('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
padding=3, bias=False)),
('bn1', nn.BatchNorm2d(inplanes)),
('relu1', nn.ReLU(inplace=True)),
]
# To preserve compatibility with Caffe weights `ceil_mode=True`
# is used instead of `padding=1`.
layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
ceil_mode=True)))  # this reduces 112 -> 56
self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) # output 128 * 56 * 56
self.layer1 = self._make_layer(
block,
planes=64,
blocks=layers[0],
groups=groups,
reduction=reduction,
downsample_kernel_size=1,
downsample_padding=0  # layer1 keeps the spatial size but changes the channel count, so the output is 256 x 56 x 56
)
self.layer2 = self._make_layer(
block,
planes=128,
blocks=layers[1],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding  # layer2 downsamples (stride = 2), so the output is 512 x 28 x 28
)
self.layer3 = self._make_layer(
block,
planes=256,
blocks=layers[2],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding  # layer3 downsamples (stride = 2), so the output is 1024 x 14 x 14
)
self.layer4 = self._make_layer(
block,
planes=512,
blocks=layers[3],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,  # layer4 downsamples (stride = 2), so the output is 2048 x 7 x 7
downsample_padding=downsample_padding
)
self.avg_pool = nn.AvgPool2d(7, stride=1)
self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
self.last_linear = nn.Linear(512 * block.expansion, num_classes)
def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
downsample_kernel_size=1, downsample_padding=0):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=downsample_kernel_size, stride=stride,
padding=downsample_padding, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, groups, reduction, stride,
downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, groups, reduction))
return nn.Sequential(*layers)
def features(self, x):
print(x.shape)
x = self.layer0(x)
#print(x.shape)
x = self.layer1(x)
#print(x.shape)
x = self.layer2(x)
#print(x.shape)
x = self.layer3(x)
#print(x.shape)
x = self.layer4(x)
#print(x.shape)
return x
def logits(self, x):
x = self.avg_pool(x)
print(x.shape)
if self.dropout is not None:
x = self.dropout(x)
x = x.view(x.size(0), -1)
print(x.shape)
x = self.last_linear(x)
return x
def forward(self, x):
x = self.features(x)
print(x.shape)
x = self.logits(x)
return x
def initialize_pretrained_model(model, num_classes, settings):
assert num_classes == settings['num_classes'], \
'num_classes should be {}, but is {}'.format(
settings['num_classes'], num_classes)
#model.load_state_dict(model_zoo.load_url(settings['url']))
model.load_state_dict(torch.load(settings['url']))
model.input_space = settings['input_space']
model.input_size = settings['input_size']
model.input_range = settings['input_range']
model.mean = settings['mean']
model.std = settings['std']
def senet154(num_classes=1000, pretrained='imagenet'):
model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16,
dropout_p=0.2, num_classes=num_classes)
if pretrained is not None:
settings = pretrained_settings['senet154'][pretrained]
initialize_pretrained_model(model, num_classes, settings)
return model
def se_resnet50(num_classes=1000, pretrained='imagenet'):
model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16,
dropout_p=None, inplanes=64, input_3x3=False,
downsample_kernel_size=1, downsample_padding=0,
num_classes=num_classes)
if pretrained is not None:
settings = pretrained_settings['se_resnet50'][pretrained]
initialize_pretrained_model(model, num_classes, settings)
return model
def se_resnet101(num_classes=1000, pretrained='imagenet'):
model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16,
dropout_p=None, inplanes=64, input_3x3=False,
downsample_kernel_size=1, downsample_padding=0,
num_classes=num_classes)
if pretrained is not None:
settings = pretrained_settings['se_resnet101'][pretrained]
initialize_pretrained_model(model, num_classes, settings)
return model
def se_resnet152(num_classes=1000, pretrained=None):
model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16,
dropout_p=None, inplanes=64, input_3x3=False,
downsample_kernel_size=1, downsample_padding=0,
num_classes=num_classes)
if pretrained is not None:
settings = pretrained_settings['se_resnet152'][pretrained]
initialize_pretrained_model(model, num_classes, settings)
return model
def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'):
model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16,
dropout_p=None, inplanes=64, input_3x3=False,
downsample_kernel_size=1, downsample_padding=0,
num_classes=num_classes)
if pretrained is not None:
settings = pretrained_settings['se_resnext50_32x4d'][pretrained]
initialize_pretrained_model(model, num_classes, settings)
return model
def se_resnext101_32x4d(num_classes=1000, pretrained="imagenet"):
model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16,
dropout_p=None, inplanes=64, input_3x3=False,
downsample_kernel_size=1, downsample_padding=0,
num_classes=num_classes)
if pretrained is not None:
settings = pretrained_settings['se_resnext101_32x4d'][pretrained]
initialize_pretrained_model(model, num_classes, settings)
return model
'''
model = se_resnext101_32x4d()
#model = senet154()
#model = se_resnet152()
print(model)
input()
model.last_linear = nn.Linear(2048 , 500)
input = torch.randn([3,3,224,224])
print(model(input).shape)
with SummaryWriter(comment='Resnet') as w:
w.add_graph(model, (input, ))
flops, params = profile(model, inputs=(input,))
flops, params = clever_format([flops, params], "%.3f")
print(flops)
print(params)
'''
\ No newline at end of file
from __future__ import print_function
import os
from PIL import Image
import torch.utils.data as data
import os
import PIL
from tqdm import tqdm
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import re
from utils import *
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
EPOCH = 200              # number of training epochs
BATCH_SIZE = 2           # number of images per batch
LEARNING_RATE = 0.0001   # default learning rate
GPU_IN_USE = torch.cuda.is_available() # whether using GPU
DIR_TRAIN_IMAGES = r'E:\datasets\food101\meta_data\train_full.txt'
#DIR_TRAIN_IMAGES = "/home/vipl/lyx/train_full.txt"
DIR_TEST_IMAGES = r'E:\datasets\food101\meta_data\test_full.txt'
#DIR_TEST_IMAGES = "/home/vipl/lyx/test_full.txt"
Image_path = r"E:/datasets/food101/images/"
#Image_path = "/home/vipl/lizhuo/dataset_food/food101/images/"
#NUM_CATEGORIES = 500
NUM_CATEGORIES = 101
#WEIGHT_PATH= '/home/vipl/lyx/resnet50.pth'
WEIGHT_PATH = r'E:/Pretrained_model/food2k_resnet50_0.0001.pth'
checkpoint = ''
useJP = False  # whether to use Jigsaw Patches during progressive training
usecheckpoint = False
checkpath = "./food2k_448_from2k_only_cengnei/model.pth"
useAttn = True
normalize = transforms.Normalize(mean=[0.5457954,0.44430383,0.34424934],
std=[0.23273608,0.24383051,0.24237761])
train_transforms = transforms.Compose([
transforms.RandomHorizontalFlip(p=0.5), # default value is 0.5
transforms.RandomRotation(degrees=15),
transforms.ColorJitter(brightness=0.126,saturation=0.5),
transforms.Resize((550, 550)),
transforms.RandomCrop(448),
transforms.ToTensor(),
normalize
])
# transforms of test dataset
test_transforms = transforms.Compose([
transforms.Resize((550, 550)),
transforms.CenterCrop((448,448)),
transforms.ToTensor(),
normalize
])
def My_loader(path):
return PIL.Image.open(path).convert('RGB')
class MyDataset(torch.utils.data.Dataset):
def __init__(self, txt_dir, transform=None, target_transform=None, loader=My_loader):
data_txt = open(txt_dir, 'r')
imgs = []
for line in data_txt:
line = line.strip()
words = line.split(' ')
imgs.append((words[0], int(words[1].strip())))
self.imgs = imgs
self.transform = transform
self.target_transform = target_transform
self.loader = My_loader
def __len__(self):
return len(self.imgs)
def __getitem__(self, index):
img_name, label = self.imgs[index]
# label = list(map(int, label))
# print label
# print type(label)
#img = self.loader('/home/vipl/llh/food101_finetuning/food101_vgg/origal_data/images/'+img_name.replace("\\","/"))
img = self.loader(Image_path + img_name)
# print img
if self.transform is not None:
img = self.transform(img)
# print img.size()
# label =torch.Tensor(label)
# print label.size()
return img, label
# if the label is a single label, it can be an int;
# for multi-label data, use a list converted to a torch.Tensor
train_dataset = MyDataset(txt_dir=DIR_TRAIN_IMAGES , transform=train_transforms)
test_dataset = MyDataset(txt_dir=DIR_TEST_IMAGES , transform=test_transforms)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE//2, shuffle=False, num_workers=0)
print('Data Preparation : Finished')
def train(nb_epoch, trainloader, testloader, batch_size, store_name, start_epoch, net,optimizer,exp_lr_scheduler):
exp_dir = store_name
try:
os.stat(exp_dir)
except:
os.makedirs(exp_dir)
CELoss = nn.CrossEntropyLoss()
KLLoss = nn.KLDivLoss(reduction="batchmean")
max_val_acc = 0
#val_acc, val5_acc, _, _, val_loss = test(net, CELoss, batch_size, testloader)
for epoch in range(start_epoch, nb_epoch):
print('\nEpoch: %d' % epoch)
net.train()
train_loss = 0
train_loss1 = 0
train_loss2 = 0
train_loss3 = 0
train_loss4 = 0
correct = 0
total = 0
idx = 0
batch_idx = 0
u1 = 1
u2 = 0.5
for (inputs, targets) in tqdm(trainloader):
idx = batch_idx
if inputs.shape[0] < batch_size:
continue
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = Variable(inputs), Variable(targets)
# Step 1
optimizer.zero_grad()
#inputs1 = jigsaw_generator(inputs, 8)
inputs1 = None
if useJP:
_, _, _, _, output_1, _, _ = net(inputs1,False)
else:
_, _, _, _, output_1, _, _ = net(inputs, False)
#print(output_1.shape)
loss1 = CELoss(output_1, targets) * 1
loss1.backward()
optimizer.step()
# Step 2
optimizer.zero_grad()
#inputs2 = jigsaw_generator(inputs, 4)
inputs2 = None
if useJP:
_, _, _, _, _, output_2, _, = net(inputs2,False)
else:
_, _, _, _, _, output_2, _, = net(inputs, False)
#print(output_2.shape)
loss2 = CELoss(output_2, targets) * 1
loss2.backward()
optimizer.step()
# Step 3
optimizer.zero_grad()
#inputs3 = jigsaw_generator(inputs, 2)
inputs3 = None
if useJP:
_, _, _, _, _, _, output_3 = net(inputs3,False)
else:
_, _, _, _, _, _, output_3 = net(inputs, False)
#print(output_3.shape)
loss3 = CELoss(output_3, targets) * 1
loss3.backward()
optimizer.step()
optimizer.zero_grad()
x1, x2, x3, output_concat, _, _, _ = net(inputs,useAttn)
concat_loss = CELoss(output_concat, targets) * 2
#loss4 = -KLLoss(F.softmax(x1, dim=1), F.softmax(x2, dim=1)) / batch_size
#loss5 = -KLLoss(F.softmax(x1, dim=1), F.softmax(x3, dim=1)) / batch_size
loss6 = -KLLoss(F.softmax(x2, dim=1), F.softmax(x1, dim=1))
#loss7 = -KLLoss(F.softmax(x2, dim=1), F.softmax(x3, dim=1)) / batch_size
loss8 = -KLLoss(F.softmax(x3, dim=1), F.softmax(x1, dim=1))
loss9 = -KLLoss(F.softmax(x3, dim=1), F.softmax(x2, dim=1))
Klloss = loss6 + loss8 + loss9
totalloss = u1 * concat_loss + u2 * Klloss
totalloss.backward()
optimizer.step()
# training log
_, predicted = torch.max(output_concat.data, 1)
total += targets.size(0)
correct += predicted.eq(targets.data).cpu().sum()
train_loss += (loss1.item() + loss2.item() + loss3.item() + concat_loss.item())
train_loss1 += loss1.item()
train_loss2 += loss2.item()
train_loss3 += loss3.item()
train_loss4 += concat_loss.item()
if batch_idx % 10 == 0:
print(
'Step: %d | Loss1: %.3f | Loss2: %.5f | Loss3: %.5f | Loss_concat: %.5f | Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
batch_idx, train_loss1 / (batch_idx + 1), train_loss2 / (batch_idx + 1),
train_loss3 / (batch_idx + 1), train_loss4 / (batch_idx + 1), train_loss / (batch_idx + 1),
100. * float(correct) / total, correct, total))
batch_idx += 1
exp_lr_scheduler.step()
train_acc = 100. * float(correct) / total
train_loss = train_loss / (idx + 1)
with open(exp_dir + '/results_train.txt', 'a') as file:
file.write(
'Iteration %d | train_acc = %.5f | train_loss = %.5f | Loss1: %.3f | Loss2: %.5f | Loss3: %.5f | Loss_concat: %.5f |\n' % (
epoch, train_acc, train_loss, train_loss1 / (idx + 1), train_loss2 / (idx + 1), train_loss3 / (idx + 1),
train_loss4 / (idx + 1)))
val_acc, val5_acc, val_acc_com, val5_acc_com, val_loss = test(net, CELoss, batch_size, testloader,useAttn)
if val_acc > max_val_acc:
max_val_acc = val_acc
torch.save(net, './' + store_name + '/model.pth')
with open(exp_dir + '/results_test.txt', 'a') as file:
file.write(
'Iteration %d, top1 = %.5f, top5 = %.5f, top1_combined = %.5f, top5_combined = %.5f, test_loss = %.6f\n' % (
epoch, val_acc, val5_acc, val_acc_com, val5_acc_com, val_loss))
net = load_model('resnet50_pmg',pretrain=False,require_grad=True,num_class=NUM_CATEGORIES)
net.fc = nn.Linear(2048, 2000)
state_dict = {}
pretrained = torch.load(WEIGHT_PATH)
for k, v in net.state_dict().items():
if k[9:] in pretrained.keys() and "fc" not in k:
state_dict[k] = pretrained[k[9:]]
elif "xx" in k and re.sub(r'xx[0-9]\.?',".", k[9:]) in pretrained.keys():
state_dict[k] = pretrained[re.sub(r'xx[0-9]\.?',".", k[9:])]
else:
state_dict[k] = v
print(k)
net.load_state_dict(state_dict)
net.fc = nn.Linear(2048,NUM_CATEGORIES)
ignored_params = list(map(id, net.features.parameters()))
new_params = filter(lambda p: id(p) not in ignored_params, net.parameters())
optimizer = optim.SGD([
{'params': net.features.parameters(), 'lr': LEARNING_RATE*0.1},
{'params': new_params, 'lr': LEARNING_RATE}
],
momentum=0.9, weight_decay=5e-4)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)
for p in optimizer.param_groups:
outputs = ''
for k, v in p.items():
if k == 'params':
outputs += (k + ': ' + str(v[0].shape).ljust(30) + ' ')
else:
outputs += (k + ': ' + str(v).ljust(10) + ' ')
print(outputs)
cudnn.benchmark = True
net.cuda()
net = nn.DataParallel(net)
if usecheckpoint:
#net.load_state_dict(torch.load(checkpath))
net.module.load_state_dict(torch.load(checkpath).module.state_dict())
print('load the checkpoint')
train(nb_epoch=200, # number of epochs
trainloader=train_loader,
testloader=test_loader,
batch_size=BATCH_SIZE, # batch size
store_name='food2k_448_from2k_only_cengnei', # folder for output
start_epoch=0,  # the start epoch number when you resume training
net=net,
optimizer=optimizer,
exp_lr_scheduler=exp_lr_scheduler)
import numpy as np
import random
import torch
import torchvision
from torch.autograd import Variable
from torchvision import transforms, models
import torch.nn.functional as F
from tqdm import tqdm
from model import *
from Resnet import *
def load_model(model_name, pretrain=True, require_grad=True, num_class=1000, pretrained_model=None):
print('==> Building model..')
if model_name == 'resnet50_pmg':
net = resnet50(pretrained=pretrain, path=pretrained_model)
#for param in net.parameters():
#param.requires_grad = require_grad
net = PMG(net, 512, num_class)
return net
def jigsaw_generator(images, n):
l = []
for a in range(n):
for b in range(n):
l.append([a, b])
block_size = 448 // n
rounds = n ** 2
random.shuffle(l)
jigsaws = images.clone()
for i in range(rounds):
x, y = l[i]
temp = jigsaws[..., 0:block_size, 0:block_size].clone()
jigsaws[..., 0:block_size, 0:block_size] = jigsaws[..., x * block_size:(x + 1) * block_size,
y * block_size:(y + 1) * block_size].clone()
jigsaws[..., x * block_size:(x + 1) * block_size, y * block_size:(y + 1) * block_size] = temp
return jigsaws
def test(net, criterion, batch_size, testloader,useattn):
net.eval()
test_loss = 0
correct = 0
correct_com = 0
total = 0
idx = 0
val_corrects1 = 0
val_corrects2 = 0
val_corrects5 = 0
val_en_corrects1 = 0
val_en_corrects2 = 0
val_en_corrects5 = 0
batch_idx = 0
for (inputs, targets) in tqdm(testloader):
idx = batch_idx
with torch.no_grad():
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = Variable(inputs), Variable(targets)
_, _, _, output_concat, output1, output2, output3 = net(inputs,useattn)
#print(np.argmax(F.softmax(output_concat, dim=1).cpu().numpy(),axis=1))
#input()
#continue
outputs_com = output1 + output2 + output3 + output_concat
#print(np.argmax(F.softmax(output1, dim=1).cpu().numpy(),axis=1))
#input()
loss = criterion(output_concat, targets)
test_loss += loss.item()
_, top3_pos = torch.topk(output_concat.data, 5)
_, top3_pos_en = torch.topk(outputs_com.data, 5)
total += targets.size(0)
batch_corrects1 = torch.sum((top3_pos[:, 0] == targets)).data.item()
val_corrects1 += batch_corrects1
batch_corrects2 = torch.sum((top3_pos[:, 1] == targets)).data.item()
val_corrects2 += (batch_corrects2 + batch_corrects1)
batch_corrects3 = torch.sum((top3_pos[:, 2] == targets)).data.item()
batch_corrects4 = torch.sum((top3_pos[:, 3] == targets)).data.item()
batch_corrects5 = torch.sum((top3_pos[:, 4] == targets)).data.item()
val_corrects5 += (batch_corrects5 + batch_corrects4 + batch_corrects3 + batch_corrects2 + batch_corrects1)
batch_corrects1 = torch.sum((top3_pos_en[:, 0] == targets)).data.item()
val_en_corrects1 += batch_corrects1
batch_corrects2 = torch.sum((top3_pos_en[:, 1] == targets)).data.item()
val_en_corrects2+= (batch_corrects2 + batch_corrects1)
batch_corrects3 = torch.sum((top3_pos_en[:, 2] == targets)).data.item()
batch_corrects4 = torch.sum((top3_pos_en[:, 3] == targets)).data.item()
batch_corrects5 = torch.sum((top3_pos_en[:, 4] == targets)).data.item()
val_en_corrects5 += (batch_corrects5 + batch_corrects4 + batch_corrects3 + batch_corrects2 + batch_corrects1)
batch_idx += 1
test_acc = val_corrects1 / total
test5_acc = val_corrects5 / total
test_acc_en = val_en_corrects1 / total
test5_acc_en = val_en_corrects5 / total
test_loss = test_loss / (idx + 1)
return test_acc, test5_acc, test_acc_en, test5_acc_en, test_loss
#return test_acc, test5_acc, test_loss
import torch
import torch.nn as nn
import torch.optim as optim
import cv2
import numpy as np
import torchvision
from torchvision import datasets
# coding: utf-8
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from utils import load_model
check = False
load_pretrained = False
#model = ResNet_aac(fb=64, n_label = 11, model_size=152,kernel_size=3,stride=2,dk=1,dv=1, Nh=8,shape=224,relative = False)
model = load_model('resnet50_pmg',pretrain=False,require_grad=True,num_class=101)
model.cuda()
model = nn.DataParallel(model)
#model = resnet152()
pointlist=[]
if load_pretrained:
model_dict = model.state_dict()
#pretrained_dict = torch.load(args.pretrainedmodel)
pretrained_dict = torch.load("D:/resnet50.pth")
# pretrained_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet50-19c8e357.pth')
state_dict = {}
for k, v in model_dict.items():
# if k in dict1.keys():
if k in pretrained_dict.keys() and "fc" not in k:
#state_dict[k] = pretrained_dict[dict1[k]]
state_dict[k] = pretrained_dict[k]
else:
state_dict[k] = v
print(k)
if check:
#filename = "best_model_"
#checkpoint = torch.load('./checkpoint/' + filename + 'ckpt.t7')
#checkpoint = torch.load('unprebest.t7')
model.module.load_state_dict(torch.load("./food101/model.pth").module.state_dict())
model.eval()
test_img = "G:/images/apple_pie/116697.jpg"
img = Image.open(test_img).convert('RGB')
transform_test = transforms.Compose([
transforms.Resize(size=(299, 299)),
transforms.CenterCrop((224,224)),
transforms.ToTensor(),
transforms.Normalize((0.5457954,0.44430383,0.34424934), (0.23273608,0.24383051,0.24237761))
])
img1 = transform_test(img).reshape([1,3,224,224])
model(img1,True)
#print(model(img1,True).shape)
def on_EVENT_LBUTTONDOWN(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
xy = "%d,%d" % (x, y)
print (xy)
pointlist.append((x,y))
cv2.circle(img, (x, y), 1, (255, 0, 0), thickness = -1)
cv2.putText(img, xy, (x, y), cv2.FONT_HERSHEY_PLAIN,
1.0, (0,0,0), thickness = 1)
cv2.imshow("Image", img)
return
test_img = "G:/images/apple_pie/116697.jpg"
# show the original image
img = cv2.imread(test_img)
img = cv2.resize(img,(224,224))
cv2.namedWindow("Image")
cv2.setMouseCallback("Image", on_EVENT_LBUTTONDOWN)
img_raw=img.copy()
cv2.imshow("Image", img)
cv2.waitKey(0)
# show the image after normalization
img = Image.open(test_img).convert('RGB')
transform_test = transforms.Compose([
transforms.Resize(size=(224, 224)),
transforms.ToTensor(),
transforms.Normalize((0.5457954,0.44430383,0.34424934), (0.23273608,0.24383051,0.24237761))
])
img1 = transform_test(img)
img = img1.transpose(0,1).transpose(1,2)
img = img.numpy()
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
cv2.imshow("Image", img)
cv2.waitKey(0)
# show the heat maps
AAC_mat = np.load(r"D:\PMG\visual\matrix14.npy")
depth = AAC_mat.shape[-1]
height = int(np.sqrt(depth))
AAC_mat = np.reshape(AAC_mat,[8,depth,depth])
#print(AAC_mat.shape)
isfirst1 = True
imgs = None
imgs1 = None
for item in pointlist:
isfirst = True
x,y = item
x/=224/height
y/=224/height
mat = AAC_mat[:,int(x*height+y),:]
#print(mat.shape)
#for i in range(0,8):
result = mat
result = result.reshape([8,height,height])
for i in range(0,8):
result = mat[i:]
img = result
#img = cv2.resize(result, (224, 224))
#img = img*255
heatmap = img / np.max(img)
heatmap = np.uint8(255 * heatmap)
w=heatmap
w = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)  # convert to the jet colormap
#w = heatmap
w = cv2.resize(w,(224,224))
#x = img_raw*0.5+w*0.5  # choose the blending weights yourself
x =w
x = x.astype(np.uint8)
#print(x.shape)
if isfirst:
imgs = x
isfirst = False
else:
print(imgs.shape)
print(x.shape)
imgs = np.hstack([imgs, x])
#print(imgs.shape)
if isfirst1:
imgs1 = imgs
isfirst1 = False
else:
imgs1 = np.vstack([imgs1, imgs])
print(imgs1.shape)
cv2.imshow("mutil_pic", imgs1)
cv2.waitKey(0)