@@ -79,15 +79,15 @@ python3 examples/torchvision/image_classification.py \
     -test_only
 ```
 
-#### L2 (CSE + L2)
+#### L2 (CE + L2)
 ```
 python3 examples/torchvision/image_classification.py \
-    --config configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/cse_l2-resnet18_from_resnet34.yaml \
+    --config configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/ce_l2-resnet18_from_resnet34.yaml \
     -test_only
 ```
 
 #### PAD-L2 (2nd stage)
-Note that you first need to train a model with L2 (CSE + L2), and load the ckpt file designated in the following yaml file.
+Note that you first need to train a model with L2 (CE + L2), and load the ckpt file designated in the following yaml file.
 i.e., PAD-L2 is a two-stage training method.
 
 ```
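
Side note (not part of the diff): for PAD-L2's 2nd stage, the config has to point at the checkpoint written by the 1st-stage L2 (CE + L2) run. A minimal sketch of what such an entry could look like — the key names and the ckpt path here are illustrative assumptions, not the repo's actual yaml:

```yaml
# Hypothetical excerpt of a PAD-L2 (2nd stage) config.
# Key names and the ckpt path are assumptions for illustration;
# see the yaml file referenced in the README for the real layout.
student_model:
  name: 'resnet18'
  # checkpoint saved by the 1st-stage L2 (CE + L2) training run
  ckpt: 'resource/ckpt/ilsvrc2012/ce_l2-resnet18_from_resnet34.pt'
```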
@@ -159,19 +159,19 @@ torchrun --nproc_per_node=${NUM_GPUS} examples/torchvision/image_classification
     --world_size ${NUM_GPUS}
 ```
 
-#### L2 (CSE + L2)
+#### L2 (CE + L2)
 If you use fewer or more GPUs for distributed training, you should update `batch_size: 171` in the `train_data_loader` entry
 so that (batch size) * ${NUM_GPUS} = 512. (e.g., `batch_size: 64` if you use 8 GPUs for distributed training.)
 
 ```
 torchrun --nproc_per_node=${NUM_GPUS} examples/torchvision/image_classification.py \
-    --config configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/cse_l2-resnet18_from_resnet34.yaml \
-    --run_log log/ilsvrc2012/cse_l2-resnet18_from_resnet34.log \
+    --config configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/ce_l2-resnet18_from_resnet34.yaml \
+    --run_log log/ilsvrc2012/ce_l2-resnet18_from_resnet34.log \
     --world_size ${NUM_GPUS}
 ```
 
 #### PAD-L2 (2nd stage)
-Note that you first need to train a model with L2 (CSE + L2), and load the ckpt file designated in the following yaml file.
+Note that you first need to train a model with L2 (CE + L2), and load the ckpt file designated in the following yaml file.
 i.e., PAD-L2 is a two-stage training method.
 
 If you use fewer or more GPUs for distributed training, you should update `batch_size: 171` in the `train_data_loader` entry
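
Side note (not part of the diff): the rule above keeps the global batch at 512, so pick the per-GPU `batch_size` as 512 / ${NUM_GPUS} (the config's default `171` suggests 3 GPUs, since 171 * 3 = 513 ≈ 512). A sketch of the `train_data_loader` entry under that assumption — keys other than `batch_size` are illustrative:

```yaml
# Hypothetical train_data_loader excerpt; only batch_size comes from
# the README note above, the other keys are illustrative.
train_data_loader:
  batch_size: 64   # 64 * 8 GPUs = 512
  num_workers: 16
```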