Commit 6bae514

Add pretrained patch embed resizing to swin

1 parent 5c504b4

2 files changed: +26 −2 lines changed

timm/models/swin_transformer.py
Lines changed: 12 additions & 1 deletion

@@ -24,7 +24,7 @@
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from timm.layers import PatchEmbed, Mlp, DropPath, ClassifierHead, to_2tuple, to_ntuple, trunc_normal_, \
-    _assert, use_fused_attn, resize_rel_pos_bias_table
+    _assert, use_fused_attn, resize_rel_pos_bias_table, resample_patch_embed
 from ._builder import build_model_with_cfg
 from ._features_fx import register_notrace_function
 from ._manipulate import checkpoint_seq, named_apply

@@ -632,6 +632,17 @@ def checkpoint_filter_fn(state_dict, model):
         if any([n in k for n in ('relative_position_index', 'attn_mask')]):
             continue  # skip buffers that should not be persistent
 
+        if 'patch_embed.proj.weight' in k:
+            _, _, H, W = model.patch_embed.proj.weight.shape
+            if v.shape[-2] != H or v.shape[-1] != W:
+                v = resample_patch_embed(
+                    v,
+                    (H, W),
+                    interpolation='bicubic',
+                    antialias=True,
+                    verbose=True,
+                )
+
         if k.endswith('relative_position_bias_table'):
             m = model.get_submodule(k[:-29])
             if v.shape != m.relative_position_bias_table.shape or m.window_size[0] != m.window_size[1]:
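
The added filter logic only kicks in when the checkpoint's patch embedding kernel size differs from the model's. A minimal sketch of what that resampling does, calling timm.layers.resample_patch_embed directly on a hypothetical 4x4 pretrained kernel resized to 8x8 (the tensor here is random and for illustration only):

import torch
from timm.layers import resample_patch_embed

# Hypothetical pretrained Swin patch embedding kernel: (embed_dim, in_chans, 4, 4)
pretrained_kernel = torch.randn(96, 3, 4, 4)

# Resample it to an 8x8 kernel, mirroring the call in checkpoint_filter_fn above.
resized_kernel = resample_patch_embed(
    pretrained_kernel,
    (8, 8),
    interpolation='bicubic',
    antialias=True,
    verbose=True,
)
print(resized_kernel.shape)  # expected: torch.Size([96, 3, 8, 8])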

timm/models/swin_transformer_v2.py
Lines changed: 14 additions & 1 deletion

@@ -21,7 +21,8 @@
 import torch.utils.checkpoint as checkpoint
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
-from timm.layers import PatchEmbed, Mlp, DropPath, to_2tuple, trunc_normal_, _assert, ClassifierHead
+from timm.layers import PatchEmbed, Mlp, DropPath, to_2tuple, trunc_normal_, _assert, ClassifierHead,\
+    resample_patch_embed
 from ._builder import build_model_with_cfg
 from ._features_fx import register_notrace_function
 from ._registry import generate_default_cfgs, register_model, register_model_deprecations

@@ -622,6 +623,18 @@ def checkpoint_filter_fn(state_dict, model):
     for k, v in state_dict.items():
         if any([n in k for n in ('relative_position_index', 'relative_coords_table', 'attn_mask')]):
             continue  # skip buffers that should not be persistent
+
+        if 'patch_embed.proj.weight' in k:
+            _, _, H, W = model.patch_embed.proj.weight.shape
+            if v.shape[-2] != H or v.shape[-1] != W:
+                v = resample_patch_embed(
+                    v,
+                    (H, W),
+                    interpolation='bicubic',
+                    antialias=True,
+                    verbose=True,
+                )
+
         if not native_checkpoint:
             # skip layer remapping for updated checkpoints
             k = re.sub(r'layers.(\d+).downsample', lambda x: f'layers.{int(x.group(1)) + 1}.downsample', k)
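
With both filter functions handling the mismatch, a Swin model built with a non-default patch size should be able to load stock pretrained weights, with the 4x4 patch embedding resampled on the fly. A usage sketch, not part of the commit, assuming create_model forwards the img_size and patch_size overrides to the model constructor:

import timm

# Run Swin-T at 448x448 with an 8x8 patch embedding so the token grid stays 56x56;
# checkpoint_filter_fn should resample patch_embed.proj.weight from 4x4 to 8x8 on load.
model = timm.create_model(
    'swin_tiny_patch4_window7_224',
    pretrained=True,
    img_size=448,
    patch_size=8,
)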
