Skip to content

Commit 627b631

Browse files
committed
Add typing to dinov2 entrypt fns, use hf hub for mae & dinov2 weights
1 parent c9db470 commit 627b631

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

timm/models/vision_transformer.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1121,21 +1121,25 @@ def _cfg(url='', **kwargs):
11211121
# DINOv2 pretrained - https://arxiv.org/abs/2304.07193 (no classifier head, for fine-tune/features only)
11221122
'vit_small_patch14_dinov2.lvd142m': _cfg(
11231123
url='https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth',
1124+
hf_hub_id='timm/',
11241125
license='cc-by-nc-4.0',
11251126
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0,
11261127
input_size=(3, 518, 518), crop_pct=1.0),
11271128
'vit_base_patch14_dinov2.lvd142m': _cfg(
11281129
url='https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pth',
1130+
hf_hub_id='timm/',
11291131
license='cc-by-nc-4.0',
11301132
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0,
11311133
input_size=(3, 518, 518), crop_pct=1.0),
11321134
'vit_large_patch14_dinov2.lvd142m': _cfg(
11331135
url='https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pth',
1136+
hf_hub_id='timm/',
11341137
license='cc-by-nc-4.0',
11351138
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0,
11361139
input_size=(3, 518, 518), crop_pct=1.0),
11371140
'vit_giant_patch14_dinov2.lvd142m': _cfg(
11381141
url='https://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pth',
1142+
hf_hub_id='timm/',
11391143
license='cc-by-nc-4.0',
11401144
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0,
11411145
input_size=(3, 518, 518), crop_pct=1.0),
@@ -1416,17 +1420,17 @@ def _cfg(url='', **kwargs):
14161420

14171421
'vit_base_patch16_224.mae': _cfg(
14181422
url='https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth',
1419-
#hf_hub_id='timm/',
1423+
hf_hub_id='timm/',
14201424
license='cc-by-nc-4.0',
14211425
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
14221426
'vit_large_patch16_224.mae': _cfg(
14231427
url='https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pth',
1424-
# hf_hub_id='timm/',
1428+
hf_hub_id='timm/',
14251429
license='cc-by-nc-4.0',
14261430
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
14271431
'vit_huge_patch14_224.mae': _cfg(
14281432
url='https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_huge.pth',
1429-
# hf_hub_id='timm/',
1433+
hf_hub_id='timm/',
14301434
license='cc-by-nc-4.0',
14311435
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
14321436
})
@@ -1970,7 +1974,7 @@ def vit_huge_patch14_xp_224(pretrained=False, **kwargs) -> VisionTransformer:
19701974

19711975

19721976
@register_model
1973-
def vit_small_patch14_dinov2(pretrained=False, **kwargs):
1977+
def vit_small_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
19741978
""" ViT-S/14 for DINOv2
19751979
"""
19761980
model_args = dict(
@@ -1982,7 +1986,7 @@ def vit_small_patch14_dinov2(pretrained=False, **kwargs):
19821986

19831987

19841988
@register_model
1985-
def vit_base_patch14_dinov2(pretrained=False, **kwargs):
1989+
def vit_base_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
19861990
""" ViT-B/14 for DINOv2
19871991
"""
19881992
model_args = dict(
@@ -1994,7 +1998,7 @@ def vit_base_patch14_dinov2(pretrained=False, **kwargs):
19941998

19951999

19962000
@register_model
1997-
def vit_large_patch14_dinov2(pretrained=False, **kwargs):
2001+
def vit_large_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
19982002
""" ViT-L/14 for DINOv2
19992003
"""
20002004
model_args = dict(
@@ -2006,7 +2010,7 @@ def vit_large_patch14_dinov2(pretrained=False, **kwargs):
20062010

20072011

20082012
@register_model
2009-
def vit_giant_patch14_dinov2(pretrained=False, **kwargs):
2013+
def vit_giant_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
20102014
""" ViT-G/14 for DINOv2
20112015
"""
20122016

0 commit comments

Comments
 (0)