@@ -1121,21 +1121,25 @@ def _cfg(url='', **kwargs):
11211121 # DINOv2 pretrained - https://arxiv.org/abs/2304.07193 (no classifier head, for fine-tune/features only)
11221122 'vit_small_patch14_dinov2.lvd142m' : _cfg (
11231123 url = 'https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth' ,
1124+ hf_hub_id = 'timm/' ,
11241125 license = 'cc-by-nc-4.0' ,
11251126 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ,
11261127 input_size = (3 , 518 , 518 ), crop_pct = 1.0 ),
11271128 'vit_base_patch14_dinov2.lvd142m' : _cfg (
11281129 url = 'https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pth' ,
1130+ hf_hub_id = 'timm/' ,
11291131 license = 'cc-by-nc-4.0' ,
11301132 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ,
11311133 input_size = (3 , 518 , 518 ), crop_pct = 1.0 ),
11321134 'vit_large_patch14_dinov2.lvd142m' : _cfg (
11331135 url = 'https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pth' ,
1136+ hf_hub_id = 'timm/' ,
11341137 license = 'cc-by-nc-4.0' ,
11351138 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ,
11361139 input_size = (3 , 518 , 518 ), crop_pct = 1.0 ),
11371140 'vit_giant_patch14_dinov2.lvd142m' : _cfg (
11381141 url = 'https://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pth' ,
1142+ hf_hub_id = 'timm/' ,
11391143 license = 'cc-by-nc-4.0' ,
11401144 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ,
11411145 input_size = (3 , 518 , 518 ), crop_pct = 1.0 ),
@@ -1416,17 +1420,17 @@ def _cfg(url='', **kwargs):
14161420
14171421 'vit_base_patch16_224.mae' : _cfg (
14181422 url = 'https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth' ,
1419- # hf_hub_id='timm/',
1423+ hf_hub_id = 'timm/' ,
14201424 license = 'cc-by-nc-4.0' ,
14211425 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ),
14221426 'vit_large_patch16_224.mae' : _cfg (
14231427 url = 'https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pth' ,
1424- # hf_hub_id='timm/',
1428+ hf_hub_id = 'timm/' ,
14251429 license = 'cc-by-nc-4.0' ,
14261430 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ),
14271431 'vit_huge_patch14_224.mae' : _cfg (
14281432 url = 'https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_huge.pth' ,
1429- # hf_hub_id='timm/',
1433+ hf_hub_id = 'timm/' ,
14301434 license = 'cc-by-nc-4.0' ,
14311435 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ),
14321436})
@@ -1970,7 +1974,7 @@ def vit_huge_patch14_xp_224(pretrained=False, **kwargs) -> VisionTransformer:
19701974
19711975
19721976@register_model
1973- def vit_small_patch14_dinov2 (pretrained = False , ** kwargs ):
1977+ def vit_small_patch14_dinov2 (pretrained = False , ** kwargs ) -> VisionTransformer :
19741978 """ ViT-S/14 for DINOv2
19751979 """
19761980 model_args = dict (
@@ -1982,7 +1986,7 @@ def vit_small_patch14_dinov2(pretrained=False, **kwargs):
19821986
19831987
19841988@register_model
1985- def vit_base_patch14_dinov2 (pretrained = False , ** kwargs ):
1989+ def vit_base_patch14_dinov2 (pretrained = False , ** kwargs ) -> VisionTransformer :
19861990 """ ViT-B/14 for DINOv2
19871991 """
19881992 model_args = dict (
@@ -1994,7 +1998,7 @@ def vit_base_patch14_dinov2(pretrained=False, **kwargs):
19941998
19951999
19962000@register_model
1997- def vit_large_patch14_dinov2 (pretrained = False , ** kwargs ):
2001+ def vit_large_patch14_dinov2 (pretrained = False , ** kwargs ) -> VisionTransformer :
19982002 """ ViT-L/14 for DINOv2
19992003 """
20002004 model_args = dict (
@@ -2006,7 +2010,7 @@ def vit_large_patch14_dinov2(pretrained=False, **kwargs):
20062010
20072011
20082012@register_model
2009- def vit_giant_patch14_dinov2 (pretrained = False , ** kwargs ):
2013+ def vit_giant_patch14_dinov2 (pretrained = False , ** kwargs ) -> VisionTransformer :
20102014 """ ViT-G/14 for DINOv2
20112015 """
20122016
0 commit comments