stoneliu1981 / pytorch-image-models
Commit 709d7c07
Committed on May 20, 2021
Author: mrT23

Merge branch 'rwightman:master' into master

Parents: 240e6677, cf4ce2f1

Showing 5 changed files with 271 additions and 39 deletions (+271 -39)
inference.py                      +3   -3
tests/test_models.py              +1   -1
timm/models/layers/__init__.py    +1   -1
timm/models/layers/mlp.py         +32  -2
timm/models/mlp_mixer.py          +234 -32
inference.py
...
...
@@ -114,13 +114,13 @@ def main():
                _logger.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                    batch_idx, len(loader), batch_time=batch_time))

-    topk_ids = np.concatenate(topk_ids, axis=0).squeeze()
+    topk_ids = np.concatenate(topk_ids, axis=0)

    with open(os.path.join(args.output_dir, './topk_ids.csv'), 'w') as out_file:
        filenames = loader.dataset.filenames(basename=True)
        for filename, label in zip(filenames, topk_ids):
-            out_file.write('{0},{1},{2},{3},{4},{5}\n'.format(
-                filename, label[0], label[1], label[2], label[3], label[4]))
+            out_file.write('{0},{1}\n'.format(
+                filename, ','.join([str(v) for v in label])))


if __name__ == '__main__':
...
...
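For context, a minimal standalone sketch (not part of the diff) of what the rewritten CSV loop emits: instead of five hard-coded columns, each row now carries however many top-k ids the loader produced. The filename and ids below are made-up illustrations.

# Illustration only: mirrors the new '{0},{1}\n'.format(filename, ','.join(...)) row format.
label = [207, 208, 219]  # hypothetical top-3 class ids for one image
row = '{0},{1}\n'.format('dog.jpg', ','.join([str(v) for v in label]))
assert row == 'dog.jpg,207,208,219\n'  # one row per image, k id columns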
tests/test_models.py
...
...
@@ -15,7 +15,7 @@ if hasattr(torch._C, '_jit_set_profiling_executor'):
torch._C._jit_set_profiling_mode(False)

# transformer models don't support many of the spatial / feature based model functionalities
-NON_STD_FILTERS = ['vit_*', 'tnt_*', 'pit_*', 'swin_*', 'coat_*', 'cait_*', 'mixer_*']
+NON_STD_FILTERS = ['vit_*', 'tnt_*', 'pit_*', 'swin_*', 'coat_*', 'cait_*', '*mixer_*', 'gmlp_*', 'resmlp_*']
NUM_NON_STD = len(NON_STD_FILTERS)

# exclude models that cause specific test failures
...
...
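A quick sanity check (not from the commit) of why the glob changed from 'mixer_*' to '*mixer_*': the leading wildcard also catches the new gmixer_* experiments, while 'gmlp_*' and 'resmlp_*' cover the other added families. The test helpers treat these filters as fnmatch-style globs, so the matching can be verified directly:

from fnmatch import fnmatch

NON_STD_FILTERS = ['vit_*', 'tnt_*', 'pit_*', 'swin_*', 'coat_*', 'cait_*', '*mixer_*', 'gmlp_*', 'resmlp_*']

# 'mixer_b16_224' and 'gmixer_12_224' both hit '*mixer_*'; the gMLP / ResMLP names hit their own globs.
for name in ['mixer_b16_224', 'gmixer_12_224', 'gmlp_s16_224', 'resmlp_12_224']:
    assert any(fnmatch(name, pattern) for pattern in NON_STD_FILTERS)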
timm/models/layers/__init__.py
...
...
@@ -20,7 +20,7 @@ from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible
from .inplace_abn import InplaceAbn
from .linear import Linear
from .mixed_conv2d import MixedConv2d
-from .mlp import Mlp, GluMlp
+from .mlp import Mlp, GluMlp, GatedMlp
from .norm import GroupNorm
from .norm_act import BatchNormAct2d, GroupNormAct
from .padding import get_padding, get_same_padding, pad_same
...
...
timm/models/layers/mlp.py
...
...
@@ -34,9 +34,10 @@ class GluMlp(nn.Module):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
-        self.fc1 = nn.Linear(in_features, hidden_features * 2)
+        assert hidden_features % 2 == 0
+        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
-        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.fc2 = nn.Linear(hidden_features // 2, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
...
...
@@ -47,3 +48,32 @@ class GluMlp(nn.Module):
        x = self.fc2(x)
        x = self.drop(x)
        return x


+class GatedMlp(nn.Module):
+    """ MLP as used in gMLP
+    """
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU,
+                 gate_layer=None, drop=0.):
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()
+        if gate_layer is not None:
+            assert hidden_features % 2 == 0
+            self.gate = gate_layer(hidden_features)
+            hidden_features = hidden_features // 2  # FIXME base reduction on gate property?
+        else:
+            self.gate = nn.Identity()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.gate(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
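A short shape check for the reworked GluMlp and the new GatedMlp, assuming a timm checkout that contains this merge (both classes are exported from timm.models.layers after the __init__.py change above). Under the new convention, hidden_features is the width of fc1's output, so it must be even before the GLU/gate split halves it:

# Assumes a timm checkout including this merge; random weights, shapes only.
import torch
from timm.models.layers import GluMlp, GatedMlp

x = torch.randn(2, 196, 64)                            # (batch, tokens, features)

glu = GluMlp(in_features=64, hidden_features=128)      # fc1: 64 -> 128, GLU halves to 64, fc2: 64 -> 64
assert glu(x).shape == (2, 196, 64)

gated = GatedMlp(in_features=64, hidden_features=128)  # gate_layer=None, so the gate is nn.Identity
assert gated(x).shape == (2, 196, 64)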
timm/models/mlp_mixer.py
""" MLP-Mixer in PyTorch
""" MLP-Mixer, ResMLP, and gMLP in PyTorch
This impl originally based on MLP-Mixer paper.
Official JAX impl: https://github.com/google-research/vision_transformer/blob/linen/vit_jax/models_mixer.py
...
...
@@ -12,6 +14,25 @@ Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2
  year={2021}
}

+Also supporting preliminary (not verified) implementations of ResMlp, gMLP, and possibly more...
+
+Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
+@misc{touvron2021resmlp,
+      title={ResMLP: Feedforward networks for image classification with data-efficient training},
+      author={Hugo Touvron and Piotr Bojanowski and Mathilde Caron and Matthieu Cord and Alaaeldin El-Nouby and
+        Edouard Grave and Armand Joulin and Gabriel Synnaeve and Jakob Verbeek and Hervé Jégou},
+      year={2021},
+      eprint={2105.03404},
+}
+
+Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
+@misc{liu2021pay,
+      title={Pay Attention to MLPs},
+      author={Hanxiao Liu and Zihang Dai and David R. So and Quoc V. Le},
+      year={2021},
+      eprint={2105.08050},
+}
+
+A thank you to paper authors for releasing code and weights.

Hacked together by / Copyright 2021 Ross Wightman
...
...
@@ -25,7 +46,7 @@ import torch.nn as nn
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from .helpers import build_model_with_cfg, overlay_external_default_cfg
-from .layers import PatchEmbed, Mlp, GluMlp, DropPath, lecun_normal_
+from .layers import PatchEmbed, Mlp, GluMlp, GatedMlp, DropPath, lecun_normal_, to_2tuple
from .registry import register_model
...
...
@@ -43,7 +64,6 @@ def _cfg(url='', **kwargs):
default_cfgs = dict(
    mixer_s32_224=_cfg(),
    mixer_s16_224=_cfg(),
-    mixer_s16_glu_224=_cfg(),
    mixer_b32_224=_cfg(),
    mixer_b16_224=_cfg(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_b16_224-76587d61.pth',
...
...
@@ -60,15 +80,29 @@ default_cfgs = dict(
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_l16_224_in21k-846aa33c.pth',
        num_classes=21843),
+    gmixer_12_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    gmixer_24_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_12_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_24_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    resmlp_36_224=_cfg(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
+    gmlp_ti16_224=_cfg(),
+    gmlp_s16_224=_cfg(),
+    gmlp_b16_224=_cfg(),
)


class MixerBlock(nn.Module):
+    """ Residual Block w/ token mixing and channel MLPs
+    Based on: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
+    """
    def __init__(
-            self, dim, seq_len, tokens_dim, channels_dim, mlp_layer=Mlp,
-            norm_layer=partial(nn.LayerNorm, eps=1e-6), act_layer=nn.GELU, drop=0., drop_path=0.):
+            self, dim, seq_len, mlp_ratio=(0.5, 4.0), mlp_layer=Mlp,
+            norm_layer=partial(nn.LayerNorm, eps=1e-6), act_layer=nn.GELU, drop=0., drop_path=0.):
        super().__init__()
+        tokens_dim, channels_dim = [int(x * dim) for x in to_2tuple(mlp_ratio)]
        self.norm1 = norm_layer(dim)
        self.mlp_tokens = mlp_layer(seq_len, tokens_dim, act_layer=act_layer, drop=drop)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
...
...
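A worked example of the new mlp_ratio handling (values chosen to match the Mixer-S defaults, purely for illustration): the ratio tuple is expanded with to_2tuple and scaled by the embedding dim, recovering the tokens_dim / channels_dim values that used to be passed explicitly.

# Illustrates the tokens_dim / channels_dim computation added to MixerBlock above.
from timm.models.layers import to_2tuple

dim, mlp_ratio = 512, (0.5, 4.0)                   # Mixer-S hidden size and the default ratios
tokens_dim, channels_dim = [int(x * dim) for x in to_2tuple(mlp_ratio)]
assert (tokens_dim, channels_dim) == (256, 2048)   # same values the removed explicit args carried

assert to_2tuple(4) == (4, 4)                      # a scalar ratio is broadcast to both MLPs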
@@ -81,6 +115,78 @@ class MixerBlock(nn.Module):
        return x


+class Affine(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.alpha = nn.Parameter(torch.ones((1, 1, dim)))
+        self.beta = nn.Parameter(torch.zeros((1, 1, dim)))
+
+    def forward(self, x):
+        return torch.addcmul(self.beta, self.alpha, x)
+
+
+class ResBlock(nn.Module):
+    """ Residual MLP block w/ LayerScale and Affine 'norm'
+    Based on: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
+    """
+    def __init__(
+            self, dim, seq_len, mlp_ratio=4, mlp_layer=Mlp, norm_layer=Affine,
+            act_layer=nn.GELU, init_values=1e-4, drop=0., drop_path=0.):
+        super().__init__()
+        channel_dim = int(dim * mlp_ratio)
+        self.norm1 = norm_layer(dim)
+        self.linear_tokens = nn.Linear(seq_len, seq_len)
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        self.mlp_channels = mlp_layer(dim, channel_dim, act_layer=act_layer, drop=drop)
+        self.ls1 = nn.Parameter(init_values * torch.ones(dim))
+        self.ls2 = nn.Parameter(init_values * torch.ones(dim))
+
+    def forward(self, x):
+        x = x + self.drop_path(self.ls1 * self.linear_tokens(self.norm1(x).transpose(1, 2)).transpose(1, 2))
+        x = x + self.drop_path(self.ls2 * self.mlp_channels(self.norm2(x)))
+        return x
+
+
+class SpatialGatingUnit(nn.Module):
+    """ Spatial Gating Unit
+    Based on: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
+    """
+    def __init__(self, dim, seq_len, norm_layer=nn.LayerNorm):
+        super().__init__()
+        gate_dim = dim // 2
+        self.norm = norm_layer(gate_dim)
+        self.proj = nn.Linear(seq_len, seq_len)
+
+    def forward(self, x):
+        u, v = x.chunk(2, dim=-1)
+        v = self.norm(v)
+        v = self.proj(v.transpose(-1, -2))
+        return u * v.transpose(-1, -2)
+
+
+class SpatialGatingBlock(nn.Module):
+    """ Residual Block w/ Spatial Gating
+    Based on: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
+    """
+    def __init__(
+            self, dim, seq_len, mlp_ratio=4, mlp_layer=GatedMlp,
+            norm_layer=partial(nn.LayerNorm, eps=1e-6), act_layer=nn.GELU, drop=0., drop_path=0.):
+        super().__init__()
+        channel_dim = int(dim * mlp_ratio)
+        self.norm = norm_layer(dim)
+        sgu = partial(SpatialGatingUnit, seq_len=seq_len)
+        self.mlp_channels = mlp_layer(dim, channel_dim, act_layer=act_layer, gate_layer=sgu, drop=drop)
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+
+    def forward(self, x):
+        x = x + self.drop_path(self.mlp_channels(self.norm(x)))
+        return x
+
+
class MlpMixer(nn.Module):

    def __init__(
...
...
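A hedged shape sketch for the gMLP pieces added above, importing them from the merged timm module (tensor sizes are arbitrary): SpatialGatingUnit halves the channel dimension by gating one chunk with a token-mixing projection of the other, and SpatialGatingBlock keeps the embedding width because GatedMlp accounts for that halving before fc2.

# Assumes a timm checkout including this merge; random weights, shapes only.
import torch
from timm.models.mlp_mixer import SpatialGatingUnit, SpatialGatingBlock

seq_len = 196
sgu = SpatialGatingUnit(256, seq_len=seq_len)
x = torch.randn(2, seq_len, 256)
assert sgu(x).shape == (2, seq_len, 128)   # u * proj(norm(v)) halves the channel dim

blk = SpatialGatingBlock(dim=128, seq_len=seq_len, mlp_ratio=6)
y = torch.randn(2, seq_len, 128)
assert blk(y).shape == y.shape             # residual block preserves the embedding width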
@@ -91,24 +197,27 @@ class MlpMixer(nn.Module):
            patch_size=16,
            num_blocks=8,
            hidden_dim=512,
-            tokens_dim=256,
-            channels_dim=2048,
+            mlp_ratio=(0.5, 4.0),
+            block_layer=MixerBlock,
            mlp_layer=Mlp,
            norm_layer=partial(nn.LayerNorm, eps=1e-6),
            act_layer=nn.GELU,
            drop_rate=0.,
            drop_path_rate=0.,
            nlhb=False,
+            stem_norm=False,
    ):
        super().__init__()
        self.num_classes = num_classes

-        self.stem = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=hidden_dim)
-        # FIXME drop_path (stochastic depth scaling rule?)
+        self.stem = PatchEmbed(
+            img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=hidden_dim,
+            norm_layer=norm_layer if stem_norm else None)
+        # FIXME drop_path (stochastic depth scaling rule or all the same?)
        self.blocks = nn.Sequential(*[
-            MixerBlock(
-                hidden_dim, self.stem.num_patches, tokens_dim, channels_dim,
-                mlp_layer=mlp_layer, norm_layer=norm_layer, act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
+            block_layer(
+                hidden_dim, self.stem.num_patches, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
+                act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
            for _ in range(num_blocks)])
        self.norm = norm_layer(hidden_dim)
        self.head = nn.Linear(hidden_dim, self.num_classes)  # zero init
...
...
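A minimal sketch (not from the commit) of what the generalized constructor buys: the same MlpMixer trunk can now host gMLP blocks by swapping block_layer and mlp_layer, which is exactly how the gmlp_* registrations below are wired. The num_classes / img_size / in_chans arguments sit just above the hunk shown here; num_blocks is shrunk so the sketch runs quickly.

# Assumes a timm checkout including this merge; mirrors the gmlp_ti16_224 arguments with fewer blocks.
import torch
from timm.models.layers import GatedMlp
from timm.models.mlp_mixer import MlpMixer, SpatialGatingBlock

model = MlpMixer(
    num_classes=10, img_size=224, patch_size=16, num_blocks=2, hidden_dim=128, mlp_ratio=6,
    block_layer=SpatialGatingBlock, mlp_layer=GatedMlp)
logits = model(torch.randn(1, 3, 224, 224))
assert logits.shape == (1, 10)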
@@ -136,6 +245,9 @@ def _init_weights(m, n: str, head_bias: float = 0.):
            if n.startswith('head'):
                nn.init.zeros_(m.weight)
                nn.init.constant_(m.bias, head_bias)
+            elif n.endswith('gate.proj'):
+                nn.init.normal_(m.weight, std=1e-4)
+                nn.init.ones_(m.bias)
            else:
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
...
...
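A rough standalone illustration of the new init branch (the std and bias values come from the diff; the Linear size is arbitrary): modules whose name ends in 'gate.proj', i.e. the SpatialGatingUnit projection, start with near-zero token-mixing weights and unit bias, so the gate initially behaves close to a pass-through, as suggested in the gMLP paper.

# Hypothetical standalone check of the 'gate.proj' init rule above.
import torch.nn as nn

proj = nn.Linear(196, 196)
nn.init.normal_(proj.weight, std=1e-4)   # near-zero spatial mixing at init ...
nn.init.ones_(proj.bias)                 # ... plus unit bias: gating starts close to identity
assert float(proj.weight.abs().mean()) < 1e-3 and float(proj.bias.mean()) == 1.0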
@@ -177,8 +289,9 @@ def _create_mixer(variant, pretrained=False, default_cfg=None, **kwargs):
@register_model
def mixer_s32_224(pretrained=False, **kwargs):
    """ Mixer-S/32 224x224
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=32, num_blocks=8, hidden_dim=512, tokens_dim=256, channels_dim=2048, **kwargs)
+    model_args = dict(patch_size=32, num_blocks=8, hidden_dim=512, **kwargs)
    model = _create_mixer('mixer_s32_224', pretrained=pretrained, **model_args)
    return model
...
...
@@ -186,28 +299,19 @@ def mixer_s32_224(pretrained=False, **kwargs):
@register_model
def mixer_s16_224(pretrained=False, **kwargs):
    """ Mixer-S/16 224x224
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=16, num_blocks=8, hidden_dim=512, tokens_dim=256, channels_dim=2048, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=8, hidden_dim=512, **kwargs)
    model = _create_mixer('mixer_s16_224', pretrained=pretrained, **model_args)
    return model


-@register_model
-def mixer_s16_glu_224(pretrained=False, **kwargs):
-    """ Mixer-S/16 224x224
-    """
-    model_args = dict(
-        patch_size=16, num_blocks=8, hidden_dim=512, tokens_dim=256, channels_dim=1536,
-        mlp_layer=GluMlp, act_layer=nn.SiLU, **kwargs)
-    model = _create_mixer('mixer_s16_glu_224', pretrained=pretrained, **model_args)
-    return model
-
-
@register_model
def mixer_b32_224(pretrained=False, **kwargs):
    """ Mixer-B/32 224x224
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=32, num_blocks=12, hidden_dim=768, tokens_dim=384, channels_dim=3072, **kwargs)
+    model_args = dict(patch_size=32, num_blocks=12, hidden_dim=768, **kwargs)
    model = _create_mixer('mixer_b32_224', pretrained=pretrained, **model_args)
    return model
...
...
@@ -215,8 +319,9 @@ def mixer_b32_224(pretrained=False, **kwargs):
@register_model
def mixer_b16_224(pretrained=False, **kwargs):
    """ Mixer-B/16 224x224. ImageNet-1k pretrained weights.
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, tokens_dim=384, channels_dim=3072, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, **kwargs)
    model = _create_mixer('mixer_b16_224', pretrained=pretrained, **model_args)
    return model
...
...
@@ -224,8 +329,9 @@ def mixer_b16_224(pretrained=False, **kwargs):
@register_model
def mixer_b16_224_in21k(pretrained=False, **kwargs):
    """ Mixer-B/16 224x224. ImageNet-21k pretrained weights.
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, tokens_dim=384, channels_dim=3072, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=12, hidden_dim=768, **kwargs)
    model = _create_mixer('mixer_b16_224_in21k', pretrained=pretrained, **model_args)
    return model
...
...
@@ -233,8 +339,9 @@ def mixer_b16_224_in21k(pretrained=False, **kwargs):
@register_model
def mixer_l32_224(pretrained=False, **kwargs):
    """ Mixer-L/32 224x224.
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=32, num_blocks=24, hidden_dim=1024, tokens_dim=512, channels_dim=4096, **kwargs)
+    model_args = dict(patch_size=32, num_blocks=24, hidden_dim=1024, **kwargs)
    model = _create_mixer('mixer_l32_224', pretrained=pretrained, **model_args)
    return model
...
...
@@ -242,8 +349,9 @@ def mixer_l32_224(pretrained=False, **kwargs):
@register_model
def mixer_l16_224(pretrained=False, **kwargs):
    """ Mixer-L/16 224x224. ImageNet-1k pretrained weights.
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=16, num_blocks=24, hidden_dim=1024, tokens_dim=512, channels_dim=4096, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=24, hidden_dim=1024, **kwargs)
    model = _create_mixer('mixer_l16_224', pretrained=pretrained, **model_args)
    return model
...
...
@@ -251,7 +359,101 @@ def mixer_l16_224(pretrained=False, **kwargs):
@register_model
def mixer_l16_224_in21k(pretrained=False, **kwargs):
    """ Mixer-L/16 224x224. ImageNet-21k pretrained weights.
+    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    """
-    model_args = dict(patch_size=16, num_blocks=24, hidden_dim=1024, tokens_dim=512, channels_dim=4096, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=24, hidden_dim=1024, **kwargs)
    model = _create_mixer('mixer_l16_224_in21k', pretrained=pretrained, **model_args)
    return model
+
+
+@register_model
+def gmixer_12_224(pretrained=False, **kwargs):
+    """ Glu-Mixer-12 224x224 (short & fat)
+    Experiment by Ross Wightman, adding (Si)GLU to MLP-Mixer
+    """
+    model_args = dict(
+        patch_size=20, num_blocks=12, hidden_dim=512, mlp_ratio=(1.0, 6.0),
+        mlp_layer=GluMlp, act_layer=nn.SiLU, **kwargs)
+    model = _create_mixer('gmixer_12_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def gmixer_24_224(pretrained=False, **kwargs):
+    """ Glu-Mixer-24 224x224 (tall & slim)
+    Experiment by Ross Wightman, adding (Si)GLU to MLP-Mixer
+    """
+    model_args = dict(
+        patch_size=20, num_blocks=24, hidden_dim=384, mlp_ratio=(1.0, 6.0),
+        mlp_layer=GluMlp, act_layer=nn.SiLU, **kwargs)
+    model = _create_mixer('gmixer_24_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def resmlp_12_224(pretrained=False, **kwargs):
+    """ ResMLP-12
+    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
+    """
+    model_args = dict(
+        patch_size=16, num_blocks=12, hidden_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs)
+    model = _create_mixer('resmlp_12_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def resmlp_24_224(pretrained=False, **kwargs):
+    """ ResMLP-24
+    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
+    """
+    model_args = dict(
+        patch_size=16, num_blocks=24, hidden_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs)
+    model = _create_mixer('resmlp_24_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def resmlp_36_224(pretrained=False, **kwargs):
+    """ ResMLP-36
+    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
+    """
+    model_args = dict(
+        patch_size=16, num_blocks=36, hidden_dim=384, mlp_ratio=4, block_layer=ResBlock, norm_layer=Affine, **kwargs)
+    model = _create_mixer('resmlp_36_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def gmlp_ti16_224(pretrained=False, **kwargs):
+    """ gMLP-Tiny
+    Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
+    """
+    model_args = dict(
+        patch_size=16, num_blocks=30, hidden_dim=128, mlp_ratio=6, block_layer=SpatialGatingBlock,
+        mlp_layer=GatedMlp, **kwargs)
+    model = _create_mixer('gmlp_ti16_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def gmlp_s16_224(pretrained=False, **kwargs):
+    """ gMLP-Small
+    Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
+    """
+    model_args = dict(
+        patch_size=16, num_blocks=30, hidden_dim=256, mlp_ratio=6, block_layer=SpatialGatingBlock,
+        mlp_layer=GatedMlp, **kwargs)
+    model = _create_mixer('gmlp_s16_224', pretrained=pretrained, **model_args)
+    return model
+
+
+@register_model
+def gmlp_b16_224(pretrained=False, **kwargs):
+    """ gMLP-Base
+    Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
+    """
+    model_args = dict(
+        patch_size=16, num_blocks=30, hidden_dim=512, mlp_ratio=6, block_layer=SpatialGatingBlock,
+        mlp_layer=GatedMlp, **kwargs)
+    model = _create_mixer('gmlp_b16_224', pretrained=pretrained, **model_args)
+    return model
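A final usage note, hedged because it assumes a timm checkout or install that includes this merge: every function decorated with @register_model above becomes available through timm's factory, so the new variants can be built by name. No pretrained weights are published for them in this commit, hence pretrained=False.

# Assumes a timm install/checkout containing this merge; the names come from the registrations above.
import torch
import timm

for name in ['resmlp_12_224', 'gmlp_ti16_224']:
    model = timm.create_model(name, pretrained=False, num_classes=10)
    out = model(torch.randn(1, 3, 224, 224))
    assert out.shape == (1, 10)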