Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
曾经的那一瞬间
Models
提交
21158b40
M
Models
项目概览
曾经的那一瞬间
/
Models
9 个月 前同步成功
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
Models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
21158b40
编写于
8月 21, 2023
作者:
H
Hongkun Yu
提交者:
A. Unique TensorFlower
8月 21, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
No public description
PiperOrigin-RevId: 558890434
上级
8bbb4841
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
0 addition
and
1291 deletion
+0
-1291
official/projects/videoglue/configs/backbones_3d_test.py
official/projects/videoglue/configs/backbones_3d_test.py
+0
-30
official/projects/videoglue/configs/dataset_test.py
official/projects/videoglue/configs/dataset_test.py
+0
-37
official/projects/videoglue/configs/head_test.py
official/projects/videoglue/configs/head_test.py
+0
-41
official/projects/videoglue/configs/spatiotemporal_action_localization_test.py
...eoglue/configs/spatiotemporal_action_localization_test.py
+0
-57
official/projects/videoglue/configs/video_classification_test.py
...l/projects/videoglue/configs/video_classification_test.py
+0
-35
official/projects/videoglue/datasets/action_localization_test.py
...l/projects/videoglue/datasets/action_localization_test.py
+0
-161
official/projects/videoglue/datasets/video_classification_test.py
.../projects/videoglue/datasets/video_classification_test.py
+0
-144
official/projects/videoglue/evaluation/spatiotemporal_action_localization_evaluator_test.py
...tion/spatiotemporal_action_localization_evaluator_test.py
+0
-84
official/projects/videoglue/modeling/backbones/vit_3d_test.py
...cial/projects/videoglue/modeling/backbones/vit_3d_test.py
+0
-99
official/projects/videoglue/modeling/heads/action_transformer_test.py
...jects/videoglue/modeling/heads/action_transformer_test.py
+0
-73
official/projects/videoglue/modeling/heads/simple_test.py
official/projects/videoglue/modeling/heads/simple_test.py
+0
-62
official/projects/videoglue/modeling/heads/transformer_decoder_test.py
...ects/videoglue/modeling/heads/transformer_decoder_test.py
+0
-85
official/projects/videoglue/modeling/video_action_transformer_model_test.py
...videoglue/modeling/video_action_transformer_model_test.py
+0
-41
official/projects/videoglue/modeling/video_classification_model_test.py
...cts/videoglue/modeling/video_classification_model_test.py
+0
-139
official/projects/videoglue/tasks/multihead_video_classification_test.py
...ts/videoglue/tasks/multihead_video_classification_test.py
+0
-140
official/projects/videoglue/tasks/spatiotemporal_action_localization_test.py
...ideoglue/tasks/spatiotemporal_action_localization_test.py
+0
-63
未找到文件。
official/projects/videoglue/configs/backbones_3d_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for backbones_3d."""
import
tensorflow
as
tf
from
official.projects.videoglue.configs
import
backbones_3d
class Backbones3DTest(tf.test.TestCase):
  """Smoke test for the 3D backbone config classes."""

  def test_vit_3d(self):
    """Builds a `Backbone3D` of type 'vit_3d' and validates it.

    The test passes as long as `validate()` does not raise.
    """
    vit_cfg = backbones_3d.VisionTransformer3D()
    backbone_cfg = backbones_3d.Backbone3D(type='vit_3d', vit_3d=vit_cfg)
    backbone_cfg.validate()


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/configs/dataset_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for dataset."""
import
tensorflow
as
tf
from
official.projects.videoglue.configs
import
dataset
from
official.vision.configs
import
common
as
common_cfg
class DatasetTest(tf.test.TestCase):
  """Smoke test for the videoglue `DataConfig`."""

  def test_dataset_valid(self):
    """Builds a fully populated `DataConfig` and validates it.

    The test passes as long as `validate()` does not raise.
    """
    ava_cfg = dataset.AVA(scale_min=0.1, scale_max=1.0)
    augmentation_cfg = dataset.DataAugmentation(type='ava', ava=ava_cfg)
    autoaug_cfg = common_cfg.AutoAugment()
    randaug_cfg = common_cfg.RandAugment()
    mixup_cutmix_cfg = common_cfg.MixupAndCutmix()

    data_cfg = dataset.DataConfig(
        name='dummy_set',
        data_augmentation=augmentation_cfg,
        feature_shape=(1, 2, 3, 4),
        autoaug=autoaug_cfg,
        randaug=randaug_cfg,
        mixup_cutmix=mixup_cutmix_cfg,
    )
    data_cfg.validate()


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/configs/head_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for head."""
import
tensorflow
as
tf
from
official.projects.videoglue.configs
import
head
as
head_cfg
class HeadTest(tf.test.TestCase):
  """Smoke tests for the head config classes."""

  def test_mlp_head_valid(self):
    """An `MLP` head config with explicit settings validates without error."""
    mlp_settings = {
        'num_hidden_channels': 128,
        'num_hidden_layers': 4,
        'num_output_channels': 1280,
        'use_sync_bn': True,
        'norm_momentum': 0.99,
        'norm_epsilon': 1e-5,
        'activation': 'relu',
    }
    mlp_cfg = head_cfg.MLP(**mlp_settings)
    mlp_cfg.validate()

  def test_action_transformer_head_valid(self):
    """An `ActionTransformer` head config validates without error."""
    transformer_cfg = head_cfg.ActionTransformer(
        activation='relu', tx_activation='relu')
    transformer_cfg.validate()


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/configs/spatiotemporal_action_localization_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for spatiotemporal_action_localization."""
import
tensorflow
as
tf
from
official.projects.videoglue.configs
import
spatiotemporal_action_localization
as
stal
class SpatiotemporalActionLocalizationTest(tf.test.TestCase):
  """Checks the experiment configs produced by the `stal` factory functions."""

  def _assert_vit_experiment(self, experiment_cfg):
    """Asserts the task type and the 'vit_adamw' optimizer of a ViT config."""
    self.assertIsInstance(
        experiment_cfg.task, stal.SpatiotemporalActionLocalizationTask)
    optimizer_type = experiment_cfg.trainer.optimizer_config.optimizer.type
    self.assertEqual(optimizer_type, 'vit_adamw')

  def test_spatiotemporal_action_localization_config(self):
    """The base experiment exposes the expected task and model config types."""
    experiment_cfg = stal.spatiotemporal_action_localization()
    task_cfg = experiment_cfg.task
    self.assertIsInstance(task_cfg, stal.SpatiotemporalActionLocalizationTask)
    self.assertIsInstance(
        experiment_cfg.task.model, stal.VideoActionTransformerModel)

  def test_spatiotemporal_action_localization_vit12_config(self):
    """The vit12 experiment uses the localization task and 'vit_adamw'."""
    self._assert_vit_experiment(
        stal.spatiotemporal_action_localization_vit12())

  def test_spatiotemporal_action_localization_vit16_config(self):
    """The vit16 experiment uses the localization task and 'vit_adamw'."""
    self._assert_vit_experiment(
        stal.spatiotemporal_action_localization_vit16())


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/configs/video_classification_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for video_classification."""
import
tensorflow
as
tf
from
official.projects.videoglue.configs
import
video_classification
as
cfg
class VideoCoarseClassificationTest(tf.test.TestCase):
  """Checks the multi-head video classification experiment config."""

  def test_video_classification_config(self):
    """Verifies config types, then that a `None` `is_training` fails validation."""
    experiment_cfg = cfg.mh_video_classification()

    task_cfg = experiment_cfg.task
    self.assertIsInstance(task_cfg, cfg.MultiHeadVideoClassificationTask)
    self.assertIsInstance(
        experiment_cfg.task.model, cfg.MultiHeadVideoClassificationModel)

    # With `is_training` cleared, validation is expected to raise KeyError.
    experiment_cfg.task.train_data.is_training = None
    self.assertRaises(KeyError, experiment_cfg.validate)


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/datasets/action_localization_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for action_localization data loader."""
import
io
from
absl
import
logging
import
numpy
as
np
from
PIL
import
Image
import
tensorflow
as
tf
from
official.projects.videoglue.datasets
import
action_localization
# Feature keys used when building the synthetic SequenceExample.
IMAGE_KEY = 'image/encoded'  # Per-frame encoded image bytes.
KEYFRAME_INDEX = 'clip/key_frame/frame_index'  # Context: key-frame index.
KEYFRAME_BOX_PREFIX = 'clip/key_frame/bbox'  # Context: key-frame boxes.
DETECTED_BOX_PREFIX = 'centernet/bbox'  # Per-frame detected boxes.

# Location the synthetic TFRecord is written to.
TFR_PATH = '/tmp/example.tfrecord'
def create_fake_tfse_sstable():
  """Writes one synthetic `tf.train.SequenceExample` to `TFR_PATH`.

  The example contains:
    * context features: the key-frame index (2), four key-frame boxes
      (ymin/xmin/ymax/xmax/score) and their integer and string labels;
    * per-frame feature lists for 4 frames: the same random JPEG image and
      three detected boxes whose third score is `0.1 * frame_index`.

  The record writer is used as a context manager so the file handle is
  released even if serialization or writing raises.
  """
  # A single random image is JPEG-encoded once and reused for every frame.
  pixels = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
  with io.BytesIO() as buffer:
    Image.fromarray(pixels).save(buffer, format='JPEG')
    raw_image_bytes = buffer.getvalue()

  num_frames = 4

  tfse = tf.train.SequenceExample()
  context = tfse.context.feature
  feature_lists = tfse.feature_lists.feature_list

  # keyframe index
  context.get_or_create(KEYFRAME_INDEX).int64_list.value[:] = [2]

  # keyframe boxes; boxes 2 and 3 share identical coordinates.
  keyframe_floats = (
      ('ymin', [0.0, 0.1, 0.2, 0.2]),
      ('xmin', [0.0, 0.1, 0.2, 0.2]),
      ('ymax', [0.5, 0.6, 0.7, 0.7]),
      ('xmax', [0.5, 0.6, 0.7, 0.7]),
      ('score', [1.0, 1.0, 1.0, 1.0]),
  )
  for suffix, values in keyframe_floats:
    feature = context.get_or_create(f'{KEYFRAME_BOX_PREFIX}/{suffix}')
    feature.float_list.value[:] = values

  # boxes labels
  label_index = context.get_or_create(f'{KEYFRAME_BOX_PREFIX}/label/index')
  label_index.int64_list.value[:] = [0, 1, 2, 3]
  label_string = context.get_or_create(f'{KEYFRAME_BOX_PREFIX}/label/string')
  label_string.bytes_list.value[:] = [b'0', b'1', b'2', b'3']

  # Detected box coordinates are the same for every frame.
  detected_coords = (
      ('ymin', [0.0, 0.1, 0.2]),
      ('xmin', [0.0, 0.1, 0.2]),
      ('ymax', [0.5, 0.6, 0.7]),
      ('xmax', [0.5, 0.6, 0.7]),
  )
  for i in range(num_frames):
    # image
    image_feature = feature_lists.get_or_create(IMAGE_KEY).feature.add()
    image_feature.bytes_list.value[:] = [raw_image_bytes]

    # detected boxes.
    for suffix, values in detected_coords:
      coord_key = f'{DETECTED_BOX_PREFIX}/{suffix}'
      coord_feature = feature_lists.get_or_create(coord_key).feature.add()
      coord_feature.float_list.value[:] = values

    # Only the third detection's score varies with the frame index.
    score_key = f'{DETECTED_BOX_PREFIX}/score'
    score_feature = feature_lists.get_or_create(score_key).feature.add()
    score_feature.float_list.value[:] = [0.91, 0.91, 0.1 * i]

  with tf.io.TFRecordWriter(TFR_PATH) as writer:
    writer.write(tfse.SerializeToString())
    logging.info('Writing tfrecord table: %s', TFR_PATH)
class ActionLocalizationTest(tf.test.TestCase):
  """End-to-end check of the action-localization data loader on fake data."""

  def test_create_action_localization_dataset(self):
    """Loads the synthetic record and checks keys, labels and masks."""
    create_fake_tfse_sstable()

    factory = action_localization.ActionLocalizationBaseFactory(
        subset='train')
    # Override the class-count and index-base attributes for the fake data.
    factory._NUM_CLASSES = 5
    factory._ZERO_BASED_INDEX = True
    factory.configure(
        is_training=False,
        num_frames=4,
        temporal_stride=1,
        num_instance_per_frame=5,
        one_hot_label=True,
        merge_multi_labels=True,
        import_detected_bboxes=True,
        augmentation_type='ava',
        augmentation_params={'scale_min': 0.0, 'scale_max': 0.0},
    )
    batch = next(iter(factory.make_dataset(shuffle=False, batch_size=1)))

    expected_keys = [
        'image',
        'keyframe_index',
        'label',
        'instances_position',
        'instances_mask',
        'instances_score',
        'nonmerge_label',
        'nonmerge_instances_position',
        'detected_instances_position',
        'detected_instances_mask',
        'detected_instances_score',
    ]
    self.assertSameElements(expected_keys, batch.keys())
    self.assertAllEqual(batch['keyframe_index'], [[2]])

    # Row 2 has two positive classes (2 and 3) and rows 3-4 are empty;
    # presumably the two identical key-frame boxes of the fixture were merged
    # by `merge_multi_labels` -- confirm against the loader.
    label_rows = tf.constant([
        [1.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 1.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
    ])
    self.assertAllEqual(batch['label'], tf.expand_dims(label_rows, axis=0))

    instance_mask = tf.constant([True, True, True, False, False])
    self.assertAllEqual(
        batch['instances_mask'], tf.expand_dims(instance_mask, axis=0))

    nonmerge_label = tf.constant([0, 1, 2, 3, -1])
    self.assertAllEqual(
        batch['nonmerge_label'], tf.expand_dims(nonmerge_label, axis=0))

    detected_positions = batch['detected_instances_position']
    detected_mask = batch['detected_instances_mask']
    self.assertAllEqual(detected_positions.shape, [1, 5, 4])
    self.assertAllEqual(detected_mask.shape, [1, 5])
    self.assertAllEqual(
        detected_mask, tf.constant([[True, True, False, False, False]]))


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/datasets/video_classification_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for video_classification."""
import
io
from
absl
import
logging
import
numpy
as
np
from
PIL
import
Image
import
tensorflow
as
tf
from
official.projects.videoglue.datasets
import
video_classification
from
official.vision.configs
import
common
as
common_cfg
# Feature keys used when building the synthetic SequenceExample.
IMAGE_KEY = 'image/encoded'  # Per-frame encoded image bytes.
LABEL_KEY = 'clip/label/index'  # Context: clip-level label index.

# Location the synthetic TFRecord is written to.
TFR_PATH = '/tmp/sample.tfrecord'
def create_fake_tfse_sstable():
  """Writes one synthetic `tf.train.SequenceExample` to `TFR_PATH`.

  The example carries a clip label of 0 and 25 JPEG frames of size 263x320x3,
  where every pixel of frame `k` is set to the value `k` before encoding.

  The record writer is used as a context manager so the file handle is
  released even if serialization or writing raises.
  """
  num_frames = 25

  tfse = tf.train.SequenceExample()
  tfse.context.feature.get_or_create(LABEL_KEY).int64_list.value[:] = [0]

  frames = tfse.feature_lists.feature_list
  for frame_id in range(num_frames):
    # A constant-valued frame makes the frame index recoverable from pixels.
    pixels = np.ones((263, 320, 3), dtype=np.uint8) * frame_id
    with io.BytesIO() as buffer:
      Image.fromarray(pixels).save(buffer, format='JPEG')
      raw_image_bytes = buffer.getvalue()

    frame_feature = frames.get_or_create(IMAGE_KEY).feature.add()
    frame_feature.bytes_list.value[:] = [raw_image_bytes]

  with tf.io.TFRecordWriter(TFR_PATH) as writer:
    writer.write(tfse.SerializeToString())
    logging.info('Writing tfrecord table: %s', TFR_PATH)
class VideoClassificationTest(tf.test.TestCase):
  """Exercises the video classification data loader on synthetic data."""

  def _first_batch(self, batch_size, **configs):
    """Writes the fake record, configures a factory and returns one batch."""
    create_fake_tfse_sstable()
    factory = video_classification.VideoClassificationBaseFactory(
        subset='train')
    factory.configure(**configs)
    batches = factory.make_dataset(shuffle=False, batch_size=batch_size)
    return next(iter(batches))

  def _assert_training_batch_shapes(self, batch):
    """Asserts the label/image shapes shared by the training-mode tests."""
    self.assertAllEqual(batch['label'].shape, [2, 400])
    self.assertAllEqual(batch['image'].shape, [2, 4, 224, 224, 3])

  def test_create_video_classification_data(self):
    """Plain training pipeline yields the expected batch shapes."""
    batch = self._first_batch(
        2, is_training=True, num_frames=4, one_hot_label=True)
    self._assert_training_batch_shapes(batch)

  def test_video_classification_randaug(self):
    """Training pipeline with default RandAugment params keeps the shapes."""
    batch = self._first_batch(
        2,
        is_training=True,
        num_frames=4,
        one_hot_label=True,
        randaug_params=common_cfg.RandAugment().as_dict(),
    )
    self._assert_training_batch_shapes(batch)

  def test_video_classification_autoaug(self):
    """Training pipeline with default AutoAugment params keeps the shapes."""
    batch = self._first_batch(
        2,
        is_training=True,
        num_frames=4,
        one_hot_label=True,
        autoaug_params=common_cfg.AutoAugment().as_dict(),
    )
    self._assert_training_batch_shapes(batch)

  def test_video_classification_mixup_cutmix(self):
    """Training pipeline with default Mixup/Cutmix params keeps the shapes."""
    batch = self._first_batch(
        2,
        is_training=True,
        num_frames=4,
        one_hot_label=True,
        mixup_cutmix_params=common_cfg.MixupAndCutmix().as_dict(),
    )
    self._assert_training_batch_shapes(batch)

  def test_video_classification_sample_segments(self):
    """Eval-mode segment sampling picks the expected frames."""
    batch = self._first_batch(
        1,
        is_training=False,
        num_frames=5,
        temporal_stride=1,
        sample_from_segments=True,
        one_hot_label=True,
        mixup_cutmix_params=common_cfg.MixupAndCutmix().as_dict(),
    )
    self.assertAllEqual(batch['label'].shape, [1, 400])
    self.assertAllEqual(batch['image'].shape, [1, 5, 224, 224, 3])

    # Fixture frame k is filled with the value k, so the per-frame mean
    # identifies which frame was sampled. The 255 rescale presumably undoes
    # a [0, 1] normalization in the loader -- confirm.
    rescaled = batch['image'] * 255.
    per_frame_mean = tf.reduce_mean(rescaled, axis=[2, 3, 4])
    self.assertAllEqual(
        per_frame_mean[0].numpy(), [2.0, 7.0, 12.0, 16.0, 21.0])


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/evaluation/spatiotemporal_action_localization_evaluator_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for spatiotemporal_action_localization_evaluator."""
import
tensorflow
as
tf
from
official.projects.videoglue.evaluation
import
spatiotemporal_action_localization_evaluator
as
eval_util
class SpatiotemporalActionLocalizationEvaluatorTest(tf.test.TestCase):
  """Checks the mAP@.5IOU reported by the action-localization evaluator."""

  def _package(self, boxes, nonmerge_boxes):
    """Bundles boxes with fixed class ids and one-hot predictions."""
    class_ids = tf.convert_to_tensor(
        [[0, 2, 3, 4], [11, 12, 13, 14]], dtype=tf.int32)
    one_hot_scores = tf.one_hot(class_ids, depth=80)
    return {
        'instances_position': boxes,
        'nonmerge_instances_position': nonmerge_boxes,
        'predictions': one_hot_scores,
        'nonmerge_label': class_ids,
    }

  def _map_at_half_iou(self, data):
    """Runs a fresh evaluator on `data` and returns its 'mAP@.5IOU'."""
    evaluator = eval_util.SpatiotemporalActionLocalizationEvaluator()
    evaluator.reset_states()
    evaluator.update_state(data)
    return evaluator.result()['mAP@.5IOU']

  def _create_test_data_simple(self):
    """Returns data whose merged and non-merged boxes are identical."""
    first_clip = [
        [0.1, 0.15, 0.2, 0.25],
        [0.35, 0.18, 0.43, 0.4],
        [0.2, 0.1, 0.3, 0.2],
        [0.65, 0.55, 0.75, 0.85],
    ]
    second_clip = [
        [0.2, 0.5, 0.5, 0.8],
        [0.7, 0.1, 0.9, 0.9],
        [0.1, 0.4, 0.5, 0.7],
        [0.04, 0.05, 0.88, 0.77],
    ]
    boxes = tf.convert_to_tensor([first_clip, second_clip], dtype=tf.float32)
    return self._package(boxes, boxes)

  def _create_test_data_complex(self):
    """Returns data with duplicated non-merged boxes and -1 padded boxes."""
    padding = [-1, -1, -1, -1]
    nonmerge_boxes = tf.convert_to_tensor(
        [
            [
                [0.1, 0.15, 0.2, 0.25],
                [0.1, 0.15, 0.2, 0.25],
                [0.2, 0.1, 0.3, 0.2],
                [0.65, 0.55, 0.75, 0.85],
            ],
            [
                [0.2, 0.5, 0.5, 0.8],
                [0.7, 0.1, 0.9, 0.9],
                [0.2, 0.5, 0.5, 0.8],
                [0.7, 0.1, 0.9, 0.9],
            ],
        ],
        dtype=tf.float32)
    boxes = tf.convert_to_tensor(
        [
            [
                [0.1, 0.15, 0.2, 0.25],
                [0.2, 0.1, 0.3, 0.2],
                [0.65, 0.55, 0.75, 0.85],
                padding,
            ],
            [
                [0.2, 0.5, 0.5, 0.8],
                [0.7, 0.1, 0.9, 0.9],
                padding,
                padding,
            ],
        ],
        dtype=tf.float32)
    return self._package(boxes, nonmerge_boxes)

  def test_action_localization_eval_simple(self):
    """Identical merged and non-merged boxes give an mAP of 1.0."""
    score = self._map_at_half_iou(self._create_test_data_simple())
    self.assertAlmostEqual(score, 1.0)

  def test_action_localization_eval_complex(self):
    """Duplicated/padded boxes give the recorded mAP of 0.64375."""
    score = self._map_at_half_iou(self._create_test_data_complex())
    self.assertAlmostEqual(score, 0.64375)


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/modeling/backbones/vit_3d_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for vit_3d."""
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.projects.videoglue.modeling.backbones
import
vit_3d
class Vit3DTest(parameterized.TestCase, tf.test.TestCase):
  """Construction and output-shape tests for `VisionTransformer3D`."""

  def _build_and_call(self, num_frames, input_size, **backbone_kwargs):
    """Builds the backbone and calls it on a symbolic clip.

    Args:
      num_frames: Number of frames in the clip.
      input_size: Height and width of each (square) frame.
      **backbone_kwargs: Extra keyword arguments for `VisionTransformer3D`.

    Returns:
      A `(network, outputs)` tuple.
    """
    tf.keras.backend.set_image_data_format('channels_last')
    clip_shape = (num_frames, input_size, input_size, 3)
    # The input spec declares batch size 2 while the symbolic input uses 1.
    spec = tf.keras.layers.InputSpec(shape=[2, *clip_shape])
    network = vit_3d.VisionTransformer3D(input_specs=spec, **backbone_kwargs)
    symbolic_clip = tf.keras.Input(shape=clip_shape, batch_size=1)
    return network, network(symbolic_clip)

  @parameterized.parameters(
      (8, 224, 87718656),
      (16, 256, 88204032),
  )
  def test_network_creation(self, num_frames, input_size, params_count):
    """Default backbone has the expected parameter count."""
    network, _ = self._build_and_call(num_frames, input_size)
    self.assertEqual(network.count_params(), params_count)

  def test_network_none_pooler(self):
    """With pooler 'none' the backbone returns a token grid."""
    _, endpoints = self._build_and_call(
        8, 224, pooler='none', representation_size=128)
    self.assertEqual(
        endpoints['encoded_tokens'].shape, [1, 2, 14, 14, 128])

  @parameterized.parameters('native', 'mae')
  def test_network_convention(self, variant):
    """Both variants produce the same encoded-token shape."""
    _, endpoints = self._build_and_call(
        8, 224, variant=variant, pooler='none', representation_size=128)
    self.assertEqual(
        endpoints['encoded_tokens'].shape, [1, 2, 14, 14, 128])

  def test_network_pos_embed_interpolation_mae(self):
    """An (8, 14, 14) `pos_embed_shape` works with 256x256 inputs."""
    _, endpoints = self._build_and_call(
        8,
        256,
        variant='mae',
        pooler='none',
        representation_size=128,
        pos_embed_shape=(8, 14, 14),
    )
    self.assertEqual(
        endpoints['encoded_tokens'].shape, [1, 2, 16, 16, 128])


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/modeling/heads/action_transformer_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for action_transformer."""
import
tensorflow
as
tf
from
official.projects.videoglue.modeling.heads
import
action_transformer
class ActionTransformerTest(tf.test.TestCase):
  """Construction tests for `ActionTransformerHead`."""

  def _fake_inputs(self):
    """Returns constant features plus random instance boxes."""
    return {
        'features': tf.ones([2, 4, 16, 16, 128]),
        'instances_position': tf.random.uniform([2, 6, 4]),
    }

  def test_action_transformer_head_construction(self):
    """A head with hidden and transformer layers outputs [2, 6, 80]."""
    head = action_transformer.ActionTransformerHead(
        num_hidden_layers=1,
        num_hidden_channels=1024,
        use_sync_bn=False,
        num_classes=80,
        # parameters for TxDecoder
        num_tx_channels=128,
        num_tx_layers=3,
        num_tx_heads=3,
        use_positional_embedding=True,
    )
    logits = head(self._fake_inputs(), training=False)
    self.assertAllEqual(logits.shape, [2, 6, 80])

  def test_action_transformer_linear_head_construction(self):
    """With zero hidden and transformer layers only one dense layer exists."""
    head = action_transformer.ActionTransformerHead(
        num_hidden_layers=0,
        num_hidden_channels=1024,
        use_sync_bn=False,
        num_classes=80,
        dropout_rate=0.5,
        # parameters for TxDecoder
        num_tx_channels=128,
        num_tx_layers=0,
        num_tx_heads=3,
        attention_dropout_rate=0.2,
        use_positional_embedding=False,
    )
    logits = head(self._fake_inputs(), training=False)
    self.assertAllEqual(logits.shape, [2, 6, 80])

    # All weights of the head (`head.weights`) are inspected.
    actual_names = []
    for weight in head.weights:
      actual_names.append(weight.name)
    self.assertCountEqual(
        actual_names,
        [
            'action_transformer_head/mlp/dense/kernel:0',
            'action_transformer_head/mlp/dense/bias:0',
        ],
    )


if __name__ == '__main__':
  # Run the suite when the file is executed directly.
  tf.test.main()
official/projects/videoglue/modeling/heads/simple_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for simple."""
import
numpy
as
np
import
tensorflow
as
tf
from
official.projects.videoglue.modeling.heads
import
simple
class SimpleTest(tf.test.TestCase):
  """Construction and output-shape checks for the heads in `simple`."""

  def test_mlp_construction(self):
    """Builds an MLP head and checks its parameter count and output shape."""
    head = simple.MLP(
        num_hidden_layers=3,
        num_hidden_channels=128,
        num_output_channels=56,
        use_sync_bn=False,
        activation='relu',
    )
    features = tf.zeros([2, 512])
    result = head(features, training=False)

    # Element count of every trainable variable, summed over the whole head.
    weight_sizes = [np.prod(w.get_shape()) for w in head.trainable_weights]
    total_params = np.sum(weight_sizes)

    self.assertEqual(total_params, 106296)
    self.assertAllEqual(result.shape, [2, 56])

  def test_att_pooler_classifier_construction(self):
    """Checks the attention pooler head yields a [2, 3] output."""
    head = simple.AttentionPoolerClassificationHead(
        num_heads=6,
        hidden_size=768,
        num_classes=3,
    )
    flat_features = tf.zeros([2, 16, 768])
    # The head is called on a rank-4 tensor, so the 16 entries are reshaped
    # into a 4 x 4 layout before the call.
    features = tf.reshape(flat_features, (2, 4, 4, 768))
    result = head(features, training=False)
    self.assertAllEqual(result.shape, [2, 3])

  def test_att_pooler_classifier_construction_with_posembed(self):
    """Same as above but with `add_temporal_pos_embed` switched on."""
    head = simple.AttentionPoolerClassificationHead(
        num_heads=6,
        hidden_size=768,
        num_classes=3,
        add_temporal_pos_embed=True,
    )
    flat_features = tf.zeros([2, 16, 768])
    features = tf.reshape(flat_features, (2, 4, 4, 768))
    result = head(features, training=False)
    self.assertAllEqual(result.shape, [2, 3])
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/projects/videoglue/modeling/heads/transformer_decoder_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for TransformerDecoder."""
import
tensorflow
as
tf
from
official.projects.videoglue.modeling.heads
import
transformer_decoder
class TransformerTest(tf.test.TestCase):
  """Output-shape checks for the layers in `transformer_decoder`."""

  def test_decoder_unit_return_shape(self):
    """Calls a single `DecoderUnit` and checks both returned shapes."""
    unit = transformer_decoder.DecoderUnit(
        num_channels=128,
        use_bias=True,
        dropout_rate=0.5,
        activation='relu',
        layer_norm_epsilon=1e-7,
    )
    batch, n_inputs, in_channels = 16, 128, 256
    n_memory = n_inputs * 4

    x = tf.zeros([batch, n_inputs, in_channels])
    memory = tf.ones([batch, n_memory, in_channels])
    result = unit(x, memory, training=False)

    # NOTE(review): the last expected dimension is written as the input count
    # (128), which also equals the unit's num_channels=128 -- confirm which of
    # the two is actually meant.
    self.assertAllEqual(
        result['hidden_states'].shape, [batch, n_inputs, n_inputs])
    self.assertAllEqual(
        result['attention_weights'].shape, [batch, n_inputs, n_memory])

  def test_decoder_layer_return_shape(self):
    """Calls a `TransformerDecoderLayer` and checks both returned shapes."""
    layer = transformer_decoder.TransformerDecoderLayer(
        num_channels=128,
        num_heads=3,
        use_bias=True,
        dropout_rate=0.5,
        activation='relu',
        layer_norm_epsilon=1e-7,
    )
    batch, n_inputs, in_channels = 16, 128, 256
    n_memory = n_inputs * 4

    x = tf.zeros([batch, n_inputs, in_channels])
    memory = tf.ones([batch, n_memory, in_channels])
    result = layer(x, memory, training=False)

    # NOTE(review): 128 * 3 is expressed through the input count here; it is
    # also num_channels * num_heads for this layer -- confirm the intended
    # relation.
    self.assertAllEqual(
        result['hidden_states'].shape, [batch, n_inputs, n_inputs * 3])
    # Only the last entry of 'attention_weights' is inspected.
    last_attention = result['attention_weights'][-1]
    self.assertAllEqual(last_attention.shape, [batch, n_inputs, n_memory])

  def test_decoder_return_shape(self):
    """Calls a 5-layer `TransformerDecoder` and checks its outputs."""
    stack = transformer_decoder.TransformerDecoder(
        num_channels=128,
        num_layers=5,
        num_heads=3,
        use_bias=True,
        dropout_rate=0.5,
        activation='relu',
        layer_norm_epsilon=1e-7,
    )
    batch, n_inputs, in_channels = 16, 128, 256
    n_memory = n_inputs * 4

    x = tf.zeros([batch, n_inputs, in_channels])
    memory = tf.ones([batch, n_memory, in_channels])
    result = stack(x, memory, training=False)

    # One 'attention_weights' entry is expected per configured layer.
    self.assertLen(result['attention_weights'], 5)

    last_hidden = result['hidden_states'][-1]
    self.assertAllEqual(last_hidden.shape, [batch, n_inputs, n_inputs * 3])

    last_attention = result['attention_weights'][-1][-1]
    self.assertAllEqual(last_attention.shape, [batch, n_inputs, n_memory])
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/projects/videoglue/modeling/video_action_transformer_model_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for video_action_transformer_model."""
import
tensorflow
as
tf
from
official.projects.videoglue.configs
import
spatiotemporal_action_localization
as
cfg
from
official.projects.videoglue.modeling
import
video_action_transformer_model
class VideoActionTransformerModelTest(tf.test.TestCase):
  """Smoke test for `build_video_action_transformer_model`."""

  def test_video_action_transformer_model_construction(self):
    """Builds the model from a default config and checks its type."""
    make_spec = tf.keras.layers.InputSpec
    specs = {
        'image': make_spec(shape=[None, 4, 20, 20, 3]),
        'instances_position': make_spec(shape=[None, 8, 4]),
    }
    default_config = cfg.VideoActionTransformerModel()

    built = video_action_transformer_model.build_video_action_transformer_model(
        input_specs_dict=specs,
        model_config=default_config,
        num_classes=80,
    )

    expected_type = video_action_transformer_model.VideoActionTransformerModel
    self.assertIsInstance(built, expected_type)
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/projects/videoglue/modeling/video_classification_model_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for video classification network."""
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.projects.videoglue.modeling
import
video_classification_model
from
official.vision.modeling
import
backbones
class MultiHeadsVideoClassificationNetworkTest(
    parameterized.TestCase, tf.test.TestCase):
  """Creation tests for `MultiHeadVideoClassificationModel` on ResNet3D."""

  # Number of random clips fed through the model in every test.
  _BATCH_SIZE = 2

  def _compute_logits(self, model_id, temporal_size, spatial_size, activation,
                      aggregate_endpoints, num_classes, **model_kwargs):
    """Builds a ResNet3D-backed classification model and runs one batch.

    Args:
      model_id: `model_id` forwarded to `backbones.ResNet3D`.
      temporal_size: size of the second input dimension.
      spatial_size: size of the third and fourth input dimensions.
      activation: activation name forwarded to the backbone.
      aggregate_endpoints: forwarded to the classification model.
      num_classes: an int or a list of ints forwarded to the model.
      **model_kwargs: extra keyword arguments forwarded to the model
        constructor, e.g. `classifier_type`.

    Returns:
      Whatever the model returns when called on a random input batch.
    """
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, temporal_size, spatial_size, spatial_size, 3])
    temporal_strides = [1, 1, 1, 1]
    temporal_kernel_sizes = [
        (3, 3, 3),
        (3, 1, 3, 1),
        (3, 1, 3, 1, 3, 1),
        (1, 3, 1),
    ]

    # Set before the backbone is constructed, as in the original tests.
    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.ResNet3D(
        model_id=model_id,
        temporal_strides=temporal_strides,
        temporal_kernel_sizes=temporal_kernel_sizes,
        input_specs=input_specs,
        activation=activation)

    model = video_classification_model.MultiHeadVideoClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        input_specs={'image': input_specs},
        dropout_rate=0.2,
        aggregate_endpoints=aggregate_endpoints,
        **model_kwargs)

    inputs = np.random.rand(
        self._BATCH_SIZE, temporal_size, spatial_size, spatial_size, 3)
    return model(inputs)

  @parameterized.parameters(
      (50, 8, 112, 'relu', False),
      (50, 8, 112, 'swish', True),
  )
  def test_resnet3d_network_creation(self, model_id, temporal_size,
                                     spatial_size, activation,
                                     aggregate_endpoints):
    """Test for creation of a ResNet3D-50 classifier."""
    num_classes = 1000
    logits = self._compute_logits(
        model_id, temporal_size, spatial_size, activation,
        aggregate_endpoints, num_classes)
    self.assertAllEqual([self._BATCH_SIZE, num_classes], logits.numpy().shape)

  @parameterized.parameters(
      (50, 8, 112, 'relu', False),
      (50, 8, 112, 'swish', False),
  )
  def test_resnet3d_network_pooler_head_creation(self, model_id,
                                                 temporal_size, spatial_size,
                                                 activation,
                                                 aggregate_endpoints):
    """Test for creation of a ResNet3D-50 classifier with a 'pooler' head."""
    num_classes = 1000
    logits = self._compute_logits(
        model_id, temporal_size, spatial_size, activation,
        aggregate_endpoints, num_classes, classifier_type='pooler')
    self.assertAllEqual([self._BATCH_SIZE, num_classes], logits.numpy().shape)

  @parameterized.parameters(
      (50, 8, 112, 'relu', False),
      (50, 8, 112, 'swish', True),
  )
  def test_resnet3d_network_multiheads_creation(self, model_id, temporal_size,
                                                spatial_size, activation,
                                                aggregate_endpoints):
    """Test for creation of a ResNet3D-50 multiheads classifier."""
    num_classes = [1000, 100, 10]
    logits = self._compute_logits(
        model_id, temporal_size, spatial_size, activation,
        aggregate_endpoints, num_classes)
    # Iterate over the configured heads instead of a hard-coded count so the
    # check stays in sync with `num_classes`.
    for i, head_classes in enumerate(num_classes):
      self.assertAllEqual(
          [self._BATCH_SIZE, head_classes], logits[i].numpy().shape)
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/projects/videoglue/tasks/multihead_video_classification_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for video_classification."""
import
tensorflow
as
tf
# pylint: disable=unused-import
from
official.modeling
import
optimization
from
official.projects.videoglue.configs
import
video_classification
as
exp_cfg
from
official.projects.videoglue.modeling
import
video_classification_model
from
official.projects.videoglue.modeling.backbones
import
vit_3d
from
official.projects.videoglue.tasks
import
multihead_video_classification
# pylint: enable=unused-import
class MultiheadVideoClassificationTest(tf.test.TestCase):
  """Train/validation step tests for `MultiHeadVideoClassificationTask`."""

  def _make_config(self):
    """Returns the experiment config with the settings shared by all tests."""
    config = exp_cfg.mh_video_classification()
    config.task.train_data.global_batch_size = 2
    config.task.train_data.feature_shape = (16, 56, 56, 3)
    config.task.validation_data.feature_shape = (16, 56, 56, 3)
    return config

  def _check_step_logs(self, config, data_inputs, expected_keys):
    """Runs one train step and one validation step and checks the log keys.

    Args:
      config: experiment config; `config.task` builds the task and
        `config.trainer.optimizer_config` builds the optimizer.
      data_inputs: dict of tensors passed to both steps.
      expected_keys: keys that must be present in the logs returned by
        `train_step` and by `validation_step`.
    """
    task = multihead_video_classification.MultiHeadVideoClassificationTask(
        config.task)
    model = task.build_model()
    metrics = task.build_metrics()

    opt_factory = optimization.OptimizerFactory(
        config.trainer.optimizer_config)
    optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate())

    logs = task.train_step(data_inputs, model, optimizer, metrics=metrics)
    for key in expected_keys:
      self.assertIn(key, logs)

    logs = task.validation_step(data_inputs, model, metrics=metrics)
    for key in expected_keys:
      self.assertIn(key, logs)

  def test_one_head_video_classification(self):
    """Single head: expects loss and the accuracy metrics under 'label/'."""
    config = self._make_config()
    config.task.train_data.num_classes = 400
    config.task.validation_data.num_classes = 400

    data_inputs = {
        'image': tf.ones([2, 16, 56, 56, 3], tf.float32),
        'label': tf.ones([2, 400], tf.float32),
    }
    self._check_step_logs(
        config,
        data_inputs,
        expected_keys=(
            'loss',
            'label/accuracy',
            'label/top_1_accuracy',
            'label/top_5_accuracy',
        ))

  def test_one_head_video_classification_multilabel(self):
    """Single multilabel head: expects loss and the AUC/recall metrics."""
    config = self._make_config()
    config.task.train_data.num_classes = 400
    config.task.train_data.is_multilabel = True
    config.task.validation_data.num_classes = 400
    config.task.validation_data.is_multilabel = True

    data_inputs = {
        'image': tf.ones([2, 16, 56, 56, 3], tf.float32),
        'label': tf.ones([2, 400], tf.float32),
    }
    self._check_step_logs(
        config,
        data_inputs,
        expected_keys=(
            'loss',
            'label/ROC-AUC',
            'label/PR-AUC',
            'label/RecallAtPrecision95',
        ))

  def test_multi_head_video_classification(self):
    """Two heads: expects per-label metrics plus 'label_joint/accuracy'."""
    config = self._make_config()
    config.task.train_data.num_classes = [123, 456]
    config.task.train_data.label_names = ['label_a', 'label_b']
    config.task.validation_data.num_classes = [123, 456]
    config.task.validation_data.label_names = ['label_a', 'label_b']

    data_inputs = {
        'image': tf.ones([2, 16, 56, 56, 3], tf.float32),
        'label_a': tf.ones([2, 123], tf.float32),
        'label_b': tf.ones([2, 456], tf.float32),
    }
    self._check_step_logs(
        config,
        data_inputs,
        expected_keys=(
            'loss',
            'label_a/accuracy',
            'label_a/top_1_accuracy',
            'label_a/top_5_accuracy',
            'label_b/accuracy',
            'label_b/top_1_accuracy',
            'label_b/top_5_accuracy',
            'label_joint/accuracy',
        ))
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/projects/videoglue/tasks/spatiotemporal_action_localization_test.py
已删除
100644 → 0
浏览文件 @
8bbb4841
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for spatiotemporal_action_localization."""
import
tensorflow
as
tf
from
official.core
import
exp_factory
from
official.modeling
import
optimization
from
official.projects.videoglue.modeling
import
video_action_transformer_model
# pylint: disable=unused-import
from
official.projects.videoglue.tasks
import
spatiotemporal_action_localization
as
stal_task
class SpatiotemporalActionLocalizationTest(tf.test.TestCase):
  """Train/validation step test for the action localization task."""

  def test_spatiotemporal_action_localization(self):
    """Runs one train and one validation step and checks the log keys."""
    config = exp_factory.get_exp_config('spatiotemporal_action_localization')
    for data_config in (config.task.train_data, config.task.validation_data):
      data_config.global_batch_size = 2
      data_config.feature_shape = (32, 56, 56, 3)
    config.task.losses.l2_weight_decay = 1e-7

    task = stal_task.SpatiotemporalActionLocalizationTask(config.task)
    model = task.build_model()
    metrics = task.build_metrics()

    # Every input is an all-ones float32 tensor; only the shapes differ.
    input_shapes = {
        'image': [2, 32, 56, 56, 3],
        'instances_position': [2, 32, 4],
        'instances_score': [2, 32],
        'instances_mask': [2, 32],
        'label': [2, 32, 80],
        'nonmerge_label': [2, 32, 80],
        'nonmerge_instances_position': [2, 32, 4],
    }
    data_inputs = {
        name: tf.ones(shape, tf.float32)
        for name, shape in input_shapes.items()
    }

    opt_factory = optimization.OptimizerFactory(
        config.trainer.optimizer_config)
    learning_rate = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(learning_rate)

    loss_keys = ('loss', 'model_loss', 'regularization_loss')

    train_logs = task.train_step(
        data_inputs, model, optimizer, metrics=metrics)
    for key in loss_keys:
      self.assertIn(key, train_logs)

    eval_logs = task.validation_step(data_inputs, model, metrics=metrics)
    # The validation logs must additionally contain the two 'nonmerge_*' keys.
    eval_keys = loss_keys + ('nonmerge_label', 'nonmerge_instances_position')
    for key in eval_keys:
      self.assertIn(key, eval_logs)
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录