Commit 6fd3958

upload source code (#2)

1 parent cd852ee commit 6fd3958

34 files changed: +2817 -0 lines changed

tests/__init__.py

Whitespace-only changes.

tests/test_auto_backbone.py

+43
import unittest


class TestAutoBackbone(unittest.TestCase):
    def test_timm_backbone(self):
        import torch

        from video_transformers import AutoBackbone

        config = {
            "framework": {"name": "timm"},
            "type": "2d_backbone",
            "model_name": "mobilevitv2_100",
            "num_timesteps": 8,
        }
        batch_size = 2

        backbone = AutoBackbone.from_config(config)
        input = torch.randn(batch_size, 3, config["num_timesteps"], 224, 224)
        output = backbone(input)
        self.assertEqual(output.shape, (batch_size, config["num_timesteps"], backbone.num_features))

    def test_transformers_backbone(self):
        import torch

        from video_transformers import AutoBackbone

        config = {
            "framework": {"name": "transformers"},
            "type": "2d_backbone",
            "model_name": "microsoft/cvt-13",
            "num_timesteps": 8,
        }
        batch_size = 2

        backbone = AutoBackbone.from_config(config)
        input = torch.randn(batch_size, 3, config["num_timesteps"], 224, 224)
        output = backbone(input)
        self.assertEqual(output.shape, (batch_size, config["num_timesteps"], backbone.num_features))


if __name__ == "__main__":
    unittest.main()

tests/test_auto_head.py

+25
import unittest


class TestAutoHead(unittest.TestCase):
    def test_linear_head(self):
        import torch

        from video_transformers import AutoHead

        config = {
            "name": "LinearHead",
            "hidden_size": 256,
            "num_classes": 10,
            "dropout_p": 0.1,
        }
        batch_size = 2

        head = AutoHead.from_config(config)
        input = torch.randn(batch_size, config["hidden_size"])
        output = head(input)
        self.assertEqual(output.shape, (batch_size, config["num_classes"]))


if __name__ == "__main__":
    unittest.main()

tests/test_auto_neck.py

+69
import unittest


class TestAutoNeck(unittest.TestCase):
    def test_transformers_neck(self):
        import torch

        from video_transformers import AutoNeck

        config = {
            "name": "TransformerNeck",
            "num_features": 256,
            "num_timesteps": 8,
            "transformer_enc_num_heads": 4,
            "transformer_enc_num_layers": 2,
            "transformer_enc_act": "gelu",
            "dropout_p": 0.1,
            "return_mean": True,
        }
        batch_size = 2

        neck = AutoNeck.from_config(config)
        input = torch.randn(batch_size, config["num_timesteps"], config["num_features"])
        output = neck(input)
        self.assertEqual(output.shape, (batch_size, neck.num_features))

    def test_lstm_neck(self):
        import torch

        from video_transformers import AutoNeck

        config = {
            "name": "LSTMNeck",
            "num_features": 256,
            "num_timesteps": 8,
            "hidden_size": 128,
            "num_layers": 2,
            "return_last": True,
        }
        batch_size = 2

        neck = AutoNeck.from_config(config)
        input = torch.randn(batch_size, config["num_timesteps"], config["num_features"])
        output = neck(input)
        self.assertEqual(output.shape, (batch_size, config["hidden_size"]))

    def test_gru_neck(self):
        import torch

        from video_transformers import AutoNeck

        config = {
            "name": "GRUNeck",
            "num_features": 256,
            "num_timesteps": 8,
            "hidden_size": 128,
            "num_layers": 2,
            "return_last": True,
        }
        batch_size = 2

        neck = AutoNeck.from_config(config)
        input = torch.randn(batch_size, config["num_timesteps"], config["num_features"])
        output = neck(input)
        self.assertEqual(output.shape, (batch_size, config["hidden_size"]))


if __name__ == "__main__":
    unittest.main()

tests/test_backbone.py

+43
import unittest


class TestBackbone(unittest.TestCase):
    def test_transformers_backbone(self):
        import torch

        from video_transformers.backbones.transformers import TransformersBackbone

        config = {"model_name": "microsoft/cvt-13"}
        batch_size = 2

        backbone = TransformersBackbone(model_name=config["model_name"], num_unfrozen_stages=0)
        self.assertEqual(backbone.num_trainable_params, 0)

        backbone = TransformersBackbone(model_name=config["model_name"], num_unfrozen_stages=-1)
        self.assertNotEqual(backbone.num_trainable_params, 0)

        input = torch.randn(batch_size, 3, 224, 224)
        output = backbone(input)
        self.assertEqual(output.shape, (batch_size, backbone.num_features))

    def test_timm_backbone(self):
        import torch

        from video_transformers.backbones.timm import TimmBackbone

        config = {"model_name": "mobilevitv2_100"}
        batch_size = 2

        backbone = TimmBackbone(model_name=config["model_name"], num_unfrozen_stages=0)
        self.assertEqual(backbone.num_trainable_params, 0)

        backbone = TimmBackbone(model_name=config["model_name"], num_unfrozen_stages=-1)
        self.assertNotEqual(backbone.num_trainable_params, 0)

        input = torch.randn(batch_size, 3, 224, 224)
        output = backbone(input)
        self.assertEqual(output.shape, (batch_size, backbone.num_features))


if __name__ == "__main__":
    unittest.main()

tests/test_onnx.py

+83
import unittest


class TestOnnx(unittest.TestCase):
    def test_onnx_export(self):
        from video_transformers import VideoClassificationModel

        config = {
            "backbone": {
                "name": "TransformersBackbone",
                "framework": {"name": "transformers", "version": "4.21.1"},
                "mean": [0.485, 0.456, 0.406],
                "model_name": "microsoft/cvt-13",
                "num_features": 384,
                "num_total_params": 19611712,
                "num_trainable_params": 18536448,
                "std": [0.229, 0.224, 0.225],
                "type": "2d_backbone",
            },
            "head": {"name": "LinearHead", "dropout_p": 0.0, "hidden_size": 384, "num_classes": 6},
            "neck": {
                "name": "TransformerNeck",
                "dropout_p": 0.1,
                "num_features": 384,
                "num_timesteps": 8,
                "transformer_enc_act": "gelu",
                "transformer_enc_num_heads": 4,
                "transformer_enc_num_layers": 2,
                "return_mean": True,
            },
            "preprocess_means": [0.485, 0.456, 0.406],
            "preprocess_stds": [0.229, 0.224, 0.225],
            "preprocess_min_short_side_scale": 256,
            "preprocess_input_size": 224,
            "num_timesteps": 8,
            "labels": ["BodyWeightSquats", "JumpRope", "Lunges", "PullUps", "PushUps", "WallPushups"],
        }

        model = VideoClassificationModel.from_config(config)

        model.to_onnx()

    def test_quantized_onnx_export(self):
        from video_transformers import VideoClassificationModel

        config = {
            "backbone": {
                "name": "TransformersBackbone",
                "framework": {"name": "transformers", "version": "4.21.1"},
                "mean": [0.485, 0.456, 0.406],
                "model_name": "microsoft/cvt-13",
                "num_features": 384,
                "num_total_params": 19611712,
                "num_trainable_params": 18536448,
                "std": [0.229, 0.224, 0.225],
                "type": "2d_backbone",
            },
            "head": {"name": "LinearHead", "dropout_p": 0.0, "hidden_size": 384, "num_classes": 6},
            "neck": {
                "name": "TransformerNeck",
                "dropout_p": 0.1,
                "num_features": 384,
                "num_timesteps": 8,
                "transformer_enc_act": "gelu",
                "transformer_enc_num_heads": 4,
                "transformer_enc_num_layers": 2,
                "return_mean": True,
            },
            "preprocess_means": [0.485, 0.456, 0.406],
            "preprocess_stds": [0.229, 0.224, 0.225],
            "preprocess_min_short_side_scale": 256,
            "preprocess_input_size": 224,
            "num_timesteps": 8,
            "labels": ["BodyWeightSquats", "JumpRope", "Lunges", "PullUps", "PushUps", "WallPushups"],
        }

        model = VideoClassificationModel.from_config(config)

        model.to_onnx(quantize=True)


if __name__ == "__main__":
    unittest.main()
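For context, a hedged sketch of running the exported graph with onnxruntime follows. The output path ("model.onnx") and the (batch, channels, time, height, width) input layout are assumptions for illustration, not details taken from this commit.

import numpy as np
import onnxruntime as ort

# Load the exported graph; the file name here is an assumption, and
# to_onnx() may write to a different default location.
session = ort.InferenceSession("model.onnx")
input_name = session.get_inputs()[0].name

# One clip of 8 RGB frames at 224x224, assuming the same layout the
# PyTorch model consumes: (batch, channels, time, height, width).
clip = np.random.randn(1, 3, 8, 224, 224).astype(np.float32)

outputs = session.run(None, {input_name: clip})
print(outputs[0].shape)  # expected: (1, 6) for the six labels above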
+50

import unittest


class TestVideoClassificationModel(unittest.TestCase):
    def test_transformers_backbone(self):
        import torch

        from video_transformers import VideoClassificationModel

        config = {
            "backbone": {
                "name": "TransformersBackbone",
                "framework": {"name": "transformers", "version": "4.21.1"},
                "mean": [0.485, 0.456, 0.406],
                "model_name": "microsoft/cvt-13",
                "num_features": 384,
                "num_total_params": 19611712,
                "num_trainable_params": 18536448,
                "std": [0.229, 0.224, 0.225],
                "type": "2d_backbone",
            },
            "head": {"name": "LinearHead", "dropout_p": 0.0, "hidden_size": 384, "num_classes": 6},
            "neck": {
                "name": "TransformerNeck",
                "dropout_p": 0.1,
                "num_features": 384,
                "num_timesteps": 8,
                "transformer_enc_act": "gelu",
                "transformer_enc_num_heads": 4,
                "transformer_enc_num_layers": 2,
                "return_mean": True,
            },
            "preprocess_means": [0.485, 0.456, 0.406],
            "preprocess_stds": [0.229, 0.224, 0.225],
            "preprocess_min_short_side_scale": 256,
            "preprocess_input_size": 224,
            "num_timesteps": 8,
            "labels": ["BodyWeightSquats", "JumpRope", "Lunges", "PullUps", "PushUps", "WallPushups"],
        }
        batch_size = 2

        model = VideoClassificationModel.from_config(config)

        input = torch.randn(batch_size, 3, config["num_timesteps"], 224, 224)
        output = model(input)
        self.assertEqual(output.shape, (batch_size, model.head.num_classes))


if __name__ == "__main__":
    unittest.main()

video_transformers/__init__.py

+6
from video_transformers.auto.backbone import AutoBackbone
from video_transformers.auto.head import AutoHead
from video_transformers.auto.neck import AutoNeck
from video_transformers.modules import TimeDistributed, VideoClassificationModel

__version__ = "0.0.2"

video_transformers/auto/__init__.py

Whitespace-only changes.

video_transformers/auto/backbone.py

+33
from typing import Dict, Union

from video_transformers.backbones.base import Backbone
from video_transformers.modules import TimeDistributed


class AutoBackbone:
    """
    AutoBackbone is a class that automatically instantiates a video model backbone from a config.
    """

    @classmethod
    def from_config(cls, config: Dict) -> Union[Backbone, TimeDistributed]:
        backbone_framework = config.get("framework")
        backbone_type = config.get("type")
        backbone_model_name = config.get("model_name")

        if backbone_framework["name"] == "transformers":
            from video_transformers.backbones.transformers import TransformersBackbone

            backbone = TransformersBackbone(model_name=backbone_model_name)
        elif backbone_framework["name"] == "timm":
            from video_transformers.backbones.timm import TimmBackbone

            backbone = TimmBackbone(model_name=backbone_model_name)
        else:
            raise ValueError(f"Unknown framework {backbone_framework}")

        if backbone_type == "2d_backbone":
            from video_transformers.modules import TimeDistributed

            backbone = TimeDistributed(backbone)
        return backbone
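For orientation, here is a minimal usage sketch of AutoBackbone.from_config, mirroring the config keys exercised in tests/test_auto_backbone.py above; a working timm install and the mobilevitv2_100 weights are assumed to be available.

import torch

from video_transformers import AutoBackbone

# "framework" selects the image-model library; "type" == "2d_backbone" makes
# from_config wrap the image model in TimeDistributed so it runs per frame.
config = {
    "framework": {"name": "timm"},
    "type": "2d_backbone",
    "model_name": "mobilevitv2_100",
    "num_timesteps": 8,
}

backbone = AutoBackbone.from_config(config)

# Clip tensors are (batch, channels, time, height, width); the time-distributed
# backbone returns per-frame features of shape (batch, time, num_features).
clip = torch.randn(2, 3, config["num_timesteps"], 224, 224)
features = backbone(clip)
print(features.shape)  # (2, 8, backbone.num_features)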

video_transformers/auto/head.py

+20
from typing import Dict


class AutoHead:
    """
    AutoHead is a class that automatically instantiates a video model head from a config.
    """

    @classmethod
    def from_config(cls, config: Dict):
        head_class_name = config.get("name")
        if head_class_name == "LinearHead":
            from video_transformers.heads import LinearHead

            hidden_size = config.get("hidden_size")
            num_classes = config.get("num_classes")
            dropout_p = config.get("dropout_p")
            return LinearHead(hidden_size, num_classes, dropout_p)
        else:
            raise ValueError(f"Unsupported head class name: {head_class_name}")
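A similar sketch for AutoHead, using the LinearHead keys consumed above; the values are illustrative and match tests/test_auto_head.py.

import torch

from video_transformers import AutoHead

# Only "LinearHead" is currently handled by AutoHead.from_config;
# any other name raises ValueError.
config = {"name": "LinearHead", "hidden_size": 256, "num_classes": 10, "dropout_p": 0.1}

head = AutoHead.from_config(config)

# The head maps pooled clip features (batch, hidden_size) to class logits.
clip_features = torch.randn(2, config["hidden_size"])
logits = head(clip_features)
print(logits.shape)  # (2, 10)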
