@@ -21,7 +21,7 @@ def make_head_layer(cnv_dim, curr_dim, out_dim, head_name=None):
         # nn.BatchNorm2d(curr_dim, eps=1e-3, momentum=0.01),
         nn.ReLU(inplace=True),
         nn.Conv2d(curr_dim, out_dim, kernel_size=3, stride=1, padding=1),
-    )  # kernel=1, padding=0, bias=True
+    )

     for l in fc.modules():
         if isinstance(l, nn.Conv2d):
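The full `make_head_layer` body is not shown in this hunk. For context, a minimal sketch of what such a conv head plausibly looks like; the first conv layer and the init scheme inside the `isinstance` branch are assumptions, since the diff cuts off before them:

```python
import torch.nn as nn

def make_head_layer(cnv_dim, curr_dim, out_dim, head_name=None):
    fc = nn.Sequential(
        nn.Conv2d(cnv_dim, curr_dim, kernel_size=3, padding=1, bias=True),  # assumed first layer
        nn.ReLU(inplace=True),
        nn.Conv2d(curr_dim, out_dim, kernel_size=3, stride=1, padding=1),
    )
    # Assumed init scheme: small-std weights, zero bias for every conv.
    for l in fc.modules():
        if isinstance(l, nn.Conv2d):
            nn.init.normal_(l.weight, std=0.01)
            if l.bias is not None:
                nn.init.constant_(l.bias, 0)
    return fc
```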
@@ -72,7 +72,6 @@ def forward(self, x0, x1, x0_mask=None, x1_mask=None, flag=False):
         if x0_mask != None and x1_mask != None:
             x0_mask, x1_mask = x0_mask.flatten(-2), x1_mask.flatten(-2)

-        save_feat = []
         if flag is False:
             for i, (layer, name) in enumerate(zip(self.layers, self.layer_names)):
                 if name == "self":
@@ -85,9 +84,6 @@ def forward(self, x0, x1, x0_mask=None, x1_mask=None, flag=False):
                     raise KeyError
                 x0 = layer(x0, src0, x0_mask, src0_mask)
                 x1 = layer(x1, src1, x1_mask, src1_mask)
-                if i == 1:  # i==len(self.layer_names)//2-1:
-                    # print(i, len(self.layer_names))
-                    save_feat.append((x0, x1))
         elif flag == 1:  # origin
             for layer, name in zip(self.layers, self.layer_names):
                 if name == "self":
@@ -109,11 +105,7 @@ def forward(self, x0, x1, x0_mask=None, x1_mask=None, flag=False):
             else:
                 raise KeyError

-        # return feat0, feat1
-        if len(save_feat) > 0:
-            return x0, x1, save_feat
-        else:
-            return x0, x1
+        return x0, x1


 class SegmentationModule(nn.Module):
@@ -129,22 +121,15 @@ def __init__(self, d_model, num_query):
     def forward(self, x, hs, mask=None):
         # x:[n, 256, h, w] hs:[n, num_q, 256]

-        # TODO: BN
         if mask is not None:
-            # hs = self.encoderlayer(hs, x3_flatten, None, mask_flatten)
             attn_mask = torch.einsum("mqc,mchw->mqhw", hs, x)
-            # attn_mask = self.bn(attn_mask)
-            # attn_mask = attn_mask * self.gamma
             attn_mask = attn_mask.sigmoid() * mask.unsqueeze(1)
             classification = self.block(x * attn_mask + x).sigmoid().squeeze(1) * mask
         else:
-            # hs = self.encoderlayer(hs, x3_flatten)
             attn_mask = torch.einsum("mqc,mchw->mqhw", hs, x)
-            # attn_mask = self.bn(attn_mask)
-            # attn_mask = attn_mask * self.gamma
             attn_mask = attn_mask.sigmoid()
             classification = self.block(x * attn_mask + x).sigmoid().squeeze(1)
-        return classification  # , attn_mask # , mask_feat
+        return classification


 class FICAS(nn.Module):
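The einsum in `SegmentationModule.forward` is a per-query dot product between the query embeddings and every spatial feature: `hs` is `[n, num_q, c]` and `x` is `[n, c, h, w]`, so `"mqc,mchw->mqhw"` yields one `[h, w]` response map per query. A self-contained shape check, with dummy sizes assumed:

```python
import torch

n, q, c, h, w = 2, 4, 256, 32, 32   # assumed dummy sizes
hs = torch.randn(n, q, c)           # query embeddings
x = torch.randn(n, c, h, w)         # feature map

# Dot product of each query with the feature vector at every pixel.
attn_mask = torch.einsum("mqc,mchw->mqhw", hs, x)
assert attn_mask.shape == (n, q, h, w)

# Equivalent batched-matmul formulation.
ref = (hs @ x.flatten(-2)).view(n, q, h, w)
assert torch.allclose(attn_mask, ref, atol=1e-4)
```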
@@ -166,7 +151,7 @@ def __init__(self, layer_num=4, d_model=256):
         self.layer_names1 = [
             "self",
             "cross",
-        ]  # ['self', 'cross', 'cross'] # ['self', 'cross'] origin for eccv
+        ]
         self.layers1 = nn.ModuleList(
             [copy.deepcopy(encoder_layer) for _ in range(len(self.layer_names1))]
         )
@@ -186,7 +171,7 @@ def __init__(self, layer_num=4, d_model=256):
         self.layer_names3 = [
             "self",
             "cross",
-        ]  # ['self', 'cross', 'cross'] # ['self', 'cross'] origin for eccv
+        ]
         self.layers3 = nn.ModuleList(
             [copy.deepcopy(encoder_layer) for _ in range(len(self.layer_names3))]
         )
@@ -216,8 +201,7 @@ def transformer(self, x0, x1, x0_mask, x1_mask, layer_name, layer):
             and src1_mask is not None
             and not self.training
             and 0
-        ):  # \
-        # and layer_name == 'self' and 0:
+        ):
             temp_x = layer(
                 torch.cat([x0, x1], dim=0),
                 torch.cat([src0, src1], dim=0),
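The `and 0` keeps this branch permanently disabled; when enabled it would push both images through a single attention call instead of two. For any layer that treats batch elements independently the two forms are equivalent, as this sketch with a hypothetical stand-in `layer` illustrates:

```python
import torch
import torch.nn as nn

# Hypothetical stand-in for the attention layer; any module that treats
# batch elements independently behaves the same way here.
layer = nn.Linear(256, 256)
x0, x1 = torch.randn(2, 100, 256), torch.randn(2, 100, 256)

# Two separate calls vs. one batched call over the concatenated inputs.
sep0, sep1 = layer(x0), layer(x1)
batched = layer(torch.cat([x0, x1], dim=0))
cat0, cat1 = batched.chunk(2, dim=0)

assert torch.allclose(sep0, cat0, atol=1e-5)
assert torch.allclose(sep1, cat1, atol=1e-5)
```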
@@ -252,8 +236,7 @@ def feature_interaction(self, x0, x1, x0_mask=None, x1_mask=None):
         feature_embed1 = self.feature_embed.weight.unsqueeze(0).repeat(bs, 1, 1)
         tgt0 = torch.zeros_like(feature_embed0)
         tgt1 = torch.zeros_like(feature_embed1)
-        # hs0 = self.decoder(tgt0, x0, tgt_mask=None, memory_mask=x0_mask)
-        # hs1 = self.decoder(tgt1, x1, tgt_mask=None, memory_mask=x1_mask)
+
         if (
             0
         ):  # x0.shape==x1.shape and x0_mask is not None and x0_mask.shape==x1_mask.shape:
@@ -331,10 +314,7 @@ def forward(self, x0, x1, x0_mask=None, x1_mask=None, use_cas=True):
         out0, out1, hs0, hs1, x0_mid, x1_mid = self.feature_interaction(
             x0, x1, x0_mask, x1_mask
         )
-        # out0 = rearrange(out0, 'n (h w) c -> n c h w',
-        #                  h=h0, w=w0).contiguous()
-        # out1 = rearrange(out1, 'n (h w) c -> n c h w',
-        #                  h=h1, w=w1).contiguous()
+
         if use_cas:
             x0_mid = rearrange(x0_mid, "n (h w) c -> n c h w", h=h0, w=w0).contiguous()
             x1_mid = rearrange(x1_mid, "n (h w) c -> n c h w", h=h1, w=w1).contiguous()
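The `rearrange` here just undoes the token flattening: a `[n, h*w, c]
]` transformer sequence is folded back into a `[n, c, h, w]` feature map. Equivalent in plain PyTorch, with dummy sizes assumed:

```python
import torch
from einops import rearrange

n, h, w, c = 2, 32, 32, 256          # assumed dummy sizes
tokens = torch.randn(n, h * w, c)    # flattened transformer tokens

grid = rearrange(tokens, "n (h w) c -> n c h w", h=h, w=w).contiguous()

# Same thing without einops: reshape to the grid, then move channels first.
ref = tokens.view(n, h, w, c).permute(0, 3, 1, 2).contiguous()
assert torch.equal(grid, ref)
```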