@@ -19,7 +19,7 @@ static bool regSortCompare(regMap map1, regMap map2) {
19
19
return false ;
20
20
}
21
21
22
- static bool regSortCompareAfterRA (regMapBRA map1, regMapBRA map2) {
22
+ static bool regSortCompareBeforeRA (regMapBRA map1, regMapBRA map2) {
23
23
if (map1.opndNum < map2.opndNum ) {
24
24
return true ;
25
25
} else if (map1.opndNum > map2.opndNum ) {
@@ -430,7 +430,7 @@ void SRSubPass::SRSub(G4_BB *bb) {
430
430
431
431
// Check if current instruction is the candidate of sendi.
432
432
// Recorded as candidate.
433
- bool SRSubPassAfterRA::isSRCandidateAfterRA (G4_INST *inst,
433
+ bool SRSubPassBeforeRA::isSRCandidateBeforeRA (G4_INST *inst,
434
434
regCandidatesBRA &dstSrcRegs) {
435
435
if (!inst->isSend ()) {
436
436
return false ;
@@ -482,7 +482,6 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
482
482
int movInstNum = 0 ;
483
483
int32_t firstDefID = 0x7FFFFFFF ; // the ID of the first instruction define the
484
484
std::vector<std::pair<Gen4_Operand_Number, unsigned >> notRemoveableMap;
485
- std::vector<G4_INST *> immMovs;
486
485
for (auto I = inst->def_begin (), E = inst->def_end (); I != E; ++I) {
487
486
auto &&def = *I;
488
487
@@ -573,90 +572,14 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
573
572
574
573
// The destination must not be a global operand
575
574
if (!(builder.getIsKernel () && kernel.fg .getNumBB () == 1 )) {
576
- if (kernel.fg .globalOpndHT .isOpndGlobal (dstRgn) && !dstRgn-> getTopDcl ()-> getIsBBLocal () ) {
575
+ if (kernel.fg .globalOpndHT .isOpndGlobal (dstRgn)) {
577
576
return false ;
578
577
}
579
578
}
580
579
581
580
return true ;
582
581
};
583
582
584
- // mov (16) r81.0<1>:f 0x8:f // $52:&54:
585
- // mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
586
- // mov (16) r82.0<1>:f 0x0:f // $54:&56:
587
- // mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
588
- // mov (16) r83.0<1>:f 0x0:f // $56:&58:
589
- // mov (16|M16) r91.0<1>:f 0x0:f // $57:&59:
590
- // mov (16) r84.0<1>:f 0x0:f // $58:&60:
591
- // mov (16|M16) r92.0<1>:f 0x0:f // $59:&61:
592
- // mov (16) r85.0<1>:f 0x0:f // $60:&62:
593
- // mov (16|M16) r93.0<1>:f 0x0:f // $61:&63:
594
- // mov (16) r86.0<1>:f 0x0:f // $62:&64:
595
- // mov (16|M16) r94.0<1>:f 0x0:f // $63:&65:
596
- // mov (16) r87.0<1>:f 0x0:f // $64:&66:
597
- // mov (16|M16) r95.0<1>:f 0x0:f // $65:&67:
598
- // mov (16) r88.0<1>:f 0x0:f // $66:&68:
599
- // mov (16|M16) r96.0<1>:f 0x0:f // $67:&69:
600
- // ==>
601
- // mov (16) r81.0<1>:f 0x8:f // $52:&54:
602
- // mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
603
- // mov (16) r82.0<1>:f 0x0:f // $54:&56:
604
- // mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
605
- //
606
- // Reuse r81, r89, r82, r90 in the gather send
607
- auto getRemoveableImm = [this ](G4_INST *inst,
608
- std::vector<G4_INST *> &immMovs) {
609
- // The instruction is only used for payload preparation.
610
- if (inst->use_size () != 1 ) {
611
- return (G4_INST *)nullptr ;
612
- }
613
-
614
- G4_DstRegRegion *dst = inst->getDst ();
615
- // dst GRF aligned and contigous
616
- if (dst->getSubRegOff () || dst->getHorzStride () != 1 ) {
617
- return (G4_INST *)nullptr ;
618
- }
619
-
620
- if (kernel.fg .globalOpndHT .isOpndGlobal (dst)) {
621
- return (G4_INST *)nullptr ;
622
- }
623
-
624
- // GRF Alignment with physical register assigned
625
- if (dst->getLinearizedStart () % builder.getGRFSize () != 0 ) {
626
- return (G4_INST *)nullptr ;
627
- }
628
-
629
- // If the destination operand size is less than 1 GRF
630
- if ((dst->getLinearizedEnd () - dst->getLinearizedStart () + 1 ) <
631
- builder.getGRFSize ()) {
632
- return (G4_INST *)nullptr ;
633
- }
634
-
635
- G4_Operand *src = inst->getSrc (0 );
636
- int64_t imm = src->asImm ()->getImm ();
637
- for (size_t i = 0 ; i < immMovs.size (); i++) {
638
- G4_INST *imov = immMovs[i];
639
- G4_Operand *isrc = imov->getSrc (0 );
640
- int64_t iimm = isrc->asImm ()->getImm ();
641
- if (imm == iimm &&
642
- src->getType () == isrc->getType () && // Same value and same type
643
- inst->getDst ()->getType () ==
644
- imov->getDst ()->getType () && // Same dst type
645
- inst->getDst ()->asDstRegRegion ()->getHorzStride () ==
646
- imov->getDst ()
647
- ->asDstRegRegion ()
648
- ->getHorzStride () && // Same region
649
- inst->getExecSize () == imov->getExecSize () && // Same execution size
650
- inst->getMaskOffset () ==
651
- imov->getMaskOffset ()) { // Same mask offset
652
- return imov;
653
- }
654
- }
655
- immMovs.push_back (inst);
656
-
657
- return (G4_INST *)nullptr ;
658
- };
659
-
660
583
// If opndNum + offset is defined multiple times, it cannot be removed
661
584
G4_Operand *dst = movInst->getDst ();
662
585
unsigned offset = dst->getLeftBound () / builder.getGRFSize ();
@@ -681,22 +604,6 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
681
604
movInstNum++;
682
605
}
683
606
} else {
684
- if (movInst->getSrc (0 ) && movInst->getSrc (0 )->isImm ()) {
685
- // Check if there is mov instruction with same imm value
686
- G4_INST *lvnMov = getRemoveableImm (movInst, immMovs);
687
-
688
- if (lvnMov) {
689
- // The offset is the offset of original dst, which is used to identify
690
- // the original register used in send.
691
- // The opndNum is the opndNum of send.
692
- regMapBRA regPair (movInst, opndNum, offset,
693
- lvnMov->getDst ()); // the lvn mov dst can be reused
694
- dstSrcRegs.dstSrcMap .push_back (regPair);
695
- firstDefID = std::min (firstDefID, def.first ->getLocalId ());
696
- movInstNum++;
697
- continue ;
698
- }
699
- }
700
607
notRemoveableMap.push_back (std::make_pair (opndNum, offset));
701
608
}
702
609
}
@@ -732,14 +639,14 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
732
639
dstSrcRegs.firstDefID = firstDefID;
733
640
// Sort according to the register order in the original payload
734
641
std::sort (dstSrcRegs.dstSrcMap .begin (), dstSrcRegs.dstSrcMap .end (),
735
- regSortCompareAfterRA );
642
+ regSortCompareBeforeRA );
736
643
737
644
return true ;
738
645
}
739
646
740
647
// Replace the send instruction with the payload of
741
648
// Insert the scalar register initialization mov instructions.
742
- bool SRSubPassAfterRA::replaceWithSendiAfterRA (G4_BB *bb,
649
+ bool SRSubPassBeforeRA::replaceWithSendiBeforeRA (G4_BB *bb,
743
650
INST_LIST_ITER instIter,
744
651
regCandidatesBRA &dstSrcRegs) {
745
652
G4_INST *inst = *instIter;
@@ -877,7 +784,7 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
877
784
return true ;
878
785
}
879
786
880
- void SRSubPassAfterRA::SRSubAfterRA (G4_BB *bb) {
787
+ void SRSubPassBeforeRA::SRSubBeforeRA (G4_BB *bb) {
881
788
bb->resetLocalIds ();
882
789
883
790
class CmpFirstDef {
@@ -896,7 +803,7 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
896
803
G4_INST *inst = *ii;
897
804
898
805
regCandidatesBRA dstSrcRegs;
899
- if (!isSRCandidateAfterRA (inst, dstSrcRegs)) {
806
+ if (!isSRCandidateBeforeRA (inst, dstSrcRegs)) {
900
807
ii++;
901
808
dstSrcRegs.dstSrcMap .clear ();
902
809
continue ;
@@ -933,26 +840,12 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
933
840
candidatesIt = candidates.find (inst);
934
841
// Is candidate send
935
842
if (candidatesIt != candidates.end ()) {
843
+ bool overwrite = false ;
936
844
// Scan backward from the send instruction.
937
845
INST_LIST_RITER scan_ri = ri;
938
846
scan_ri++;
939
847
G4_INST *rInst = *scan_ri;
940
-
941
848
while (rInst->getLocalId () > candidates[inst].firstDefID ) {
942
- if (rInst->isDead ()) {
943
- // If the inst is marked as dead, it's dst will not kill other value
944
- // Such as in following case, if third instruction is removed, r64
945
- // value of first instruction is kept.
946
- // mov (16) r16.0<1>:ud r64.0<1;1,0>:ud // $214:&226:
947
- // mov (16) r17.0<1>:ud r66.0<1;1,0>:ud // $216:&228:
948
- // mov (16) r64.0<1>:ud r68.0<1;1,0>:ud // $218:&230:
949
- scan_ri++;
950
- if (scan_ri == rend) {
951
- break ;
952
- }
953
- rInst = *scan_ri;
954
- continue ;
955
- }
956
849
G4_Operand *dst = rInst->getDst ();
957
850
if (dst && !dst->isNullReg ()) {
958
851
G4_VarBase *base = dst->getBase ();
@@ -986,22 +879,16 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
986
879
G4_Operand *dst = rInst->getDst ();
987
880
unsigned short dstRegLB = dst->getLinearizedStart ();
988
881
unsigned short dstRegRB = dst->getLinearizedEnd ();
882
+ for (int i = 0 ; i < (int )candidates[inst].dstSrcMap .size (); i++) {
883
+ int srcRegLB =
884
+ candidates[inst].dstSrcMap [i].opnd ->getLinearizedStart ();
885
+ int srcRegRB =
886
+ candidates[inst].dstSrcMap [i].opnd ->getLinearizedEnd ();
989
887
990
- // There is any none removeable offset, the offset define move
991
- // cannot be removed.
992
- std::vector<regMapBRA>::iterator dstSrcRegsIter;
993
- for (dstSrcRegsIter = candidates[inst].dstSrcMap .begin ();
994
- dstSrcRegsIter != candidates[inst].dstSrcMap .end ();) {
995
- std::vector<regMapBRA>::iterator nextIter = dstSrcRegsIter;
996
- nextIter++;
997
- int srcRegLB = (*dstSrcRegsIter).opnd ->getLinearizedStart ();
998
- int srcRegRB = (*dstSrcRegsIter).opnd ->getLinearizedEnd ();
999
888
if (!(srcRegRB < dstRegLB || srcRegLB > dstRegRB)) {
1000
889
// Register is reused.
1001
- dstSrcRegsIter =
1002
- candidates[inst].dstSrcMap .erase (dstSrcRegsIter);
1003
- } else {
1004
- dstSrcRegsIter = nextIter;
890
+ overwrite = true ;
891
+ break ;
1005
892
}
1006
893
}
1007
894
}
@@ -1013,24 +900,22 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
1013
900
}
1014
901
rInst = *scan_ri;
1015
902
}
1016
-
1017
- // Due to extra mov for s0, so don't use s0 if equal or less than 1 mov
1018
- // inst can be removed.
1019
- if (candidates[inst].dstSrcMap .size () <= 1 &&
1020
- builder.getuint32Option (vISA_EnableGatherWithImmPreRA) !=
1021
- INDIRECT_TYPE::ALWAYS_S0) {
903
+ if (overwrite) {
1022
904
candidates.erase (candidatesIt);
1023
- } else {
1024
- for (int j = 0 ; j < (int )candidatesIt->second .dstSrcMap .size (); j++) {
1025
- G4_INST *movInst = candidatesIt->second .dstSrcMap [j].inst ;
1026
- movInst->markDead ();
1027
- }
1028
905
}
1029
906
}
1030
907
1031
908
ri++;
1032
909
}
1033
910
911
+ for (candidatesIt = candidates.begin (); candidatesIt != candidates.end ();
912
+ candidatesIt++) {
913
+ for (int i = 0 ; i < (int )candidatesIt->second .dstSrcMap .size (); i++) {
914
+ G4_INST *movInst = candidatesIt->second .dstSrcMap [i].inst ;
915
+ movInst->markDead ();
916
+ }
917
+ }
918
+
1034
919
// Replace the send instruction with sendi
1035
920
// Remove the mov instructions that are marked as dead
1036
921
INST_LIST_ITER iter;
@@ -1041,7 +926,7 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
1041
926
1042
927
candidatesIt = candidates.find (inst);
1043
928
if (candidatesIt != candidates.end ()) {
1044
- replaceWithSendiAfterRA (bb, curIter, candidates[inst]);
929
+ replaceWithSendiBeforeRA (bb, curIter, candidates[inst]);
1045
930
}
1046
931
if (inst->isDead ()) {
1047
932
bb->erase (curIter);
0 commit comments