Skip to content

Commit 2f20cb4

Browse files
bcheng0127igcbot
authored and committed
Changes in code.
1 parent 77ae407 commit 2f20cb4

File tree

5 files changed

+35
-195
lines changed

5 files changed

+35
-195
lines changed

visa/G4_Declare.h

-7
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,6 @@ class G4_Declare {
9494
uint16_t forceSpilled : 1;
9595
uint16_t exclusiveLoad : 1;
9696
uint16_t isCmpUseOnly : 1;
97-
// indicate if the declare is local referenced only
98-
// Especially for the variable with pseudo_kill,
99-
// which will be removed in removeLifetimeOps pass.
100-
uint16_t isBBLocal : 1;
10197

10298
unsigned declId; // global decl id for this builder
10399

@@ -338,9 +334,6 @@ class G4_Declare {
338334
void setIsCmpUseOnly(bool b) { isCmpUseOnly = b; }
339335
bool getIsCmpUseOnly() const { return isCmpUseOnly; }
340336

341-
void setIsBBLocal(bool b) { isBBLocal = b; }
342-
bool getIsBBLocal() const { return isBBLocal; }
343-
344337
unsigned getNumRegNeeded() const;
345338

346339
void emit(std::ostream &output) const;

visa/LocalDataflow.cpp

-64
Original file line numberDiff line numberDiff line change
@@ -453,47 +453,6 @@ static void processReadOpnds(G4_BB *BB, G4_INST *Inst, LocalLivenessInfo &LLI) {
453453
}
454454
}
455455

456-
static void
457-
processReadOpndsForPseudoKill(G4_BB *BB, G4_INST *Inst,
458-
std::unordered_set<G4_Declare *> &pseudoKills) {
459-
if (Inst->isPseudoKill()) {
460-
return;
461-
}
462-
// (1) Indirect dst operand reads address.
463-
G4_DstRegRegion *Dst = Inst->getDst();
464-
if (Dst && Dst->isIndirect()) {
465-
G4_Declare *dcl = Dst->getTopDcl();
466-
if (pseudoKills.find(dcl) != pseudoKills.end()) {
467-
dcl->setIsBBLocal(false);
468-
pseudoKills.erase(dcl);
469-
}
470-
}
471-
472-
// (2) Direct and indirect source operands.
473-
for (auto OpNum :
474-
{Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
475-
Gen4_Operand_Number::Opnd_src2, Gen4_Operand_Number::Opnd_src3,
476-
Gen4_Operand_Number::Opnd_src4, Gen4_Operand_Number::Opnd_src5,
477-
Gen4_Operand_Number::Opnd_src6, Gen4_Operand_Number::Opnd_src7,
478-
Gen4_Operand_Number::Opnd_pred, Gen4_Operand_Number::Opnd_implAccSrc}) {
479-
G4_Operand *opnd = Inst->getOperand(OpNum);
480-
if (opnd == nullptr || opnd->isImm() || opnd->isNullReg() ||
481-
opnd->isLabel())
482-
continue;
483-
484-
G4_Declare *dcl = nullptr;
485-
if (Inst->isPseudoAddrMovIntrinsic()) {
486-
dcl =opnd->asAddrExp()->getRegVar()->getDeclare();
487-
} else {
488-
dcl = opnd->getTopDcl();
489-
}
490-
if (pseudoKills.find(dcl) != pseudoKills.end()) {
491-
dcl->setIsBBLocal(false);
492-
pseudoKills.erase(dcl);
493-
}
494-
}
495-
}
496-
497456
// Process writes. If this is a partial definition, then record this partial
498457
// definition. When all partial definitions together define this live read node,
499458
// it is killed and du/ud links are added.
@@ -528,12 +487,6 @@ static void processWriteOpnds(G4_BB *BB, G4_INST *Inst,
528487
}
529488

530489
void FlowGraph::localDataFlowAnalysis() {
531-
// For pseudo kill variable
532-
// If there is use exposed in a BB, it's treated as global.
533-
// Otherwise, it's treated as local even if the same pseudo kill may appear in
534-
// multiple BBs
535-
std::unordered_set<G4_Declare *> pesudoKilledDcls;
536-
537490
for (auto BB : BBs) {
538491
LocalLivenessInfo LLI(!BB->isAllLaneActive());
539492
for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
@@ -551,24 +504,7 @@ void FlowGraph::localDataFlowAnalysis() {
551504
continue;
552505
}
553506
processWriteOpnds(BB, Inst, LLI);
554-
555-
if (Inst->isPseudoKill() && Inst->getDst() && !Inst->getDst()->isNullReg()) {
556-
G4_Declare *dcl = Inst->getDst()->getTopDcl();
557-
pesudoKilledDcls.insert(dcl);
558-
// In case the use in another BB is analyzed before the define
559-
if (!globalOpndHT.isOpndGlobal(Inst->getDst())) {
560-
G4_Declare *dcl = Inst->getDst()->getTopDcl();
561-
dcl->setIsBBLocal(true);
562-
}
563-
}
564-
565507
processReadOpnds(BB, Inst, LLI);
566-
if (pesudoKilledDcls
567-
.size()) { // Process the operand using variable which
568-
// has pseudo kill. Since the scan is from back to
569-
// front, exposed use will make variable global
570-
processReadOpndsForPseudoKill(BB, Inst, pesudoKilledDcls);
571-
}
572508
}
573509

574510
// All left over live nodes are global.

visa/Optimizer.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -834,7 +834,7 @@ void Optimizer::s0SubAfterRA() {
834834
kernel.fg.resetLocalDataFlowData();
835835
kernel.fg.localDataFlowAnalysis();
836836

837-
SRSubPassAfterRA s0Sub(builder, kernel);
837+
SRSubPassBeforeRA s0Sub(builder, kernel);
838838
s0Sub.run();
839839
}
840840

visa/Passes/SRSubstitution.cpp

+25-114
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ static bool regSortCompare(regMap map1, regMap map2) {
1919
return false;
2020
}
2121

22-
static bool regSortCompareAfterRA(regMapBRA map1, regMapBRA map2) {
22+
static bool regSortCompareBeforeRA(regMapBRA map1, regMapBRA map2) {
2323
if (map1.opndNum < map2.opndNum) {
2424
return true;
2525
} else if (map1.opndNum > map2.opndNum) {
@@ -430,7 +430,7 @@ void SRSubPass::SRSub(G4_BB *bb) {
430430

431431
// Check if current instruction is the candidate of sendi.
432432
// Recorded as candidate.
433-
bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
433+
bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
434434
regCandidatesBRA &dstSrcRegs) {
435435
if (!inst->isSend()) {
436436
return false;
@@ -482,7 +482,6 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
482482
int movInstNum = 0;
483483
int32_t firstDefID = 0x7FFFFFFF; // the ID of the first instruction define the
484484
std::vector<std::pair<Gen4_Operand_Number, unsigned>> notRemoveableMap;
485-
std::vector<G4_INST *> immMovs;
486485
for (auto I = inst->def_begin(), E = inst->def_end(); I != E; ++I) {
487486
auto &&def = *I;
488487

@@ -573,64 +572,14 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
573572

574573
// It's not global define
575574
if (!(builder.getIsKernel() && kernel.fg.getNumBB() == 1)) {
576-
if (kernel.fg.globalOpndHT.isOpndGlobal(dstRgn) && !dstRgn->getTopDcl()->getIsBBLocal()) {
575+
if (kernel.fg.globalOpndHT.isOpndGlobal(dstRgn)) {
577576
return false;
578577
}
579578
}
580579

581580
return true;
582581
};
583582

584-
// mov (16) r81.0<1>:f 0x8:f // $52:&54:
585-
// mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
586-
// mov (16) r82.0<1>:f 0x0:f // $54:&56:
587-
// mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
588-
// mov (16) r83.0<1>:f 0x0:f // $56:&58:
589-
// mov (16|M16) r91.0<1>:f 0x0:f // $57:&59:
590-
// mov (16) r84.0<1>:f 0x0:f // $58:&60:
591-
// mov (16|M16) r92.0<1>:f 0x0:f // $59:&61:
592-
// mov (16) r85.0<1>:f 0x0:f // $60:&62:
593-
// mov (16|M16) r93.0<1>:f 0x0:f // $61:&63:
594-
// mov (16) r86.0<1>:f 0x0:f // $62:&64:
595-
// mov (16|M16) r94.0<1>:f 0x0:f // $63:&65:
596-
// mov (16) r87.0<1>:f 0x0:f // $64:&66:
597-
// mov (16|M16) r95.0<1>:f 0x0:f // $65:&67:
598-
// mov (16) r88.0<1>:f 0x0:f // $66:&68:
599-
// mov (16|M16) r96.0<1>:f 0x0:f // $67:&69:
600-
// ==>
601-
// mov (16) r81.0<1>:f 0x8:f // $52:&54:
602-
// mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
603-
// mov (16) r82.0<1>:f 0x0:f // $54:&56:
604-
// mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
605-
//
606-
// Reuse r81, r89, r82, r90 in the gather send
607-
auto getRemoveableImm = [this](G4_INST *inst,
608-
std::vector<G4_INST *> &immMovs) {
609-
G4_Operand *src = inst->getSrc(0);
610-
int64_t imm = src->asImm()->getImm();
611-
for (size_t i = 0; i < immMovs.size(); i++) {
612-
G4_INST *imov = immMovs[i];
613-
G4_Operand *isrc = imov->getSrc(0);
614-
int64_t iimm = isrc->asImm()->getImm();
615-
if (imm == iimm &&
616-
src->getType() == isrc->getType() && // Same value and same type
617-
inst->getDst()->getType() ==
618-
imov->getDst()->getType() && // Same dst type
619-
inst->getDst()->asDstRegRegion()->getHorzStride() ==
620-
imov->getDst()
621-
->asDstRegRegion()
622-
->getHorzStride() && // Same region
623-
inst->getExecSize() == imov->getExecSize() && // Same execution size
624-
inst->getMaskOffset() ==
625-
imov->getMaskOffset()) { // Same mask offset
626-
return imov;
627-
}
628-
}
629-
immMovs.push_back(inst);
630-
631-
return (G4_INST *)nullptr;
632-
};
633-
634583
// if opndNum + offset is defined multiple times, it cannot be removed
635584
G4_Operand *dst = movInst->getDst();
636585
unsigned offset = dst->getLeftBound() / builder.getGRFSize();
@@ -655,22 +604,6 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
655604
movInstNum++;
656605
}
657606
} else {
658-
if (movInst->getSrc(0) && movInst->getSrc(0)->isImm()) {
659-
// Check if there is mov instruction with same imm value
660-
G4_INST *lvnMov = getRemoveableImm(movInst, immMovs);
661-
662-
if (lvnMov) {
663-
// The offset is the offset of original dst, which is used to identify
664-
// the original register used in send.
665-
// The opndNum is the opndNum of send.
666-
regMapBRA regPair(movInst, opndNum, offset,
667-
lvnMov->getDst()); // the lvn mov dst can be reused
668-
dstSrcRegs.dstSrcMap.push_back(regPair);
669-
firstDefID = std::min(firstDefID, def.first->getLocalId());
670-
movInstNum++;
671-
continue;
672-
}
673-
}
674607
notRemoveableMap.push_back(std::make_pair(opndNum, offset));
675608
}
676609
}
@@ -706,14 +639,14 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
706639
dstSrcRegs.firstDefID = firstDefID;
707640
// Sort according to the register order in the original payload
708641
std::sort(dstSrcRegs.dstSrcMap.begin(), dstSrcRegs.dstSrcMap.end(),
709-
regSortCompareAfterRA);
642+
regSortCompareBeforeRA);
710643

711644
return true;
712645
}
713646

714647
// Replace the send instruction with the payload of
715648
// Insert the scalar register initialization mov instructions.
716-
bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
649+
bool SRSubPassBeforeRA::replaceWithSendiBeforeRA(G4_BB *bb,
717650
INST_LIST_ITER instIter,
718651
regCandidatesBRA &dstSrcRegs) {
719652
G4_INST *inst = *instIter;
@@ -851,7 +784,7 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
851784
return true;
852785
}
853786

854-
void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
787+
void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
855788
bb->resetLocalIds();
856789

857790
class CmpFirstDef {
@@ -870,7 +803,7 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
870803
G4_INST *inst = *ii;
871804

872805
regCandidatesBRA dstSrcRegs;
873-
if (!isSRCandidateAfterRA(inst, dstSrcRegs)) {
806+
if (!isSRCandidateBeforeRA(inst, dstSrcRegs)) {
874807
ii++;
875808
dstSrcRegs.dstSrcMap.clear();
876809
continue;
@@ -907,26 +840,12 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
907840
candidatesIt = candidates.find(inst);
908841
//Is candidate send
909842
if (candidatesIt != candidates.end()) {
843+
bool overwrite = false;
910844
// Scan backward from the send instruction.
911845
INST_LIST_RITER scan_ri = ri;
912846
scan_ri++;
913847
G4_INST *rInst = *scan_ri;
914-
915848
while (rInst->getLocalId() > candidates[inst].firstDefID) {
916-
if (rInst->isDead()) {
917-
// If the inst is marked as dead, its dst will not kill other value
918-
// Such as in following case, if third instruction is removed, r64
919-
// value of first instruction is kept.
920-
// mov (16) r16.0<1>:ud r64.0<1;1,0>:ud // $214:&226:
921-
// mov (16) r17.0<1>:ud r66.0<1;1,0>:ud // $216:&228:
922-
// mov (16) r64.0<1>:ud r68.0<1;1,0>:ud // $218:&230:
923-
scan_ri++;
924-
if (scan_ri == rend) {
925-
break;
926-
}
927-
rInst = *scan_ri;
928-
continue;
929-
}
930849
G4_Operand *dst = rInst->getDst();
931850
if (dst && !dst->isNullReg()) {
932851
G4_VarBase *base = dst->getBase();
@@ -960,22 +879,16 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
960879
G4_Operand *dst = rInst->getDst();
961880
unsigned short dstRegLB = dst->getLinearizedStart();
962881
unsigned short dstRegRB = dst->getLinearizedEnd();
882+
for (int i = 0; i < (int)candidates[inst].dstSrcMap.size(); i++) {
883+
int srcRegLB =
884+
candidates[inst].dstSrcMap[i].opnd->getLinearizedStart();
885+
int srcRegRB =
886+
candidates[inst].dstSrcMap[i].opnd->getLinearizedEnd();
963887

964-
// If there is any non-removable offset, the move defining that offset
965-
// cannot be removed.
966-
std::vector<regMapBRA>::iterator dstSrcRegsIter;
967-
for (dstSrcRegsIter = candidates[inst].dstSrcMap.begin();
968-
dstSrcRegsIter != candidates[inst].dstSrcMap.end();) {
969-
std::vector<regMapBRA>::iterator nextIter = dstSrcRegsIter;
970-
nextIter++;
971-
int srcRegLB = (*dstSrcRegsIter).opnd->getLinearizedStart();
972-
int srcRegRB = (*dstSrcRegsIter).opnd->getLinearizedEnd();
973888
if (!(srcRegRB < dstRegLB || srcRegLB > dstRegRB)) {
974889
// Register is reused.
975-
dstSrcRegsIter =
976-
candidates[inst].dstSrcMap.erase(dstSrcRegsIter);
977-
} else {
978-
dstSrcRegsIter = nextIter;
890+
overwrite = true;
891+
break;
979892
}
980893
}
981894
}
@@ -987,24 +900,22 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
987900
}
988901
rInst = *scan_ri;
989902
}
990-
991-
// Due to extra mov for s0, so don't use s0 if equal or less than 1 mov
992-
// inst can be removed.
993-
if (candidates[inst].dstSrcMap.size() <= 1 &&
994-
builder.getuint32Option(vISA_EnableGatherWithImmPreRA) !=
995-
INDIRECT_TYPE::ALWAYS_S0) {
903+
if (overwrite) {
996904
candidates.erase(candidatesIt);
997-
} else {
998-
for (int j = 0; j < (int)candidatesIt->second.dstSrcMap.size(); j++) {
999-
G4_INST *movInst = candidatesIt->second.dstSrcMap[j].inst;
1000-
movInst->markDead();
1001-
}
1002905
}
1003906
}
1004907

1005908
ri++;
1006909
}
1007910

911+
for (candidatesIt = candidates.begin(); candidatesIt != candidates.end();
912+
candidatesIt++) {
913+
for (int i = 0; i < (int)candidatesIt->second.dstSrcMap.size(); i++) {
914+
G4_INST *movInst = candidatesIt->second.dstSrcMap[i].inst;
915+
movInst->markDead();
916+
}
917+
}
918+
1008919
// Replace the send instruction with sendi
1009920
// Remove the mov instructions that marked as dead
1010921
INST_LIST_ITER iter;
@@ -1015,7 +926,7 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
1015926

1016927
candidatesIt = candidates.find(inst);
1017928
if (candidatesIt != candidates.end()) {
1018-
replaceWithSendiAfterRA(bb, curIter, candidates[inst]);
929+
replaceWithSendiBeforeRA(bb, curIter, candidates[inst]);
1019930
}
1020931
if (inst->isDead()) {
1021932
bb->erase(curIter);

0 commit comments

Comments
 (0)