Skip to content

Commit e37fe2e

Browse files
bcheng0127igcbot
authored and committed
Changes in code.
1 parent 110047b commit e37fe2e

File tree

5 files changed

+35
-221
lines changed

5 files changed

+35
-221
lines changed

visa/G4_Declare.h

-7
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,6 @@ class G4_Declare {
9494
uint16_t forceSpilled : 1;
9595
uint16_t exclusiveLoad : 1;
9696
uint16_t isCmpUseOnly : 1;
97-
// indicate if the declare is local referenced only
98-
// Especially for the variable with pseodu_kill,
99-
// while will be removed in removeLifetimeOps pass.
100-
uint16_t isBBLocal : 1;
10197

10298
unsigned declId; // global decl id for this builder
10399

@@ -338,9 +334,6 @@ class G4_Declare {
338334
void setIsCmpUseOnly(bool b) { isCmpUseOnly = b; }
339335
bool getIsCmpUseOnly() const { return isCmpUseOnly; }
340336

341-
void setIsBBLocal(bool b) { isBBLocal = b; }
342-
bool getIsBBLocal() const { return isBBLocal; }
343-
344337
unsigned getNumRegNeeded() const;
345338

346339
void emit(std::ostream &output) const;

visa/LocalDataflow.cpp

-64
Original file line numberDiff line numberDiff line change
@@ -453,47 +453,6 @@ static void processReadOpnds(G4_BB *BB, G4_INST *Inst, LocalLivenessInfo &LLI) {
453453
}
454454
}
455455

456-
static void
457-
processReadOpndsForPseudoKill(G4_BB *BB, G4_INST *Inst,
458-
std::unordered_set<G4_Declare *> &pseudoKills) {
459-
if (Inst->isPseudoKill()) {
460-
return;
461-
}
462-
// (1) Indirect dst operand reads address.
463-
G4_DstRegRegion *Dst = Inst->getDst();
464-
if (Dst && Dst->isIndirect()) {
465-
G4_Declare *dcl = Dst->getTopDcl();
466-
if (pseudoKills.find(dcl) != pseudoKills.end()) {
467-
dcl->setIsBBLocal(false);
468-
pseudoKills.erase(dcl);
469-
}
470-
}
471-
472-
// (2) Direct and indirect source operands.
473-
for (auto OpNum :
474-
{Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
475-
Gen4_Operand_Number::Opnd_src2, Gen4_Operand_Number::Opnd_src3,
476-
Gen4_Operand_Number::Opnd_src4, Gen4_Operand_Number::Opnd_src5,
477-
Gen4_Operand_Number::Opnd_src6, Gen4_Operand_Number::Opnd_src7,
478-
Gen4_Operand_Number::Opnd_pred, Gen4_Operand_Number::Opnd_implAccSrc}) {
479-
G4_Operand *opnd = Inst->getOperand(OpNum);
480-
if (opnd == nullptr || opnd->isImm() || opnd->isNullReg() ||
481-
opnd->isLabel())
482-
continue;
483-
484-
G4_Declare *dcl = nullptr;
485-
if (Inst->isPseudoAddrMovIntrinsic()) {
486-
dcl =opnd->asAddrExp()->getRegVar()->getDeclare();
487-
} else {
488-
dcl = opnd->getTopDcl();
489-
}
490-
if (pseudoKills.find(dcl) != pseudoKills.end()) {
491-
dcl->setIsBBLocal(false);
492-
pseudoKills.erase(dcl);
493-
}
494-
}
495-
}
496-
497456
// Process writes. If this is a partial definition, then record this partial
498457
// definition. When all partial definitions together define this live read node,
499458
// it is killed and du/ud links are added.
@@ -528,12 +487,6 @@ static void processWriteOpnds(G4_BB *BB, G4_INST *Inst,
528487
}
529488

530489
void FlowGraph::localDataFlowAnalysis() {
531-
// For pseudo kill varaible
532-
// If there is use exposed in a BB, it's treated as global.
533-
// Otherwise, it's treated as local even the same pseudo kill may appear in
534-
// multiple BBs
535-
std::unordered_set<G4_Declare *> pesudoKilledDcls;
536-
537490
for (auto BB : BBs) {
538491
LocalLivenessInfo LLI(!BB->isAllLaneActive());
539492
for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
@@ -551,24 +504,7 @@ void FlowGraph::localDataFlowAnalysis() {
551504
continue;
552505
}
553506
processWriteOpnds(BB, Inst, LLI);
554-
555-
if (Inst->isPseudoKill() && Inst->getDst() && !Inst->getDst()->isNullReg()) {
556-
G4_Declare *dcl = Inst->getDst()->getTopDcl();
557-
pesudoKilledDcls.insert(dcl);
558-
// In case the use in anther BB is analyzed before define
559-
if (!globalOpndHT.isOpndGlobal(Inst->getDst())) {
560-
G4_Declare *dcl = Inst->getDst()->getTopDcl();
561-
dcl->setIsBBLocal(true);
562-
}
563-
}
564-
565507
processReadOpnds(BB, Inst, LLI);
566-
if (pesudoKilledDcls
567-
.size()) { // Process the operand using variable which
568-
// has psuedo kill. Since the scan is from back to
569-
// front, exposed use will make variable global
570-
processReadOpndsForPseudoKill(BB, Inst, pesudoKilledDcls);
571-
}
572508
}
573509

574510
// All left over live nodes are global.

visa/Optimizer.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -834,7 +834,7 @@ void Optimizer::s0SubAfterRA() {
834834
kernel.fg.resetLocalDataFlowData();
835835
kernel.fg.localDataFlowAnalysis();
836836

837-
SRSubPassAfterRA s0Sub(builder, kernel);
837+
SRSubPassBeforeRA s0Sub(builder, kernel);
838838
s0Sub.run();
839839
}
840840

visa/Passes/SRSubstitution.cpp

+25-140
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ static bool regSortCompare(regMap map1, regMap map2) {
1919
return false;
2020
}
2121

22-
static bool regSortCompareAfterRA(regMapBRA map1, regMapBRA map2) {
22+
static bool regSortCompareBeforeRA(regMapBRA map1, regMapBRA map2) {
2323
if (map1.opndNum < map2.opndNum) {
2424
return true;
2525
} else if (map1.opndNum > map2.opndNum) {
@@ -430,7 +430,7 @@ void SRSubPass::SRSub(G4_BB *bb) {
430430

431431
// Check if current instruction is the candidate of sendi.
432432
// Recorded as candidate.
433-
bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
433+
bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
434434
regCandidatesBRA &dstSrcRegs) {
435435
if (!inst->isSend()) {
436436
return false;
@@ -482,7 +482,6 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
482482
int movInstNum = 0;
483483
int32_t firstDefID = 0x7FFFFFFF; // the ID of the first instruction define the
484484
std::vector<std::pair<Gen4_Operand_Number, unsigned>> notRemoveableMap;
485-
std::vector<G4_INST *> immMovs;
486485
for (auto I = inst->def_begin(), E = inst->def_end(); I != E; ++I) {
487486
auto &&def = *I;
488487

@@ -573,90 +572,14 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
573572

574573
// It's not global define
575574
if (!(builder.getIsKernel() && kernel.fg.getNumBB() == 1)) {
576-
if (kernel.fg.globalOpndHT.isOpndGlobal(dstRgn) && !dstRgn->getTopDcl()->getIsBBLocal()) {
575+
if (kernel.fg.globalOpndHT.isOpndGlobal(dstRgn)) {
577576
return false;
578577
}
579578
}
580579

581580
return true;
582581
};
583582

584-
// mov (16) r81.0<1>:f 0x8:f // $52:&54:
585-
// mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
586-
// mov (16) r82.0<1>:f 0x0:f // $54:&56:
587-
// mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
588-
// mov (16) r83.0<1>:f 0x0:f // $56:&58:
589-
// mov (16|M16) r91.0<1>:f 0x0:f // $57:&59:
590-
// mov (16) r84.0<1>:f 0x0:f // $58:&60:
591-
// mov (16|M16) r92.0<1>:f 0x0:f // $59:&61:
592-
// mov (16) r85.0<1>:f 0x0:f // $60:&62:
593-
// mov (16|M16) r93.0<1>:f 0x0:f // $61:&63:
594-
// mov (16) r86.0<1>:f 0x0:f // $62:&64:
595-
// mov (16|M16) r94.0<1>:f 0x0:f // $63:&65:
596-
// mov (16) r87.0<1>:f 0x0:f // $64:&66:
597-
// mov (16|M16) r95.0<1>:f 0x0:f // $65:&67:
598-
// mov (16) r88.0<1>:f 0x0:f // $66:&68:
599-
// mov (16|M16) r96.0<1>:f 0x0:f // $67:&69:
600-
// ==>
601-
// mov (16) r81.0<1>:f 0x8:f // $52:&54:
602-
// mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
603-
// mov (16) r82.0<1>:f 0x0:f // $54:&56:
604-
// mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
605-
//
606-
// Reuse r81, r89, r82, r90 in the gather send
607-
auto getRemoveableImm = [this](G4_INST *inst,
608-
std::vector<G4_INST *> &immMovs) {
609-
// The instruction is only used for payload preparation.
610-
if (inst->use_size() != 1) {
611-
return (G4_INST *)nullptr;
612-
}
613-
614-
G4_DstRegRegion *dst = inst->getDst();
615-
// dst GRF aligned and contigous
616-
if (dst->getSubRegOff() || dst->getHorzStride() != 1) {
617-
return (G4_INST *)nullptr;
618-
}
619-
620-
if (kernel.fg.globalOpndHT.isOpndGlobal(dst)) {
621-
return (G4_INST *)nullptr;
622-
}
623-
624-
// GRF Alignment with physical register assigned
625-
if (dst->getLinearizedStart() % builder.getGRFSize() != 0) {
626-
return (G4_INST *)nullptr;
627-
}
628-
629-
// If the destination operand size is less than 1 GRF
630-
if ((dst->getLinearizedEnd() - dst->getLinearizedStart() + 1) <
631-
builder.getGRFSize()) {
632-
return (G4_INST *)nullptr;
633-
}
634-
635-
G4_Operand *src = inst->getSrc(0);
636-
int64_t imm = src->asImm()->getImm();
637-
for (size_t i = 0; i < immMovs.size(); i++) {
638-
G4_INST *imov = immMovs[i];
639-
G4_Operand *isrc = imov->getSrc(0);
640-
int64_t iimm = isrc->asImm()->getImm();
641-
if (imm == iimm &&
642-
src->getType() == isrc->getType() && // Same value and same type
643-
inst->getDst()->getType() ==
644-
imov->getDst()->getType() && // Same dst type
645-
inst->getDst()->asDstRegRegion()->getHorzStride() ==
646-
imov->getDst()
647-
->asDstRegRegion()
648-
->getHorzStride() && // Same region
649-
inst->getExecSize() == imov->getExecSize() && // Same execution size
650-
inst->getMaskOffset() ==
651-
imov->getMaskOffset()) { // Same mask offset
652-
return imov;
653-
}
654-
}
655-
immMovs.push_back(inst);
656-
657-
return (G4_INST *)nullptr;
658-
};
659-
660583
// If opndNum + offset is defined multiple times, it cannot be removed
661584
G4_Operand *dst = movInst->getDst();
662585
unsigned offset = dst->getLeftBound() / builder.getGRFSize();
@@ -681,22 +604,6 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
681604
movInstNum++;
682605
}
683606
} else {
684-
if (movInst->getSrc(0) && movInst->getSrc(0)->isImm()) {
685-
// Check if there is mov instruction with same imm value
686-
G4_INST *lvnMov = getRemoveableImm(movInst, immMovs);
687-
688-
if (lvnMov) {
689-
// The offset is the offset of original dst, which is used to identify
690-
// the original register used in send.
691-
// The opndNum is the opndNum of send.
692-
regMapBRA regPair(movInst, opndNum, offset,
693-
lvnMov->getDst()); // the lvn mov dst can be reused
694-
dstSrcRegs.dstSrcMap.push_back(regPair);
695-
firstDefID = std::min(firstDefID, def.first->getLocalId());
696-
movInstNum++;
697-
continue;
698-
}
699-
}
700607
notRemoveableMap.push_back(std::make_pair(opndNum, offset));
701608
}
702609
}
@@ -732,14 +639,14 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
732639
dstSrcRegs.firstDefID = firstDefID;
733640
// Sort according to the register order in the original payload
734641
std::sort(dstSrcRegs.dstSrcMap.begin(), dstSrcRegs.dstSrcMap.end(),
735-
regSortCompareAfterRA);
642+
regSortCompareBeforeRA);
736643

737644
return true;
738645
}
739646

740647
// Replace the send instruction with the payload of
741648
// Insert the scalar register initialization mov instructions.
742-
bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
649+
bool SRSubPassBeforeRA::replaceWithSendiBeforeRA(G4_BB *bb,
743650
INST_LIST_ITER instIter,
744651
regCandidatesBRA &dstSrcRegs) {
745652
G4_INST *inst = *instIter;
@@ -877,7 +784,7 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
877784
return true;
878785
}
879786

880-
void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
787+
void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
881788
bb->resetLocalIds();
882789

883790
class CmpFirstDef {
@@ -896,7 +803,7 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
896803
G4_INST *inst = *ii;
897804

898805
regCandidatesBRA dstSrcRegs;
899-
if (!isSRCandidateAfterRA(inst, dstSrcRegs)) {
806+
if (!isSRCandidateBeforeRA(inst, dstSrcRegs)) {
900807
ii++;
901808
dstSrcRegs.dstSrcMap.clear();
902809
continue;
@@ -933,26 +840,12 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
933840
candidatesIt = candidates.find(inst);
934841
//Is candidate send
935842
if (candidatesIt != candidates.end()) {
843+
bool overwrite = false;
936844
// Scan backward from the send instruction.
937845
INST_LIST_RITER scan_ri = ri;
938846
scan_ri++;
939847
G4_INST *rInst = *scan_ri;
940-
941848
while (rInst->getLocalId() > candidates[inst].firstDefID) {
942-
if (rInst->isDead()) {
943-
// If the inst is marked as dead, it's dst will not kill other value
944-
// Such as in following case, if third instruction is removed, r64
945-
// value of first instruction is kept.
946-
// mov (16) r16.0<1>:ud r64.0<1;1,0>:ud // $214:&226:
947-
// mov (16) r17.0<1>:ud r66.0<1;1,0>:ud // $216:&228:
948-
// mov (16) r64.0<1>:ud r68.0<1;1,0>:ud // $218:&230:
949-
scan_ri++;
950-
if (scan_ri == rend) {
951-
break;
952-
}
953-
rInst = *scan_ri;
954-
continue;
955-
}
956849
G4_Operand *dst = rInst->getDst();
957850
if (dst && !dst->isNullReg()) {
958851
G4_VarBase *base = dst->getBase();
@@ -986,22 +879,16 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
986879
G4_Operand *dst = rInst->getDst();
987880
unsigned short dstRegLB = dst->getLinearizedStart();
988881
unsigned short dstRegRB = dst->getLinearizedEnd();
882+
for (int i = 0; i < (int)candidates[inst].dstSrcMap.size(); i++) {
883+
int srcRegLB =
884+
candidates[inst].dstSrcMap[i].opnd->getLinearizedStart();
885+
int srcRegRB =
886+
candidates[inst].dstSrcMap[i].opnd->getLinearizedEnd();
989887

990-
// There is any none removeable offset, the offset define move
991-
// cannot be removed.
992-
std::vector<regMapBRA>::iterator dstSrcRegsIter;
993-
for (dstSrcRegsIter = candidates[inst].dstSrcMap.begin();
994-
dstSrcRegsIter != candidates[inst].dstSrcMap.end();) {
995-
std::vector<regMapBRA>::iterator nextIter = dstSrcRegsIter;
996-
nextIter++;
997-
int srcRegLB = (*dstSrcRegsIter).opnd->getLinearizedStart();
998-
int srcRegRB = (*dstSrcRegsIter).opnd->getLinearizedEnd();
999888
if (!(srcRegRB < dstRegLB || srcRegLB > dstRegRB)) {
1000889
// Register is reused.
1001-
dstSrcRegsIter =
1002-
candidates[inst].dstSrcMap.erase(dstSrcRegsIter);
1003-
} else {
1004-
dstSrcRegsIter = nextIter;
890+
overwrite = true;
891+
break;
1005892
}
1006893
}
1007894
}
@@ -1013,24 +900,22 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
1013900
}
1014901
rInst = *scan_ri;
1015902
}
1016-
1017-
// Due to extra mov for s0, so don't use s0 if equal or less than 1 mov
1018-
// inst can be removed.
1019-
if (candidates[inst].dstSrcMap.size() <= 1 &&
1020-
builder.getuint32Option(vISA_EnableGatherWithImmPreRA) !=
1021-
INDIRECT_TYPE::ALWAYS_S0) {
903+
if (overwrite) {
1022904
candidates.erase(candidatesIt);
1023-
} else {
1024-
for (int j = 0; j < (int)candidatesIt->second.dstSrcMap.size(); j++) {
1025-
G4_INST *movInst = candidatesIt->second.dstSrcMap[j].inst;
1026-
movInst->markDead();
1027-
}
1028905
}
1029906
}
1030907

1031908
ri++;
1032909
}
1033910

911+
for (candidatesIt = candidates.begin(); candidatesIt != candidates.end();
912+
candidatesIt++) {
913+
for (int i = 0; i < (int)candidatesIt->second.dstSrcMap.size(); i++) {
914+
G4_INST *movInst = candidatesIt->second.dstSrcMap[i].inst;
915+
movInst->markDead();
916+
}
917+
}
918+
1034919
// Replace the send instruction with sendi
1035920
// Remove the mov instructions that marked as dead
1036921
INST_LIST_ITER iter;
@@ -1041,7 +926,7 @@ void SRSubPassAfterRA::SRSubAfterRA(G4_BB *bb) {
1041926

1042927
candidatesIt = candidates.find(inst);
1043928
if (candidatesIt != candidates.end()) {
1044-
replaceWithSendiAfterRA(bb, curIter, candidates[inst]);
929+
replaceWithSendiBeforeRA(bb, curIter, candidates[inst]);
1045930
}
1046931
if (inst->isDead()) {
1047932
bb->erase(curIter);

0 commit comments

Comments
 (0)