From d0ecf99cbf8264f87be45e97ebc8f1bab73e4794 Mon Sep 17 00:00:00 2001 From: zhijian Date: Tue, 19 Nov 2024 21:04:53 +0000 Subject: [PATCH 1/5] [PowerPC] Deprecate uses of ISD::ADDC/ISD::ADDE/ISD::SUBC/ISD::SUBE --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 183 ++++++++++----- llvm/lib/Target/PowerPC/PPCISelLowering.h | 9 +- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 20 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 17 ++ llvm/lib/Target/PowerPC/PPCInstrInfo.td | 44 +++- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 7 + llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 3 + llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 1 + llvm/test/CodeGen/PowerPC/adde_return_type.ll | 2 +- llvm/test/CodeGen/PowerPC/addegluecrash.ll | 24 +- llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll | 16 +- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 8 +- .../CodeGen/PowerPC/aix-cc-byval-split.ll | 8 +- .../CodeGen/PowerPC/aix-tls-gd-longlong.ll | 48 ++-- .../PowerPC/aix-tls-le-ldst-longlong.ll | 120 +++++----- .../PowerPC/aix-tls-le-xcoff-reloc-large32.ll | 24 +- .../PowerPC/atomicrmw-cond-sub-clamp.ll | 6 +- llvm/test/CodeGen/PowerPC/cvt_i64_to_fp.ll | 6 +- llvm/test/CodeGen/PowerPC/inc-of-add.ll | 2 +- llvm/test/CodeGen/PowerPC/pr35688.ll | 3 +- llvm/test/CodeGen/PowerPC/pr36292.ll | 7 +- llvm/test/CodeGen/PowerPC/pr40922.ll | 9 +- llvm/test/CodeGen/PowerPC/pr45448.ll | 12 +- llvm/test/CodeGen/PowerPC/sat-add.ll | 35 +-- llvm/test/CodeGen/PowerPC/select.ll | 20 +- llvm/test/CodeGen/PowerPC/uaddo-32.ll | 50 ++-- llvm/test/CodeGen/PowerPC/uaddo-64.ll | 82 ++++--- .../umulo-128-legalisation-lowering.ll | 219 +++++++++--------- .../PowerPC/urem-seteq-illegal-types.ll | 23 +- 29 files changed, 597 insertions(+), 411 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ab78f33f5a630..9b0e7cd61eefd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -196,7 +196,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } } + // PowerPC uses addo,addo_carry,subo,subo_carry to propagate carry. setOperationAction(ISD::UADDO, RegVT, Custom); + setOperationAction(ISD::USUBO, RegVT, Custom); + setOperationAction(ISD::UADDO_CARRY, RegVT, Custom); + setOperationAction(ISD::USUBO_CARRY, RegVT, Custom); // On P10, the default lowering generates better code using the // setbc instruction. @@ -266,15 +270,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); } - // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry. - const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; - for (MVT VT : ScalarIntVTs) { - setOperationAction(ISD::ADDC, VT, Legal); - setOperationAction(ISD::ADDE, VT, Legal); - setOperationAction(ISD::SUBC, VT, Legal); - setOperationAction(ISD::SUBE, VT, Legal); - } - if (Subtarget.useCRBits()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -1864,6 +1859,14 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PPCISD::SETBC"; case PPCISD::SETBCR: return "PPCISD::SETBCR"; + case PPCISD::ADDC: + return "PPCISD::ADDC"; + case PPCISD::ADDE: + return "PPCISD::ADDE"; + case PPCISD::SUBC: + return "PPCISD::SUBC"; + case PPCISD::SUBE: + return "PPCISD::SUBE"; } return nullptr; } @@ -12150,43 +12153,74 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("ERROR:Should return for all cases within swtich."); } -SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const { - // Default to target independent lowering if there is a logical user of the - // carry-bit. - for (SDNode *U : Op->users()) { - if (U->getOpcode() == ISD::SELECT) - return SDValue(); - if (ISD::isBitwiseLogicOp(U->getOpcode())) { - for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) { - if (U->getOperand(i).getOpcode() != ISD::UADDO && - U->getOperand(i).getOpcode() != ISD::MERGE_VALUES) - return SDValue(); - } - } - } - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDLoc dl(Op); - - // Default to target independent lowering for special cases handled there. - if (isOneConstant(RHS) || isAllOnesConstant(RHS)) - return SDValue(); +static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, + SelectionDAG &DAG, + const PPCSubtarget &STI) { + SDLoc DL(Value); + if (STI.useCRBits()) + Value = DAG.getNode(ISD::SELECT, DL, SumType, Value, + DAG.getConstant(1, DL, SumType), + DAG.getConstant(0, DL, SumType)); + else + Value = DAG.getZExtOrTrunc(Value, DL, SumType); + SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32), + Value, DAG.getAllOnesConstant(DL, SumType)); + return Sum.getValue(1); +} - EVT VT = Op.getNode()->getValueType(0); +static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, + EVT CarryType, SelectionDAG &DAG, + const PPCSubtarget &STI) { + SDLoc DL(Flag); + SDValue Zero = DAG.getConstant(0, DL, SumType); + SDValue Carry = DAG.getNode( + PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag); + if (STI.useCRBits()) + return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE); + return DAG.getZExtOrTrunc(Carry, DL, CarryType); +} - SDValue ADDC; - SDValue Overflow; - SDVTList VTs = Op.getNode()->getVTList(); +SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const { - ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS); - Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue), - DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT), - ADDC.getValue(1)); - SDValue OverflowTrunc = - DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow); - SDValue Res = - DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc); - return Res; + SDLoc DL(Op); + SDNode *N = Op.getNode(); + EVT VT = N->getValueType(0); + EVT CarryType = N->getValueType(1); + unsigned Opc = N->getOpcode(); + bool IsAdd = Opc == ISD::UADDO; + Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC; + SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32), + N->getOperand(0), N->getOperand(1)); + SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, + DAG, Subtarget); + if (!IsAdd) + Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry, + DAG.getAllOnesConstant(DL, CarryType)); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry); +} + +SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDNode *N = Op.getNode(); + unsigned Opc = N->getOpcode(); + EVT VT = N->getValueType(0); + EVT CarryType = N->getValueType(1); + SDValue CarryOp = N->getOperand(2); + bool IsAdd = Opc == ISD::UADDO_CARRY; + Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE; + if (!IsAdd) + CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp, + DAG.getAllOnesConstant(DL, CarryOp.getValueType())); + CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget); + SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32), + Op.getOperand(0), Op.getOperand(1), CarryOp); + CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG, + Subtarget); + if (!IsAdd) + CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp, + DAG.getAllOnesConstant(DL, CarryOp.getValueType())); + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp); } SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const { @@ -12217,8 +12251,8 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const { /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - default: llvm_unreachable("Wasn't expecting to be able to lower this!"); - case ISD::UADDO: return LowerUaddo(Op, DAG); + default: + llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::FPOW: return lowerPow(Op, DAG); case ISD::FSIN: return lowerSin(Op, DAG); case ISD::FCOS: return lowerCos(Op, DAG); @@ -12311,6 +12345,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerATOMIC_LOAD_STORE(Op, DAG); case ISD::IS_FPCLASS: return LowerIS_FPCLASS(Op, DAG); + case ISD::UADDO: + case ISD::USUBO: + return LowerADDSUBO(Op, DAG); + case ISD::UADDO_CARRY: + case ISD::USUBO_CARRY: + return LowerADDSUBO_CARRY(Op, DAG); } } @@ -16246,6 +16286,21 @@ static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) { return true; } +static SDValue DAGCombineAddc(SDNode *N, + llvm::PPCTargetLowering::DAGCombinerInfo &DCI) { + if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) { + // (ADDC (ADDE 0, 0, C), -1) -> C + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS->getOpcode() == PPCISD::ADDE && + isNullConstant(LHS->getOperand(0)) && + isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) { + return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2)); + } + } + return SDValue(); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -17034,6 +17089,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); + case PPCISD::ADDC: + return DAGCombineAddc(N, DCI); } return SDValue(); @@ -17087,6 +17144,16 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.Zero = 0xFFFF0000; break; } + case PPCISD::ADDE: { + if (Op.getResNo() == 0) { + // (0|1), _ = ADDE 0, 0, CARRY + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + if (isNullConstant(LHS) && isNullConstant(RHS)) + Known.Zero = ~1ULL; + } + break; + } case ISD::INTRINSIC_WO_CHAIN: { switch (Op.getConstantOperandVal(0)) { default: break; @@ -18356,7 +18423,8 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, return SDValue(); SDLoc DL(N); - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue); + EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32; + SDVTList VTs = DAG.getVTList(MVT::i64, CarryType); SDValue Cmp = RHS.getOperand(0); SDValue Z = Cmp.getOperand(0); auto *Constant = cast(Cmp.getOperand(1)); @@ -18374,11 +18442,13 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, DAG.getConstant(NegConstant, DL, MVT::i64)); SDValue AddOrZ = NegConstant != 0 ? Add : Z; - SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue), - AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64)); - return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), + SDValue Addc = + DAG.getNode(ISD::UADDO, DL, DAG.getVTList(MVT::i64, CarryType), AddOrZ, + DAG.getConstant(-1ULL, DL, MVT::i64)); + return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS, + DAG.getConstant(0, DL, MVT::i64), SDValue(Addc.getNode(), 1)); - } + } case ISD::SETEQ: { // when C == 0 // --> addze X, (subfic Z, 0).carry @@ -18389,11 +18459,14 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, DAG.getConstant(NegConstant, DL, MVT::i64)); SDValue AddOrZ = NegConstant != 0 ? Add : Z; - SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue), - DAG.getConstant(0, DL, MVT::i64), AddOrZ); - return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), - SDValue(Subc.getNode(), 1)); - } + SDValue Subc = + DAG.getNode(ISD::USUBO, DL, DAG.getVTList(MVT::i64, CarryType), + DAG.getConstant(0, DL, MVT::i64), AddOrZ); + SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1), + DAG.getAllOnesConstant(DL, CarryType)); + return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS, + DAG.getConstant(0, DL, MVT::i64), Invert); + } } return SDValue(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 1f22aa16a89be..7365f3103276c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -161,6 +161,12 @@ namespace llvm { SRA, SHL, + /// These nodes represent PPC arithmetic operations with carry. + ADDC, + ADDE, + SUBC, + SUBE, + /// FNMSUB - Negated multiply-subtract instruction. FNMSUB, @@ -1280,7 +1286,6 @@ namespace llvm { SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; @@ -1316,6 +1321,8 @@ namespace llvm { SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToLibCall(const char *LibCallName, SDValue Op, SelectionDAG &DAG) const; SDValue lowerLibCallBasedOnType(const char *LibCallFloatName, diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index bcac0de55d9d3..4205b3086a3c9 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -760,13 +760,13 @@ def STFDXTLS : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg: let isCommutable = 1 in defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB), "addc", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i64:$RT, (addc i64:$RA, i64:$RB))]>, + [(set i64:$RT, (PPCaddc i64:$RA, i64:$RB))]>, PPC970_DGroup_Cracked; let Defs = [CARRY] in def ADDIC8 : DForm_2<12, (outs g8rc:$RST), (ins g8rc:$RA, s16imm64:$D), "addic $RST, $RA, $D", IIC_IntGeneral, - [(set i64:$RST, (addc i64:$RA, imm64SExt16:$D))]>; + [(set i64:$RST, (PPCaddc i64:$RA, imm64SExt16:$D))]>; def ADDI8 : DForm_2<14, (outs g8rc:$RST), (ins g8rc_nox0:$RA, s16imm64:$D), "addi $RST, $RA, $D", IIC_IntSimple, [(set i64:$RST, (add i64:$RA, imm64SExt16:$D))]>; @@ -782,11 +782,11 @@ def LA8 : DForm_2<14, (outs g8rc:$RST), (ins g8rc_nox0:$RA, s16imm64:$D), let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs g8rc:$RST), (ins g8rc:$RA, s16imm64:$D), "subfic $RST, $RA, $D", IIC_IntGeneral, - [(set i64:$RST, (subc imm64SExt16:$D, i64:$RA))]>; + [(set i64:$RST, (PPCsubc imm64SExt16:$D, i64:$RA))]>; } defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB), "subfc", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i64:$RT, (subc i64:$RB, i64:$RA))]>, + [(set i64:$RT, (PPCsubc i64:$RB, i64:$RA))]>, PPC970_DGroup_Cracked; defm SUBF8 : XOForm_1rx<31, 40, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB), "subf", "$RT, $RA, $RB", IIC_IntGeneral, @@ -798,22 +798,22 @@ let Uses = [CARRY] in { let isCommutable = 1 in defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB), "adde", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i64:$RT, (adde i64:$RA, i64:$RB))]>; + [(set i64:$RT, (PPCadde i64:$RA, i64:$RB, CARRY))]>; defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$RT), (ins g8rc:$RA), "addme", "$RT, $RA", IIC_IntGeneral, - [(set i64:$RT, (adde i64:$RA, -1))]>; + [(set i64:$RT, (PPCadde i64:$RA, -1, CARRY))]>; defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$RT), (ins g8rc:$RA), "addze", "$RT, $RA", IIC_IntGeneral, - [(set i64:$RT, (adde i64:$RA, 0))]>; + [(set i64:$RT, (PPCadde i64:$RA, 0, CARRY))]>; defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB), "subfe", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i64:$RT, (sube i64:$RB, i64:$RA))]>; + [(set i64:$RT, (PPCsube i64:$RB, i64:$RA, CARRY))]>; defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$RT), (ins g8rc:$RA), "subfme", "$RT, $RA", IIC_IntGeneral, - [(set i64:$RT, (sube -1, i64:$RA))]>; + [(set i64:$RT, (PPCsube -1, i64:$RA, CARRY))]>; defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$RT), (ins g8rc:$RA), "subfze", "$RT, $RA", IIC_IntGeneral, - [(set i64:$RT, (sube 0, i64:$RA))]>; + [(set i64:$RT, (PPCsube 0, i64:$RA, CARRY))]>; } } // isCodeGenOnly diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index f017073911950..97e9f59328f7e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1758,6 +1758,23 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; + } else if ((PPC::G8RCRegClass.contains(DestReg) || + PPC::GPRCRegClass.contains(DestReg)) && + SrcReg == PPC::CARRY) { + bool Is64Bit = PPC::G8RCRegClass.contains(DestReg); + BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MFSPR8 : PPC::MFSPR), DestReg) + .addImm(1) + .addReg(PPC::CARRY, RegState::Implicit); + return; + } else if ((PPC::G8RCRegClass.contains(SrcReg) || + PPC::GPRCRegClass.contains(SrcReg)) && + DestReg == PPC::CARRY) { + bool Is64Bit = PPC::G8RCRegClass.contains(SrcReg); + BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MTSPR8 : PPC::MTSPR)) + .addImm(1) + .addReg(SrcReg) + .addReg(PPC::CARRY, RegState::ImplicitDefine); + return; } unsigned Opc; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 175ba6009364a..e2864c2405967 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -124,6 +124,21 @@ def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0> ]>; +// RES, CARRY = op LHS, RHS +def SDT_PPCBinaryArithWithFlagsOut : SDTypeProfile<2, 2, [ + SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, +]>; + +// RES, CARRY = op LHS, RHS, CARRY +def SDT_PPCBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, [ + SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisSameAs<1, 4>, + SDTCisVT<1, i32>, +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // @@ -401,6 +416,15 @@ def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR", def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR", SDTIntUnaryOp, []>; +def PPCaddc : SDNode<"PPCISD::ADDC", SDT_PPCBinaryArithWithFlagsOut, + [SDNPCommutative]>; +def PPCadde : SDNode<"PPCISD::ADDE", SDT_PPCBinaryArithWithFlagsInOut, + []>; +def PPCsubc : SDNode<"PPCISD::SUBC", SDT_PPCBinaryArithWithFlagsOut, + []>; +def PPCsube : SDNode<"PPCISD::SUBE", SDT_PPCBinaryArithWithFlagsInOut, + []>; + //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. // @@ -2291,7 +2315,7 @@ let BaseName = "addic" in { let Defs = [CARRY] in def ADDIC : DForm_2<12, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D), "addic $RST, $RA, $D", IIC_IntGeneral, - [(set i32:$RST, (addc i32:$RA, imm32SExt16:$D))]>, + [(set i32:$RST, (PPCaddc i32:$RA, imm32SExt16:$D))]>, RecFormRel, PPC970_DGroup_Cracked; let Defs = [CARRY, CR0] in def ADDIC_rec : DForm_2<13, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D), @@ -2312,7 +2336,7 @@ def MULLI : DForm_2< 7, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D), let Defs = [CARRY] in def SUBFIC : DForm_2< 8, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D), "subfic $RST, $RA, $D", IIC_IntGeneral, - [(set i32:$RST, (subc imm32SExt16:$D, i32:$RA))]>; + [(set i32:$RST, (PPCsubc imm32SExt16:$D, i32:$RA))]>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs gprc:$RST), (ins s16imm:$D), @@ -2909,7 +2933,7 @@ def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$RT), (ins gprc:$RA, tlsreg32:$RB let isCommutable = 1 in defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), "addc", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i32:$RT, (addc i32:$RA, i32:$RB))]>, + [(set i32:$RT, (PPCaddc i32:$RA, i32:$RB))]>, PPC970_DGroup_Cracked; defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), @@ -2942,7 +2966,7 @@ defm SUBF : XOForm_1rx<31, 40, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), [(set i32:$RT, (sub i32:$RB, i32:$RA))]>; defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), "subfc", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i32:$RT, (subc i32:$RB, i32:$RA))]>, + [(set i32:$RT, (PPCsubc i32:$RB, i32:$RA))]>, PPC970_DGroup_Cracked; defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$RT), (ins gprc:$RA), "neg", "$RT, $RA", IIC_IntSimple, @@ -2951,22 +2975,22 @@ let Uses = [CARRY] in { let isCommutable = 1 in defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), "adde", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i32:$RT, (adde i32:$RA, i32:$RB))]>; + [(set i32:$RT, (PPCadde i32:$RA, i32:$RB, CARRY))]>; defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$RT), (ins gprc:$RA), "addme", "$RT, $RA", IIC_IntGeneral, - [(set i32:$RT, (adde i32:$RA, -1))]>; + [(set i32:$RT, (PPCadde i32:$RA, -1, CARRY))]>; defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$RT), (ins gprc:$RA), "addze", "$RT, $RA", IIC_IntGeneral, - [(set i32:$RT, (adde i32:$RA, 0))]>; + [(set i32:$RT, (PPCadde i32:$RA, 0, CARRY))]>; defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), "subfe", "$RT, $RA, $RB", IIC_IntGeneral, - [(set i32:$RT, (sube i32:$RB, i32:$RA))]>; + [(set i32:$RT, (PPCsube i32:$RB, i32:$RA, CARRY))]>; defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$RT), (ins gprc:$RA), "subfme", "$RT, $RA", IIC_IntGeneral, - [(set i32:$RT, (sube -1, i32:$RA))]>; + [(set i32:$RT, (PPCsube -1, i32:$RA, CARRY))]>; defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$RT), (ins gprc:$RA), "subfze", "$RT, $RA", IIC_IntGeneral, - [(set i32:$RT, (sube 0, i32:$RA))]>; + [(set i32:$RT, (PPCsube 0, i32:$RA, CARRY))]>; } } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index b60a91be82406..2177dba1e5762 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -625,6 +625,13 @@ bool PPCRegisterInfo::getRegAllocationHints(Register VirtReg, return BaseImplRetVal; } +const TargetRegisterClass * +PPCRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &PPC::CARRYRCRegClass) + return TM.isPPC64() ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + return RC; +} + unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const PPCFrameLowering *TFI = getFrameLowering(MF); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 274c7cb68ae0a..21b6f7b13939a 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -76,6 +76,9 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override; + const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 3cb7cd9d8f229..8b690b7b833b3 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -494,6 +494,7 @@ def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> { def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; + let isAllocatable = 0; } // Make AllocationOrder as similar as G8RC's to avoid potential spilling. diff --git a/llvm/test/CodeGen/PowerPC/adde_return_type.ll b/llvm/test/CodeGen/PowerPC/adde_return_type.ll index 7ce11079a6267..47c5efc35afc6 100644 --- a/llvm/test/CodeGen/PowerPC/adde_return_type.ll +++ b/llvm/test/CodeGen/PowerPC/adde_return_type.ll @@ -3,7 +3,7 @@ ; RUN: < %s -o /dev/null 2>&1 | FileCheck %s define i64 @testAddeReturnType(i64 %X, i64 %Z) { -; CHECK: Legally typed node: {{.*}}: i64,glue = adde {{.*}} +; CHECK: Legally typed node: {{.*}}: i64,i1 = uaddo {{.*}} %cmp = icmp ne i64 %Z, 0 %conv1 = zext i1 %cmp to i64 %add = add nsw i64 %conv1, %X diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll index a711b09b9bdfd..7cd94c0e4c2d5 100644 --- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -9,20 +9,20 @@ define void @bn_mul_comba8(ptr nocapture %r, ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: std 4, -8(1) # 8-byte Folded Spill ; CHECK-NEXT: mr 4, 3 ; CHECK-NEXT: ld 3, -8(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 9, 0(3) -; CHECK-NEXT: ld 8, 0(5) -; CHECK-NEXT: mulhdu 7, 8, 9 +; CHECK-NEXT: ld 6, 0(3) +; CHECK-NEXT: ld 11, 0(5) +; CHECK-NEXT: mulhdu 8, 11, 6 ; CHECK-NEXT: ld 3, 8(3) -; CHECK-NEXT: mulld 6, 3, 9 -; CHECK-NEXT: mulhdu 3, 3, 9 -; CHECK-NEXT: addc 6, 6, 7 -; CHECK-NEXT: addze 3, 3 -; CHECK-NEXT: ld 5, 8(5) -; CHECK-NEXT: mulld 7, 5, 8 -; CHECK-NEXT: mulhdu 5, 5, 8 -; CHECK-NEXT: addc 6, 6, 7 +; CHECK-NEXT: mulld 7, 3, 6 +; CHECK-NEXT: addc 9, 7, 8 +; CHECK-NEXT: ld 10, 8(5) +; CHECK-NEXT: mulhdu 5, 10, 11 +; CHECK-NEXT: mulld 10, 10, 11 +; CHECK-NEXT: addc 9, 9, 10 ; CHECK-NEXT: addze 5, 5 -; CHECK-NEXT: add 3, 5, 3 +; CHECK-NEXT: addc 7, 7, 8 +; CHECK-NEXT: mulhdu 3, 3, 6 +; CHECK-NEXT: adde 3, 5, 3 ; CHECK-NEXT: cmpld 3, 5 ; CHECK-NEXT: crmove 20, 0 ; CHECK-NEXT: li 5, 0 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll index 501227c9072c4..aead5762d0921 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll @@ -1103,13 +1103,13 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6 ; 32BIT-NEXT: renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0) ; 32BIT-NEXT: renamable $r12 = LWZ 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4) ; 32BIT-NEXT: renamable $r0 = LBZ 3, %fixed-stack.1 :: (load (s8) from %fixed-stack.1 + 3, basealign 4) - ; 32BIT-NEXT: renamable $r31 = LWZ 4, %fixed-stack.3 :: (load (s32) from %fixed-stack.3 + 4, basealign 16) - ; 32BIT-NEXT: renamable $r30 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16) + ; 32BIT-NEXT: renamable $r31 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16) + ; 32BIT-NEXT: renamable $r30 = LWZ 4, %fixed-stack.3 :: (load (s32) from %fixed-stack.3 + 4, basealign 16) ; 32BIT-NEXT: renamable $r29 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5, align 8) ; 32BIT-NEXT: renamable $r28 = LBZ 3, %fixed-stack.6 :: (load (s8) from %fixed-stack.6 + 3, basealign 4) ; 32BIT-NEXT: renamable $r27 = LHA 2, %fixed-stack.7 :: (load (s16) from %fixed-stack.7 + 2, basealign 4) - ; 32BIT-NEXT: renamable $r26 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8) - ; 32BIT-NEXT: renamable $r25 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8) + ; 32BIT-NEXT: renamable $r26 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8) + ; 32BIT-NEXT: renamable $r25 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8) ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 @@ -1120,8 +1120,8 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6 ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r9 ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r10 ; 32BIT-NEXT: renamable $r6 = SRAWI renamable $r3, 31, implicit-def dead $carry - ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r26, implicit-def $carry - ; 32BIT-NEXT: renamable $r6 = ADDE killed renamable $r6, killed renamable $r25, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r25, implicit-def $carry + ; 32BIT-NEXT: renamable $r6 = ADDE killed renamable $r6, killed renamable $r26, implicit-def dead $carry, implicit $carry ; 32BIT-NEXT: renamable $r7 = SRAWI renamable $r27, 31, implicit-def dead $carry ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r27, implicit-def $carry ; 32BIT-NEXT: renamable $r6 = ADDE killed renamable $r6, killed renamable $r7, implicit-def dead $carry, implicit $carry @@ -1131,8 +1131,8 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6 ; 32BIT-NEXT: renamable $r6 = ADDZE killed renamable $r6, implicit-def dead $carry, implicit $carry ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r12, implicit-def $carry ; 32BIT-NEXT: renamable $r4 = ADDE killed renamable $r6, killed renamable $r4, implicit-def dead $carry, implicit $carry - ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r31, implicit-def $carry - ; 32BIT-NEXT: renamable $r4 = ADDE killed renamable $r4, killed renamable $r30, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r30, implicit-def $carry + ; 32BIT-NEXT: renamable $r4 = ADDE killed renamable $r4, killed renamable $r31, implicit-def dead $carry, implicit $carry ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r0, implicit-def $carry ; 32BIT-NEXT: renamable $r6 = ADDZE killed renamable $r4, implicit-def dead $carry, implicit $carry ; 32BIT-NEXT: renamable $r4 = ADDC killed renamable $r3, killed renamable $r11, implicit-def $carry diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 79c59e925302a..8f33f5ef863e6 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -1213,14 +1213,14 @@ define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6 ; ASM32PWR4-NEXT: addc 3, 3, 6 ; ASM32PWR4-NEXT: addze 6, 7 ; ASM32PWR4-NEXT: addc 3, 3, 9 -; ASM32PWR4-NEXT: lwz 5, 84(1) +; ASM32PWR4-NEXT: lwz 7, 84(1) ; ASM32PWR4-NEXT: addze 6, 6 ; ASM32PWR4-NEXT: addc 3, 3, 31 -; ASM32PWR4-NEXT: lwz 7, 80(1) +; ASM32PWR4-NEXT: lwz 5, 80(1) ; ASM32PWR4-NEXT: adde 6, 6, 30 -; ASM32PWR4-NEXT: addc 3, 3, 5 +; ASM32PWR4-NEXT: addc 3, 3, 7 ; ASM32PWR4-NEXT: lbz 8, 91(1) -; ASM32PWR4-NEXT: adde 5, 6, 7 +; ASM32PWR4-NEXT: adde 5, 6, 5 ; ASM32PWR4-NEXT: addc 3, 3, 8 ; ASM32PWR4-NEXT: lbz 6, 103(1) ; ASM32PWR4-NEXT: addze 5, 5 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll index f1bf7c262317d..9b1893b111556 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll @@ -36,17 +36,17 @@ entry: ; CHECK32: bb.0.entry: ; CHECK32-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 -; CHECK32: renamable $r[[REG1:[0-9]+]] = LWZ 84, %fixed-stack.0 +; CHECK32: renamable $r[[REG1:[0-9]+]] = LWZ 80, %fixed-stack.0 ; CHECK32-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 ; CHECK32-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4 -; CHECK32: renamable $r[[REG2:[0-9]+]] = LWZ 80, %fixed-stack.0 +; CHECK32: renamable $r[[REG2:[0-9]+]] = LWZ 84, %fixed-stack.0 ; CHECK32-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8 ; CHECK32-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12 ; CHECK32-DAG: STW renamable $r7, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16 ; CHECK32-DAG: STW renamable $r8, 20, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 20 ; CHECK32-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 24 -; CHECK32: renamable $r4 = ADDC killed renamable $r8, killed renamable $r[[REG1]], implicit-def $carry -; CHECK32: renamable $r3 = ADDE killed renamable $r7, killed renamable $r[[REG2]], implicit-def dead $carry, implicit killed $carry +; CHECK32: renamable $r4 = ADDC killed renamable $r8, killed renamable $r[[REG2]], implicit-def $carry +; CHECK32: renamable $r3 = ADDE killed renamable $r7, killed renamable $r[[REG1]], implicit-def dead $carry, implicit killed $carry ; CHECK32 STW killed renamable $r10, 28, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 28 ; CHECK32: BLR implicit $lr, implicit $rm, implicit $r3, implicit $r4 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll index 53a7cb0aad9ee..5f471ce83828a 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll @@ -325,12 +325,12 @@ define i64 @loadsTGInit() #1 { ; SMALL32-NEXT: stw 0, 40(1) ; SMALL32-NEXT: bla .__tls_get_addr[PR] ; SMALL32-NEXT: lwz 4, L..C7(2) # @GInit -; SMALL32-NEXT: lwz 5, 4(3) +; SMALL32-NEXT: lwz 5, 0(3) +; SMALL32-NEXT: lwz 3, 4(3) ; SMALL32-NEXT: lwz 6, 4(4) -; SMALL32-NEXT: lwz 3, 0(3) ; SMALL32-NEXT: lwz 7, 0(4) -; SMALL32-NEXT: addc 4, 6, 5 -; SMALL32-NEXT: adde 3, 7, 3 +; SMALL32-NEXT: addc 4, 6, 3 +; SMALL32-NEXT: adde 3, 7, 5 ; SMALL32-NEXT: addi 1, 1, 32 ; SMALL32-NEXT: lwz 0, 8(1) ; SMALL32-NEXT: mtlr 0 @@ -346,14 +346,14 @@ define i64 @loadsTGInit() #1 { ; LARGE32-NEXT: lwz 3, L..C0@l(3) ; LARGE32-NEXT: lwz 4, L..C1@l(4) ; LARGE32-NEXT: bla .__tls_get_addr[PR] -; LARGE32-NEXT: lwz 4, 4(3) -; LARGE32-NEXT: lwz 3, 0(3) -; LARGE32-NEXT: addis 5, L..C7@u(2) -; LARGE32-NEXT: lwz 5, L..C7@l(5) -; LARGE32-NEXT: lwz 6, 4(5) -; LARGE32-NEXT: lwz 5, 0(5) -; LARGE32-NEXT: addc 4, 6, 4 -; LARGE32-NEXT: adde 3, 5, 3 +; LARGE32-NEXT: lwz 5, 0(3) +; LARGE32-NEXT: lwz 3, 4(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: lwz 6, 4(4) +; LARGE32-NEXT: lwz 7, 0(4) +; LARGE32-NEXT: addc 4, 6, 3 +; LARGE32-NEXT: adde 3, 7, 5 ; LARGE32-NEXT: addi 1, 1, 32 ; LARGE32-NEXT: lwz 0, 8(1) ; LARGE32-NEXT: mtlr 0 @@ -589,12 +589,12 @@ define i64 @loadsTWInit() #1 { ; SMALL32-NEXT: stw 0, 40(1) ; SMALL32-NEXT: bla .__tls_get_addr[PR] ; SMALL32-NEXT: lwz 4, L..C7(2) # @GInit -; SMALL32-NEXT: lwz 5, 4(3) +; SMALL32-NEXT: lwz 5, 0(3) +; SMALL32-NEXT: lwz 3, 4(3) ; SMALL32-NEXT: lwz 6, 4(4) -; SMALL32-NEXT: lwz 3, 0(3) ; SMALL32-NEXT: lwz 7, 0(4) -; SMALL32-NEXT: addc 4, 6, 5 -; SMALL32-NEXT: adde 3, 7, 3 +; SMALL32-NEXT: addc 4, 6, 3 +; SMALL32-NEXT: adde 3, 7, 5 ; SMALL32-NEXT: addi 1, 1, 32 ; SMALL32-NEXT: lwz 0, 8(1) ; SMALL32-NEXT: mtlr 0 @@ -610,14 +610,14 @@ define i64 @loadsTWInit() #1 { ; LARGE32-NEXT: lwz 3, L..C5@l(3) ; LARGE32-NEXT: lwz 4, L..C6@l(4) ; LARGE32-NEXT: bla .__tls_get_addr[PR] -; LARGE32-NEXT: lwz 4, 4(3) -; LARGE32-NEXT: lwz 3, 0(3) -; LARGE32-NEXT: addis 5, L..C7@u(2) -; LARGE32-NEXT: lwz 5, L..C7@l(5) -; LARGE32-NEXT: lwz 6, 4(5) -; LARGE32-NEXT: lwz 5, 0(5) -; LARGE32-NEXT: addc 4, 6, 4 -; LARGE32-NEXT: adde 3, 5, 3 +; LARGE32-NEXT: lwz 5, 0(3) +; LARGE32-NEXT: lwz 3, 4(3) +; LARGE32-NEXT: addis 4, L..C7@u(2) +; LARGE32-NEXT: lwz 4, L..C7@l(4) +; LARGE32-NEXT: lwz 6, 4(4) +; LARGE32-NEXT: lwz 7, 0(4) +; LARGE32-NEXT: addc 4, 6, 3 +; LARGE32-NEXT: adde 3, 7, 5 ; LARGE32-NEXT: addi 1, 1, 32 ; LARGE32-NEXT: lwz 0, 8(1) ; LARGE32-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll index c2d7325107a84..533c866eb4e12 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-longlong.ll @@ -304,15 +304,15 @@ define i64 @loadITLUninit2() { ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] -; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: stw r0, 40(r1) ; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) -; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 -; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: lwz r4, L..C4(r2) # @VarInit +; SMALL32-NEXT: lwz r5, 0(r3) +; SMALL32-NEXT: lwz r3, 4(r3) +; SMALL32-NEXT: lwz r6, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: addc r4, r4, r3 +; SMALL32-NEXT: adde r3, r6, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -327,14 +327,14 @@ define i64 @loadITLUninit2() { ; LARGE32-NEXT: lwz r4, L..C0@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] ; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) -; LARGE32-NEXT: addis r5, L..C4@u(r2) -; LARGE32-NEXT: lwz r5, L..C4@l(r5) -; LARGE32-NEXT: lwz r6, 4(r5) -; LARGE32-NEXT: lwz r5, 0(r5) -; LARGE32-NEXT: addc r4, r6, r4 -; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: lwz r5, 0(r3) +; LARGE32-NEXT: lwz r3, 4(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r6, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addc r4, r4, r3 +; LARGE32-NEXT: adde r3, r6, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -424,15 +424,15 @@ define i64 @loadITLInit2() { ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] -; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: stw r0, 40(r1) ; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) -; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 -; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: lwz r4, L..C4(r2) # @VarInit +; SMALL32-NEXT: lwz r5, 0(r3) +; SMALL32-NEXT: lwz r3, 4(r3) +; SMALL32-NEXT: lwz r6, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: addc r4, r4, r3 +; SMALL32-NEXT: adde r3, r6, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -447,14 +447,14 @@ define i64 @loadITLInit2() { ; LARGE32-NEXT: lwz r4, L..C1@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] ; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) -; LARGE32-NEXT: addis r5, L..C4@u(r2) -; LARGE32-NEXT: lwz r5, L..C4@l(r5) -; LARGE32-NEXT: lwz r6, 4(r5) -; LARGE32-NEXT: lwz r5, 0(r5) -; LARGE32-NEXT: addc r4, r6, r4 -; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: lwz r5, 0(r3) +; LARGE32-NEXT: lwz r3, 4(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r6, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addc r4, r4, r3 +; LARGE32-NEXT: adde r3, r6, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -544,15 +544,15 @@ define i64 @loadTLUninit2() { ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit ; SMALL32-NEXT: bla .__get_tpointer[PR] -; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: stw r0, 40(r1) ; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) -; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 -; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: lwz r4, L..C4(r2) # @VarInit +; SMALL32-NEXT: lwz r5, 0(r3) +; SMALL32-NEXT: lwz r3, 4(r3) +; SMALL32-NEXT: lwz r6, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: addc r4, r4, r3 +; SMALL32-NEXT: adde r3, r6, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -567,14 +567,14 @@ define i64 @loadTLUninit2() { ; LARGE32-NEXT: lwz r4, L..C2@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] ; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) -; LARGE32-NEXT: addis r5, L..C4@u(r2) -; LARGE32-NEXT: lwz r5, L..C4@l(r5) -; LARGE32-NEXT: lwz r6, 4(r5) -; LARGE32-NEXT: lwz r5, 0(r5) -; LARGE32-NEXT: addc r4, r6, r4 -; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: lwz r5, 0(r3) +; LARGE32-NEXT: lwz r3, 4(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r6, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addc r4, r4, r3 +; LARGE32-NEXT: adde r3, r6, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 @@ -664,15 +664,15 @@ define i64 @loadTLInit2() { ; SMALL32-NEXT: stwu r1, -32(r1) ; SMALL32-NEXT: lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit ; SMALL32-NEXT: bla .__get_tpointer[PR] -; SMALL32-NEXT: lwz r5, L..C4(r2) # @VarInit ; SMALL32-NEXT: stw r0, 40(r1) ; SMALL32-NEXT: add r3, r3, r4 -; SMALL32-NEXT: lwz r6, 4(r5) -; SMALL32-NEXT: lwz r5, 0(r5) -; SMALL32-NEXT: lwz r4, 4(r3) -; SMALL32-NEXT: lwz r3, 0(r3) -; SMALL32-NEXT: addc r4, r6, r4 -; SMALL32-NEXT: adde r3, r5, r3 +; SMALL32-NEXT: lwz r4, L..C4(r2) # @VarInit +; SMALL32-NEXT: lwz r5, 0(r3) +; SMALL32-NEXT: lwz r3, 4(r3) +; SMALL32-NEXT: lwz r6, 0(r4) +; SMALL32-NEXT: lwz r4, 4(r4) +; SMALL32-NEXT: addc r4, r4, r3 +; SMALL32-NEXT: adde r3, r6, r5 ; SMALL32-NEXT: addi r1, r1, 32 ; SMALL32-NEXT: lwz r0, 8(r1) ; SMALL32-NEXT: mtlr r0 @@ -687,14 +687,14 @@ define i64 @loadTLInit2() { ; LARGE32-NEXT: lwz r4, L..C3@l(r3) ; LARGE32-NEXT: bla .__get_tpointer[PR] ; LARGE32-NEXT: add r3, r3, r4 -; LARGE32-NEXT: lwz r4, 4(r3) -; LARGE32-NEXT: lwz r3, 0(r3) -; LARGE32-NEXT: addis r5, L..C4@u(r2) -; LARGE32-NEXT: lwz r5, L..C4@l(r5) -; LARGE32-NEXT: lwz r6, 4(r5) -; LARGE32-NEXT: lwz r5, 0(r5) -; LARGE32-NEXT: addc r4, r6, r4 -; LARGE32-NEXT: adde r3, r5, r3 +; LARGE32-NEXT: lwz r5, 0(r3) +; LARGE32-NEXT: lwz r3, 4(r3) +; LARGE32-NEXT: addis r4, L..C4@u(r2) +; LARGE32-NEXT: lwz r4, L..C4@l(r4) +; LARGE32-NEXT: lwz r6, 0(r4) +; LARGE32-NEXT: lwz r4, 4(r4) +; LARGE32-NEXT: addc r4, r4, r3 +; LARGE32-NEXT: adde r3, r6, r5 ; LARGE32-NEXT: addi r1, r1, 32 ; LARGE32-NEXT: lwz r0, 8(r1) ; LARGE32-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll index 6c0ea782c2a38..268402170063e 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-le-xcoff-reloc-large32.ll @@ -290,16 +290,16 @@ entry: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} bla 0 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: [[#NFA+1]]) .__get_tpointer[PR] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 5, 2, 0 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 4(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} addis 4, 2, 0 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: [[#NFA+25]]) VarInit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 8(5) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 8(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+25]]) VarInit[TE] -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 6, 4(5) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 0(5) -; DIS-NEXT: addc 4, 6, 4 -; DIS-NEXT: adde 3, 5, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 6, 0(4) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(4) +; DIS-NEXT: addc 4, 4, 3 +; DIS-NEXT: adde 3, 6, 5 ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) ; DIS-NEXT: mtlr 0 @@ -324,10 +324,10 @@ entry: ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 12(4) ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCL (idx: [[#NFA+27]]) IThreadLocalVarUninit2[TE] ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} add 3, 3, 4 -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 4, 4(3) -; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 0(3) -; DIS-NEXT: addic 4, 4, 1 -; DIS-NEXT: addze 3, 3 +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 5, 0(3) +; DIS-NEXT: [[#%x, ADDR:]]: {{.*}} lwz 3, 4(3) +; DIS-NEXT: addic 4, 3, 1 +; DIS-NEXT: addze 3, 5 ; DIS-NEXT: addi 1, 1, 32 ; DIS-NEXT: lwz 0, 8(1) ; DIS-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll index 0ff2f28207ed4..4f00cff83942a 100644 --- a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll @@ -357,10 +357,10 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; CHECK-NEXT: .LBB7_2: # %atomicrmw.start ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB7_4 Depth 2 -; CHECK-NEXT: sub 5, 6, 4 -; CHECK-NEXT: cmpld 5, 6 +; CHECK-NEXT: subc 5, 6, 4 ; CHECK-NEXT: li 7, 0 -; CHECK-NEXT: bgt 0, .LBB7_4 +; CHECK-NEXT: addze. 8, 7 +; CHECK-NEXT: beq 0, .LBB7_4 ; CHECK-NEXT: # %bb.3: # %atomicrmw.start ; CHECK-NEXT: # ; CHECK-NEXT: mr 7, 5 diff --git a/llvm/test/CodeGen/PowerPC/cvt_i64_to_fp.ll b/llvm/test/CodeGen/PowerPC/cvt_i64_to_fp.ll index 34091ba46c3f6..29e7a16739864 100644 --- a/llvm/test/CodeGen/PowerPC/cvt_i64_to_fp.ll +++ b/llvm/test/CodeGen/PowerPC/cvt_i64_to_fp.ll @@ -12,11 +12,11 @@ define double @postinctodbl(ptr nocapture %llp) #0 { ; CHECK-NEXT: addic 4, 4, 1 ; CHECK-NEXT: lwz 5, 0(3) ; CHECK-NEXT: stw 5, 8(1) -; CHECK-NEXT: addze 5, 5 ; CHECK-NEXT: lfd 0, 8(1) -; CHECK-NEXT: stw 5, 0(3) -; CHECK-NEXT: fcfid 1, 0 ; CHECK-NEXT: stw 4, 4(3) +; CHECK-NEXT: addze 4, 5 +; CHECK-NEXT: fcfid 1, 0 +; CHECK-NEXT: stw 4, 0(3) ; CHECK-NEXT: addi 1, 1, 16 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll index 98b812e7845a5..432b5a6b362fe 100644 --- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll +++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll @@ -412,8 +412,8 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; PPC32-NEXT: not 4, 4 ; PPC32-NEXT: not 3, 3 ; PPC32-NEXT: subc 4, 8, 4 -; PPC32-NEXT: not 6, 6 ; PPC32-NEXT: subfe 3, 3, 7 +; PPC32-NEXT: not 6, 6 ; PPC32-NEXT: not 5, 5 ; PPC32-NEXT: subc 6, 10, 6 ; PPC32-NEXT: subfe 5, 5, 9 diff --git a/llvm/test/CodeGen/PowerPC/pr35688.ll b/llvm/test/CodeGen/PowerPC/pr35688.ll index 8a4351b229fd1..5746934802eb2 100644 --- a/llvm/test/CodeGen/PowerPC/pr35688.ll +++ b/llvm/test/CodeGen/PowerPC/pr35688.ll @@ -8,10 +8,9 @@ define void @ec_GFp_nistp256_points_mul() { ; CHECK-LABEL: ec_GFp_nistp256_points_mul: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld 3, 0(3) +; CHECK-NEXT: subfic 4, 3, 0 ; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: subfic 5, 3, 0 ; CHECK-NEXT: subfze 5, 4 -; CHECK-NEXT: sradi 5, 5, 63 ; CHECK-NEXT: subc 3, 5, 3 ; CHECK-NEXT: subfe 3, 4, 5 ; CHECK-NEXT: sradi 3, 3, 63 diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll index 1794b3ba526ed..98d94646bce65 100644 --- a/llvm/test/CodeGen/PowerPC/pr36292.ll +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll @@ -12,11 +12,12 @@ define void @test() nounwind comdat { ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: stdu 1, -64(1) ; CHECK-NEXT: std 0, 80(1) +; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: ld 3, 0(3) ; CHECK-NEXT: ld 30, 32(1) -; CHECK-NEXT: sub 4, 3, 30 -; CHECK-NEXT: cmpld 4, 3 -; CHECK-NEXT: iselgt 3, 0, 4 +; CHECK-NEXT: subc 3, 3, 30 +; CHECK-NEXT: addze. 4, 4 +; CHECK-NEXT: iseleq 3, 0, 3 ; CHECK-NEXT: addi 29, 3, 1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %forcond diff --git a/llvm/test/CodeGen/PowerPC/pr40922.ll b/llvm/test/CodeGen/PowerPC/pr40922.ll index 9252e9a3e3aa4..ed840ad12b7ed 100644 --- a/llvm/test/CodeGen/PowerPC/pr40922.ll +++ b/llvm/test/CodeGen/PowerPC/pr40922.ll @@ -23,11 +23,10 @@ define i32 @a() { ; CHECK-NEXT: li 5, 0 ; CHECK-NEXT: mr 30, 3 ; CHECK-NEXT: addic 6, 4, 6 -; CHECK-NEXT: addze 5, 5 -; CHECK-NEXT: rlwinm 6, 6, 0, 28, 26 -; CHECK-NEXT: andi. 5, 5, 1 -; CHECK-NEXT: cmplw 1, 6, 4 -; CHECK-NEXT: crorc 20, 1, 4 +; CHECK-NEXT: addze. 5, 5 +; CHECK-NEXT: rlwinm 5, 6, 0, 28, 26 +; CHECK-NEXT: cmplw 1, 5, 4 +; CHECK-NEXT: crnand 20, 4, 2 ; CHECK-NEXT: bc 12, 20, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: bl e diff --git a/llvm/test/CodeGen/PowerPC/pr45448.ll b/llvm/test/CodeGen/PowerPC/pr45448.ll index 0f2dcb3ccc8a0..0edbae47e9378 100644 --- a/llvm/test/CodeGen/PowerPC/pr45448.ll +++ b/llvm/test/CodeGen/PowerPC/pr45448.ll @@ -22,12 +22,14 @@ define hidden void @julia_tryparse_internal_45896() #0 { ; CHECK-NEXT: li r5, -3 ; CHECK-NEXT: sradi r4, r3, 63 ; CHECK-NEXT: rldic r5, r5, 4, 32 +; CHECK-NEXT: mulld r6, r4, r5 ; CHECK-NEXT: mulhdu r3, r3, r5 -; CHECK-NEXT: maddld r6, r4, r5, r3 -; CHECK-NEXT: cmpld cr1, r6, r3 -; CHECK-NEXT: mulhdu. r3, r4, r5 -; CHECK-NEXT: crorc 4*cr5+lt, 4*cr1+lt, eq -; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_9 +; CHECK-NEXT: mulhdu r4, r4, r5 +; CHECK-NEXT: addc r3, r3, r6 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: addze r3, r3 +; CHECK-NEXT: or. r3, r4, r3 +; CHECK-NEXT: beq cr0, .LBB0_9 ; CHECK-NEXT: # %bb.8: # %L917 ; CHECK-NEXT: .LBB0_9: # %L994 top: diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll index 8fff2c28da245..d9b22bda85e44 100644 --- a/llvm/test/CodeGen/PowerPC/sat-add.ll +++ b/llvm/test/CodeGen/PowerPC/sat-add.ll @@ -156,10 +156,11 @@ define i64 @unsigned_sat_constant_i64_using_min(i64 %x) { define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) { ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: addi 4, 3, 42 -; CHECK-NEXT: cmpld 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: isellt 3, 3, 4 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: addic 3, 3, 42 +; CHECK-NEXT: addze. 4, 4 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: iseleq 3, 3, 4 ; CHECK-NEXT: blr %a = add i64 %x, 42 %c = icmp ugt i64 %x, %a @@ -170,10 +171,11 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) { define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) { ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval: ; CHECK: # %bb.0: -; CHECK-NEXT: addi 4, 3, 42 -; CHECK-NEXT: cmpld 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: isellt 3, 3, 4 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: addic 3, 3, 42 +; CHECK-NEXT: addze. 4, 4 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: iseleq 3, 3, 4 ; CHECK-NEXT: blr %a = add i64 %x, 42 %c = icmp ugt i64 %x, -43 @@ -346,10 +348,11 @@ define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) { define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) { ; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: add 4, 3, 4 -; CHECK-NEXT: cmpld 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: isellt 3, 3, 4 +; CHECK-NEXT: addc 3, 3, 4 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: addze. 4, 4 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: iseleq 3, 3, 4 ; CHECK-NEXT: blr %a = add i64 %x, %y %c = icmp ugt i64 %x, %a @@ -859,9 +862,11 @@ define <4 x i128> @sadd(<4 x i128> %a, <4 x i128> %b) local_unnamed_addr { define i64 @unsigned_sat_constant_i64_with_single_use(i64 %x) { ; CHECK-LABEL: unsigned_sat_constant_i64_with_single_use: ; CHECK: # %bb.0: -; CHECK-NEXT: addi 4, 3, -4 -; CHECK-NEXT: cmpld 4, 3 -; CHECK-NEXT: iselgt 3, 0, 4 +; CHECK-NEXT: li 4, 4 +; CHECK-NEXT: subc 3, 3, 4 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: addze. 4, 4 +; CHECK-NEXT: iseleq 3, 0, 3 ; CHECK-NEXT: blr %umin = call i64 @llvm.umin.i64(i64 %x, i64 4) %sub = sub i64 %x, %umin diff --git a/llvm/test/CodeGen/PowerPC/select.ll b/llvm/test/CodeGen/PowerPC/select.ll index 289f83c475ff3..10661030da8d8 100644 --- a/llvm/test/CodeGen/PowerPC/select.ll +++ b/llvm/test/CodeGen/PowerPC/select.ll @@ -135,18 +135,22 @@ define i64 @f4_sge_0(i64 %x) { ; ; CHECK-32-LABEL: f4_sge_0: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r5, r4 +; CHECK-32-NEXT: mr r6, r4 ; CHECK-32-NEXT: subfic r4, r4, 0 -; CHECK-32-NEXT: mr r6, r3 ; CHECK-32-NEXT: cmpwi r3, -1 -; CHECK-32-NEXT: subfze r3, r3 -; CHECK-32-NEXT: bgt cr0, .LBB5_2 +; CHECK-32-NEXT: subfze r5, r3 +; CHECK-32-NEXT: ble cr0, .LBB5_3 ; CHECK-32-NEXT: # %bb.1: -; CHECK-32-NEXT: mr r3, r6 +; CHECK-32-NEXT: ble cr0, .LBB5_4 ; CHECK-32-NEXT: .LBB5_2: -; CHECK-32-NEXT: bgtlr cr0 -; CHECK-32-NEXT: # %bb.3: -; CHECK-32-NEXT: mr r4, r5 +; CHECK-32-NEXT: mr r3, r5 +; CHECK-32-NEXT: blr +; CHECK-32-NEXT: .LBB5_3: +; CHECK-32-NEXT: mr r4, r6 +; CHECK-32-NEXT: bgt cr0, .LBB5_2 +; CHECK-32-NEXT: .LBB5_4: +; CHECK-32-NEXT: mr r5, r3 +; CHECK-32-NEXT: mr r3, r5 ; CHECK-32-NEXT: blr %c = icmp sge i64 %x, 0 %x.neg = sub i64 0, %x diff --git a/llvm/test/CodeGen/PowerPC/uaddo-32.ll b/llvm/test/CodeGen/PowerPC/uaddo-32.ll index b5989fc2ee2da..5dd5a2672b166 100644 --- a/llvm/test/CodeGen/PowerPC/uaddo-32.ll +++ b/llvm/test/CodeGen/PowerPC/uaddo-32.ll @@ -1,15 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s -; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck %s +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefix=LINUXASM +; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff | FileCheck %s --check-prefix=AIXASM define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) { -; CHECK-LABEL: add: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: addc 3, 3, 4 -; CHECK-NEXT: addze 4, 6 -; CHECK-NEXT: stw 4, 0(5) -; CHECK-NEXT: blr +; LINUXASM-LABEL: add: +; LINUXASM: # %bb.0: # %entry +; LINUXASM-NEXT: li 6, 0 +; LINUXASM-NEXT: addc 3, 3, 4 +; LINUXASM-NEXT: addze 4, 6 +; LINUXASM-NEXT: stw 4, 0(5) +; LINUXASM-NEXT: blr + +; AIXASM-LABEL: .add: +; AIXASM: # %bb.0: # %entry +; AIXASM-NEXT: addc 3, 3, 4 +; AIXASM-NEXT: li 4, 0 +; AIXASM-NEXT: addze 4, 4 +; AIXASM-NEXT: stw 4, 0(5) +; AIXASM-NEXT: blr + entry: %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %0, 1 @@ -22,13 +31,22 @@ entry: declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) { -; CHECK-LABEL: add_overflow: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: addc 4, 3, 4 -; CHECK-NEXT: addze 3, 6 -; CHECK-NEXT: stw 4, 0(5) -; CHECK-NEXT: blr +; LINUXASM-LABEL: add_overflow: +; LINUXASM: # %bb.0: # %entry +; LINUXASM-NEXT: li 6, 0 +; LINUXASM-NEXT: addc 4, 3, 4 +; LINUXASM-NEXT: addze 3, 6 +; LINUXASM-NEXT: stw 4, 0(5) +; LINUXASM-NEXT: blr + +; AIXASM-LABEL: .add_overflow: +; AIXASM: # %bb.0: # %entry +; AIXASM-NEXT: addc 4, 3, 4 +; AIXASM-NEXT: li 3, 0 +; AIXASM-NEXT: addze 3, 3 +; AIXASM-NEXT: stw 4, 0(5) +; AIXASM-NEXT: blr + entry: %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %0, 1 diff --git a/llvm/test/CodeGen/PowerPC/uaddo-64.ll b/llvm/test/CodeGen/PowerPC/uaddo-64.ll index 3c7ab2c2bab79..98e834f29467c 100644 --- a/llvm/test/CodeGen/PowerPC/uaddo-64.ll +++ b/llvm/test/CodeGen/PowerPC/uaddo-64.ll @@ -1,15 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s +; RUN: llc < %s -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefix=LINUXASM +; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s --check-prefix=AIXASM define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) { -; CHECK-LABEL: add: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: addc 3, 3, 4 -; CHECK-NEXT: addze 4, 6 -; CHECK-NEXT: std 4, 0(5) -; CHECK-NEXT: blr +; LINUXASM-LABEL: add: +; LINUXASM: # %bb.0: # %entry +; LINUXASM-NEXT: li 6, 0 +; LINUXASM-NEXT: addc 3, 3, 4 +; LINUXASM-NEXT: addze 4, 6 +; LINUXASM-NEXT: std 4, 0(5) +; LINUXASM-NEXT: blr + +; AIXASM-LABEL: .add: +; AIXASM: # %bb.0: # %entry +; AIXASM-NEXT: addc 3, 3, 4 +; AIXASM-NEXT: li 4, 0 +; AIXASM-NEXT: addze 4, 4 +; AIXASM-NEXT: std 4, 0(5) +; AIXASM-NEXT: blr + entry: %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %1 = extractvalue { i64, i1 } %0, 1 @@ -22,13 +31,22 @@ entry: declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) { -; CHECK-LABEL: add_overflow: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: addc 4, 3, 4 -; CHECK-NEXT: addze 3, 6 -; CHECK-NEXT: std 4, 0(5) -; CHECK-NEXT: blr +; LINUXASM-LABEL: add_overflow: +; LINUXASM: # %bb.0: # %entry +; LINUXASM-NEXT: li 6, 0 +; LINUXASM-NEXT: addc 4, 3, 4 +; LINUXASM-NEXT: addze 3, 6 +; LINUXASM-NEXT: std 4, 0(5) +; LINUXASM-NEXT: blr + +; AIXASM-LABEL: .add_overflow: +; AIXASM: # %bb.0: # %entry +; AIXASM-NEXT: addc 4, 3, 4 +; AIXASM-NEXT: li 3, 0 +; AIXASM-NEXT: addze 3, 3 +; AIXASM-NEXT: std 4, 0(5) +; AIXASM-NEXT: blr + entry: %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %1 = extractvalue { i64, i1 } %0, 1 @@ -38,16 +56,28 @@ entry: } define noundef i64 @addWithCarryIn (i64 noundef %a, i64 noundef %b, i64 noundef %c, ptr nocapture noundef writeonly %ovf) { -; CHECK-LABEL: addWithCarryIn: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 7, 0 -; CHECK-NEXT: addc 3, 3, 4 -; CHECK-NEXT: addze 4, 7 -; CHECK-NEXT: addc 3, 3, 5 -; CHECK-NEXT: addze 5, 7 -; CHECK-NEXT: or 4, 4, 5 -; CHECK-NEXT: std 4, 0(6) -; CHECK-NEXT: blr +; LINUXASM-LABEL: addWithCarryIn: +; LINUXASM: # %bb.0: # %entry +; LINUXASM-NEXT: li 7, 0 +; LINUXASM-NEXT: addc 3, 3, 4 +; LINUXASM-NEXT: addze 4, 7 +; LINUXASM-NEXT: addc 3, 3, 5 +; LINUXASM-NEXT: addze 5, 7 +; LINUXASM-NEXT: or 4, 4, 5 +; LINUXASM-NEXT: std 4, 0(6) +; LINUXASM-NEXT: blr + +; AIXASM-LABEL: .addWithCarryIn: +; AIXASM: # %bb.0: # %entry +; AIXASM-NEXT: addc 3, 3, 4 +; AIXASM-NEXT: li 4, 0 +; AIXASM-NEXT: addze 7, 4 +; AIXASM-NEXT: addc 3, 3, 5 +; AIXASM-NEXT: addze 4, 4 +; AIXASM-NEXT: or 4, 7, 4 +; AIXASM-NEXT: std 4, 0(6) +; AIXASM-NEXT: blr + entry: %0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %1 = extractvalue { i64, i1 } %0, 1 diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll index 84895e74f18d5..f573fdab1b153 100644 --- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -5,137 +5,134 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC64-LABEL: muloti_test: ; PPC64: # %bb.0: # %start -; PPC64-NEXT: addic 8, 5, -1 -; PPC64-NEXT: mulhdu 9, 5, 4 +; PPC64-NEXT: addic 9, 5, -1 ; PPC64-NEXT: mulld 10, 5, 4 +; PPC64-NEXT: mulld 11, 3, 6 +; PPC64-NEXT: subfe 9, 9, 5 +; PPC64-NEXT: add 10, 11, 10 +; PPC64-NEXT: addic 11, 3, -1 +; PPC64-NEXT: mulhdu 8, 3, 6 +; PPC64-NEXT: subfe 3, 11, 3 +; PPC64-NEXT: and 3, 3, 9 +; PPC64-NEXT: addic 9, 8, -1 +; PPC64-NEXT: subfe 8, 9, 8 +; PPC64-NEXT: or 3, 3, 8 +; PPC64-NEXT: mulhdu 5, 5, 4 +; PPC64-NEXT: addic 8, 5, -1 ; PPC64-NEXT: subfe 5, 8, 5 -; PPC64-NEXT: mulld 8, 3, 6 -; PPC64-NEXT: add 8, 8, 10 -; PPC64-NEXT: addic 10, 3, -1 -; PPC64-NEXT: mulhdu 7, 3, 6 -; PPC64-NEXT: subfe 3, 10, 3 -; PPC64-NEXT: and 5, 3, 5 -; PPC64-NEXT: addic 3, 7, -1 -; PPC64-NEXT: subfe 7, 3, 7 -; PPC64-NEXT: or 5, 5, 7 -; PPC64-NEXT: mulhdu 10, 4, 6 -; PPC64-NEXT: addic 7, 9, -1 -; PPC64-NEXT: add 3, 10, 8 -; PPC64-NEXT: subfe 7, 7, 9 -; PPC64-NEXT: or 5, 5, 7 -; PPC64-NEXT: subc 7, 3, 10 -; PPC64-NEXT: subfe 7, 3, 3 -; PPC64-NEXT: neg 7, 7 +; PPC64-NEXT: li 7, 0 +; PPC64-NEXT: or 5, 3, 5 +; PPC64-NEXT: mulhdu 8, 4, 6 +; PPC64-NEXT: addc 3, 8, 10 +; PPC64-NEXT: addze 7, 7 +; PPC64-NEXT: addic 8, 7, -1 +; PPC64-NEXT: subfe 7, 8, 7 ; PPC64-NEXT: or 5, 5, 7 ; PPC64-NEXT: mulld 4, 4, 6 ; PPC64-NEXT: blr ; ; PPC32-LABEL: muloti_test: ; PPC32: # %bb.0: # %start -; PPC32-NEXT: stwu 1, -80(1) -; PPC32-NEXT: mr 11, 7 -; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: mulhwu. 26, 11, 6 -; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: stwu 1, -64(1) +; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill ; PPC32-NEXT: mfcr 12 -; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill -; PPC32-NEXT: mcrf 1, 0 -; PPC32-NEXT: stw 19, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: mulhwu 27, 6, 10 -; PPC32-NEXT: stw 20, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: cmpwi 6, 11, 0 -; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: mullw 27, 9, 4 +; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 11, 7 +; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill ; PPC32-NEXT: li 7, 0 -; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: mulhwu. 26, 5, 8 -; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: mcrf 5, 0 -; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: cmpwi 5, 0 -; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill -; PPC32-NEXT: mullw 24, 5, 10 -; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill -; PPC32-NEXT: crnor 20, 2, 26 -; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill -; PPC32-NEXT: cmpwi 3, 0 -; PPC32-NEXT: stw 12, 24(1) -; PPC32-NEXT: mulhwu 30, 5, 10 -; PPC32-NEXT: cmpwi 6, 9, 0 -; PPC32-NEXT: crnor 21, 26, 2 -; PPC32-NEXT: crorc 20, 20, 6 -; PPC32-NEXT: crorc 20, 20, 22 -; PPC32-NEXT: mulhwu 12, 5, 9 -; PPC32-NEXT: mullw 26, 5, 9 -; PPC32-NEXT: mullw 22, 5, 8 -; PPC32-NEXT: addc 5, 24, 27 -; PPC32-NEXT: addze 30, 30 +; PPC32-NEXT: mullw 26, 3, 10 +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill +; PPC32-NEXT: add 27, 26, 27 +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill +; PPC32-NEXT: cmpwi 7, 11, 0 +; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill +; PPC32-NEXT: mullw 24, 11, 6 +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill +; PPC32-NEXT: mulhwu 0, 8, 6 +; PPC32-NEXT: stw 12, 16(1) +; PPC32-NEXT: mr 12, 5 +; PPC32-NEXT: mulhwu 5, 4, 10 +; PPC32-NEXT: addc 5, 5, 27 +; PPC32-NEXT: addze 27, 7 +; PPC32-NEXT: cmpwi 2, 27, 0 +; PPC32-NEXT: mullw 25, 12, 8 +; PPC32-NEXT: add 26, 24, 25 +; PPC32-NEXT: addc 0, 0, 26 +; PPC32-NEXT: addze 26, 7 +; PPC32-NEXT: mullw 23, 8, 6 +; PPC32-NEXT: mullw 22, 4, 10 +; PPC32-NEXT: addc 24, 22, 23 +; PPC32-NEXT: adde 22, 5, 0 +; PPC32-NEXT: mulhwu 29, 6, 10 +; PPC32-NEXT: mullw 21, 12, 10 +; PPC32-NEXT: addc 5, 21, 29 +; PPC32-NEXT: mulhwu 30, 12, 10 +; PPC32-NEXT: addze 0, 30 ; PPC32-NEXT: mullw 23, 6, 9 ; PPC32-NEXT: addc 5, 23, 5 -; PPC32-NEXT: mullw 21, 11, 6 -; PPC32-NEXT: add 27, 21, 22 -; PPC32-NEXT: mulhwu 28, 8, 6 -; PPC32-NEXT: add 27, 28, 27 -; PPC32-NEXT: cmplw 7, 27, 28 -; PPC32-NEXT: mulhwu. 23, 3, 10 +; PPC32-NEXT: mulhwu 28, 6, 9 +; PPC32-NEXT: addze 29, 28 +; PPC32-NEXT: addc 0, 0, 29 +; PPC32-NEXT: addze 29, 7 +; PPC32-NEXT: mullw 30, 12, 9 +; PPC32-NEXT: addc 0, 30, 0 +; PPC32-NEXT: mulhwu 25, 12, 9 +; PPC32-NEXT: adde 30, 25, 29 +; PPC32-NEXT: addc 0, 0, 24 +; PPC32-NEXT: adde 30, 30, 22 +; PPC32-NEXT: addze. 29, 7 +; PPC32-NEXT: mcrf 1, 0 +; PPC32-NEXT: mulhwu. 29, 11, 6 ; PPC32-NEXT: mcrf 6, 0 -; PPC32-NEXT: cror 24, 20, 28 -; PPC32-NEXT: crorc 25, 21, 26 -; PPC32-NEXT: mulhwu 0, 6, 9 -; PPC32-NEXT: mullw 20, 9, 4 +; PPC32-NEXT: mulhwu. 29, 12, 8 +; PPC32-NEXT: mcrf 5, 0 +; PPC32-NEXT: cmpwi 12, 0 +; PPC32-NEXT: crnor 20, 2, 30 +; PPC32-NEXT: cmpwi 3, 0 +; PPC32-NEXT: cmpwi 7, 9, 0 +; PPC32-NEXT: crnor 24, 30, 2 +; PPC32-NEXT: mulhwu. 12, 3, 10 +; PPC32-NEXT: crorc 20, 20, 26 +; PPC32-NEXT: mcrf 7, 0 +; PPC32-NEXT: crorc 20, 20, 22 +; PPC32-NEXT: cmpwi 26, 0 +; PPC32-NEXT: crorc 28, 20, 2 ; PPC32-NEXT: mulhwu. 9, 9, 4 -; PPC32-NEXT: mcrf 1, 0 -; PPC32-NEXT: addze 9, 0 -; PPC32-NEXT: mullw 19, 3, 10 -; PPC32-NEXT: or. 3, 4, 3 ; PPC32-NEXT: mcrf 5, 0 -; PPC32-NEXT: addc 3, 30, 9 -; PPC32-NEXT: add 24, 19, 20 -; PPC32-NEXT: mulhwu 29, 4, 10 -; PPC32-NEXT: add 28, 29, 24 -; PPC32-NEXT: cmplw 2, 28, 29 -; PPC32-NEXT: crorc 20, 25, 6 -; PPC32-NEXT: cror 20, 20, 8 -; PPC32-NEXT: mullw 22, 4, 10 -; PPC32-NEXT: or. 4, 8, 11 -; PPC32-NEXT: addze 4, 7 -; PPC32-NEXT: crnor 21, 2, 22 +; PPC32-NEXT: crorc 20, 24, 30 +; PPC32-NEXT: or. 3, 4, 3 +; PPC32-NEXT: mcrf 6, 0 +; PPC32-NEXT: crorc 20, 20, 22 +; PPC32-NEXT: or. 3, 8, 11 +; PPC32-NEXT: crorc 20, 20, 10 +; PPC32-NEXT: crnor 21, 2, 26 ; PPC32-NEXT: cror 20, 21, 20 -; PPC32-NEXT: mullw 25, 8, 6 -; PPC32-NEXT: addc 8, 26, 3 -; PPC32-NEXT: adde 9, 12, 4 -; PPC32-NEXT: addc 3, 22, 25 -; PPC32-NEXT: adde 11, 28, 27 -; PPC32-NEXT: addc 4, 8, 3 -; PPC32-NEXT: adde 3, 9, 11 -; PPC32-NEXT: cmplw 1, 3, 9 -; PPC32-NEXT: cmplw 4, 8 -; PPC32-NEXT: crandc 22, 4, 6 +; PPC32-NEXT: cror 20, 20, 28 +; PPC32-NEXT: crandc 20, 6, 20 ; PPC32-NEXT: mullw 6, 6, 10 -; PPC32-NEXT: bc 12, 22, .LBB0_3 +; PPC32-NEXT: bc 12, 20, .LBB0_2 ; PPC32-NEXT: # %bb.1: # %start -; PPC32-NEXT: crand 21, 6, 0 -; PPC32-NEXT: bc 12, 21, .LBB0_3 -; PPC32-NEXT: # %bb.2: # %start -; PPC32-NEXT: cror 20, 20, 24 -; PPC32-NEXT: bc 4, 20, .LBB0_4 -; PPC32-NEXT: .LBB0_3: # %start ; PPC32-NEXT: li 7, 1 -; PPC32-NEXT: .LBB0_4: # %start -; PPC32-NEXT: lwz 12, 24(1) -; PPC32-NEXT: lwz 30, 72(1) # 4-byte Folded Reload +; PPC32-NEXT: .LBB0_2: # %start +; PPC32-NEXT: lwz 12, 16(1) +; PPC32-NEXT: mr 3, 30 +; PPC32-NEXT: mr 4, 0 +; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: mtcrf 32, 12 # cr2 -; PPC32-NEXT: lwz 29, 68(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 28, 64(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 27, 60(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 26, 56(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 25, 52(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 24, 48(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 23, 44(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 22, 40(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 21, 36(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 20, 32(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 19, 28(1) # 4-byte Folded Reload -; PPC32-NEXT: addi 1, 1, 80 +; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 22, 24(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 21, 20(1) # 4-byte Folded Reload +; PPC32-NEXT: addi 1, 1, 64 ; PPC32-NEXT: blr start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 diff --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll index e5c5356ce50a4..515dd0f70e948 100644 --- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll @@ -207,33 +207,32 @@ define i1 @test_urem_oversized(i66 %X) nounwind { ; PPC: # %bb.0: ; PPC-NEXT: lis 6, -12795 ; PPC-NEXT: ori 6, 6, 40665 -; PPC-NEXT: mulhwu 7, 5, 6 +; PPC-NEXT: mulhwu 8, 5, 6 ; PPC-NEXT: lis 9, 12057 ; PPC-NEXT: ori 9, 9, 37186 ; PPC-NEXT: mullw 11, 4, 6 -; PPC-NEXT: addc 7, 11, 7 +; PPC-NEXT: addc 8, 11, 8 ; PPC-NEXT: lis 11, -5526 ; PPC-NEXT: ori 11, 11, 61135 -; PPC-NEXT: mulhwu 8, 4, 6 -; PPC-NEXT: addze 8, 8 +; PPC-NEXT: mulhwu 7, 4, 6 +; PPC-NEXT: addze 7, 7 ; PPC-NEXT: mulhwu 10, 5, 9 ; PPC-NEXT: mullw 4, 4, 9 ; PPC-NEXT: mullw 9, 5, 9 -; PPC-NEXT: addc 7, 9, 7 -; PPC-NEXT: addze 9, 10 -; PPC-NEXT: rotlwi 10, 7, 31 +; PPC-NEXT: addc 8, 9, 8 +; PPC-NEXT: adde 7, 7, 10 +; PPC-NEXT: add 4, 4, 7 +; PPC-NEXT: rotlwi 9, 8, 31 ; PPC-NEXT: mullw 3, 3, 6 ; PPC-NEXT: mullw 6, 5, 6 ; PPC-NEXT: slwi 5, 5, 1 ; PPC-NEXT: add 3, 5, 3 ; PPC-NEXT: rotlwi 5, 6, 31 -; PPC-NEXT: rlwimi 5, 7, 31, 0, 0 -; PPC-NEXT: add 7, 8, 9 -; PPC-NEXT: add 4, 4, 7 ; PPC-NEXT: add 3, 4, 3 -; PPC-NEXT: rlwimi 10, 3, 31, 0, 0 +; PPC-NEXT: rlwimi 5, 8, 31, 0, 0 +; PPC-NEXT: rlwimi 9, 3, 31, 0, 0 ; PPC-NEXT: cmplw 5, 11 -; PPC-NEXT: cmplwi 1, 10, 13 +; PPC-NEXT: cmplwi 1, 9, 13 ; PPC-NEXT: rlwinm 3, 3, 31, 31, 31 ; PPC-NEXT: crandc 20, 4, 6 ; PPC-NEXT: crand 21, 6, 0 From b6b99a14475ce2ebd56cb69672d2a6f48882958e Mon Sep 17 00:00:00 2001 From: zhijian Date: Thu, 16 Jan 2025 15:01:59 +0000 Subject: [PATCH 2/5] address comment --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9b0e7cd61eefd..4c782eeeb3ea1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -196,9 +196,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } } - // PowerPC uses addo,addo_carry,subo,subo_carry to propagate carry. setOperationAction(ISD::UADDO, RegVT, Custom); setOperationAction(ISD::USUBO, RegVT, Custom); + + // PowerPC uses addo_carry,subo_carry to propagate carry. setOperationAction(ISD::UADDO_CARRY, RegVT, Custom); setOperationAction(ISD::USUBO_CARRY, RegVT, Custom); @@ -18443,8 +18444,9 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, DAG.getConstant(NegConstant, DL, MVT::i64)); SDValue AddOrZ = NegConstant != 0 ? Add : Z; SDValue Addc = - DAG.getNode(ISD::UADDO, DL, DAG.getVTList(MVT::i64, CarryType), AddOrZ, - DAG.getConstant(-1ULL, DL, MVT::i64)); + DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType), + AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64), + DAG.getConstant(0, DL, CarryType)); return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), SDValue(Addc.getNode(), 1)); @@ -18460,8 +18462,9 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, DAG.getConstant(NegConstant, DL, MVT::i64)); SDValue AddOrZ = NegConstant != 0 ? Add : Z; SDValue Subc = - DAG.getNode(ISD::USUBO, DL, DAG.getVTList(MVT::i64, CarryType), - DAG.getConstant(0, DL, MVT::i64), AddOrZ); + DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType), + DAG.getConstant(0, DL, MVT::i64), AddOrZ, + DAG.getConstant(0, DL, CarryType)); SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1), DAG.getAllOnesConstant(DL, CarryType)); return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS, From 59fd16041ed8a7fb91c1c586972e67187df4a41a Mon Sep 17 00:00:00 2001 From: zhijian Date: Wed, 26 Mar 2025 13:02:23 -0400 Subject: [PATCH 3/5] fix a crash --- llvm/include/llvm/CodeGen/LivePhysRegs.h | 3 + llvm/lib/CodeGen/LivePhysRegs.cpp | 22 +++++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 27 +----- .../carry-liveness-after-expand-isel.ll | 82 +++++++++++++++++++ 5 files changed, 117 insertions(+), 25 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/carry-liveness-after-expand-isel.ll diff --git a/llvm/include/llvm/CodeGen/LivePhysRegs.h b/llvm/include/llvm/CodeGen/LivePhysRegs.h index 3b3608e4641e7..232bd1f11600a 100644 --- a/llvm/include/llvm/CodeGen/LivePhysRegs.h +++ b/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -195,6 +195,9 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB); +/// Check if physical register \p Reg is alive after \p MBI. +bool isPhysRegLiveAfter(Register Reg, MachineBasicBlock::iterator MBI); + /// Convenience function for recomputing live-in's for a MBB. Returns true if /// any changes were made. static inline bool recomputeLiveIns(MachineBasicBlock &MBB) { diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index 7a06d108c66ca..0c28dfee9e984 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -338,3 +338,25 @@ void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs, computeLiveIns(LiveRegs, MBB); addLiveIns(MBB, LiveRegs); } + +bool llvm::isPhysRegLiveAfter(Register Reg, MachineBasicBlock::iterator MBI) { + assert(Reg.isPhysical() && "Apply to physical register only"); + + MachineBasicBlock *MBB = MBI->getParent(); + // Scan forward through BB for a use/def of Reg + for (const MachineInstr &MI : llvm::make_range(std::next(MBI), MBB->end())) { + if (MI.readsRegister(Reg, /*TRI=*/nullptr)) + return true; + // If we found a def, we can stop searching. + if (MI.definesRegister(Reg, /*TRI=*/nullptr)) + return false; + } + + // If we hit the end of the block, check whether Reg is live into a + // successor. + for (MachineBasicBlock *Succ : MBB->successors()) + if (Succ->isLiveIn(Reg)) + return true; + + return false; +} diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 4c782eeeb3ea1..ab8e5040b45e9 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -36,6 +36,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -13434,6 +13435,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, F->insert(It, copy0MBB); F->insert(It, sinkMBB); + if (isPhysRegLiveAfter(PPC::CARRY, MI.getIterator())) { + copy0MBB->addLiveIn(PPC::CARRY); + sinkMBB->addLiveIn(PPC::CARRY); + } + // Set the call frame size on entry to the new basic blocks. // See https://reviews.llvm.org/D156113. unsigned CallFrameSize = TII->getCallFrameSizeAt(MI); @@ -18445,7 +18451,7 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, SDValue AddOrZ = NegConstant != 0 ? Add : Z; SDValue Addc = DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType), - AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64), + AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64), DAG.getConstant(0, DL, CarryType)); return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f6b5d4af5ba4e..2d59f5a4dbf0b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -35432,28 +35433,6 @@ MVT X86TargetLowering::getPreferredSwitchConditionType(LLVMContext &Context, // X86 Scheduler Hooks //===----------------------------------------------------------------------===// -// Returns true if EFLAG is consumed after this iterator in the rest of the -// basic block or any successors of the basic block. -static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, - MachineBasicBlock *BB) { - // Scan forward through BB for a use/def of EFLAGS. - for (const MachineInstr &mi : llvm::make_range(std::next(Itr), BB->end())) { - if (mi.readsRegister(X86::EFLAGS, /*TRI=*/nullptr)) - return true; - // If we found a def, we can stop searching. - if (mi.definesRegister(X86::EFLAGS, /*TRI=*/nullptr)) - return false; - } - - // If we hit the end of the block, check whether EFLAGS is live into a - // successor. - for (MachineBasicBlock *Succ : BB->successors()) - if (Succ->isLiveIn(X86::EFLAGS)) - return true; - - return false; -} - /// Utility function to emit xbegin specifying the start of an RTM region. static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, const TargetInstrInfo *TII) { @@ -35486,7 +35465,7 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, MF->insert(I, fallMBB); MF->insert(I, sinkMBB); - if (isEFLAGSLiveAfter(MI, MBB)) { + if (isPhysRegLiveAfter(X86::EFLAGS, MI)) { mainMBB->addLiveIn(X86::EFLAGS); fallMBB->addLiveIn(X86::EFLAGS); sinkMBB->addLiveIn(X86::EFLAGS); @@ -35825,7 +35804,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI, static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock* BB, const TargetRegisterInfo* TRI) { - if (isEFLAGSLiveAfter(SelectItr, BB)) + if (isPhysRegLiveAfter(X86::EFLAGS, SelectItr)) return false; // We found a def, or hit the end of the basic block and EFLAGS wasn't live diff --git a/llvm/test/CodeGen/PowerPC/carry-liveness-after-expand-isel.ll b/llvm/test/CodeGen/PowerPC/carry-liveness-after-expand-isel.ll new file mode 100644 index 0000000000000..15ab8aa05b329 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/carry-liveness-after-expand-isel.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck %s + +target datalayout = "E-m:e-p:32:32-Fn32-i64:64-n32" +target triple = "powerpc-unknown-linux-gnu" + +@md_seq_show___trans_tmp_57 = external global i8 + +define i32 @md_seq_show(i64 %0, i32 %1) #0 { + ; CHECK-LABEL: name: md_seq_show + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: liveins: $r3, $r4, $r5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprc = COPY $r5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprc = COPY $r4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc = COPY $r3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gprc = COPY [[COPY2]] + ; CHECK-NEXT: [[ADDIC:%[0-9]+]]:gprc = ADDIC [[COPY1]], 1, implicit-def $carry + ; CHECK-NEXT: [[CMPLWI:%[0-9]+]]:crrc = CMPLWI killed [[ADDIC]], 1 + ; CHECK-NEXT: [[LI:%[0-9]+]]:gprc_and_gprc_nor0 = LI 0 + ; CHECK-NEXT: [[LI1:%[0-9]+]]:gprc_and_gprc_nor0 = LI 1 + ; CHECK-NEXT: BCC 44, [[CMPLWI]], %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $carry + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: liveins: $carry + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprc_and_gprc_nor0 = PHI [[LI]], %bb.3, [[LI1]], %bb.0 + ; CHECK-NEXT: [[ADDZE:%[0-9]+]]:gprc = ADDZE [[COPY2]], implicit-def dead $carry, implicit $carry + ; CHECK-NEXT: [[ADDIC1:%[0-9]+]]:gprc = ADDIC [[ADDZE]], -1, implicit-def $carry + ; CHECK-NEXT: [[SUBFE:%[0-9]+]]:gprc_and_gprc_nor0 = SUBFE killed [[ADDIC1]], [[ADDZE]], implicit-def dead $carry, implicit $carry + ; CHECK-NEXT: [[CMPLWI1:%[0-9]+]]:crrc = CMPLWI [[ADDZE]], 0 + ; CHECK-NEXT: BCC 76, [[CMPLWI1]], %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; CHECK-NEXT: successors: %bb.1(0x55555556), %bb.2(0x2aaaaaaa) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprc = PHI [[SUBFE]], %bb.5, [[PHI]], %bb.4 + ; CHECK-NEXT: [[CMPLWI2:%[0-9]+]]:crrc = CMPLWI killed [[PHI1]], 0 + ; CHECK-NEXT: BCC 68, killed [[CMPLWI2]], %bb.2 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.for.cond.i.preheader: + ; CHECK-NEXT: [[LI2:%[0-9]+]]:gprc = LI 0 + ; CHECK-NEXT: $r3 = COPY [[LI2]] + ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.status_resync.exit: + ; CHECK-NEXT: [[ADDIC2:%[0-9]+]]:gprc = ADDIC [[COPY]], -1, implicit-def $carry + ; CHECK-NEXT: [[SUBFE1:%[0-9]+]]:gprc = SUBFE killed [[ADDIC2]], [[COPY]], implicit-def dead $carry, implicit $carry + ; CHECK-NEXT: [[LIS:%[0-9]+]]:gprc_and_gprc_nor0 = LIS target-flags(ppc-ha) @md_seq_show___trans_tmp_57 + ; CHECK-NEXT: STB killed [[SUBFE1]], target-flags(ppc-lo) @md_seq_show___trans_tmp_57, killed [[LIS]] :: (store (s8) into @md_seq_show___trans_tmp_57) + ; CHECK-NEXT: [[LI3:%[0-9]+]]:gprc = LI 0 + ; CHECK-NEXT: $r3 = COPY [[LI3]] + ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + + entry: + switch i64 %0, label %status_resync.exit [ + i64 -1, label %for.cond.i.preheader + i64 0, label %for.cond.i.preheader + ] + + for.cond.i.preheader: ; preds = %entry, %entry + ret i32 0 + + status_resync.exit: ; preds = %entry + %tobool = icmp ne i32 %1, 0 + %storedv = zext i1 %tobool to i8 + store i8 %storedv, ptr @md_seq_show___trans_tmp_57, align 1 + ret i32 0 +} + +attributes #0 = { "target-features"="-aix-shared-lib-tls-model-opt,-aix-small-local-dynamic-tls,-aix-small-local-exec-tls,-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } From 2a513857c7b725bcf399389c988a6411cf40be92 Mon Sep 17 00:00:00 2001 From: zhijian Date: Wed, 26 Mar 2025 12:19:20 -0400 Subject: [PATCH 4/5] fix a test-suite fail --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ab8e5040b45e9..0594d4245be5e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12197,7 +12197,7 @@ SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const { DAG, Subtarget); if (!IsAdd) Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry, - DAG.getAllOnesConstant(DL, CarryType)); + DAG.getConstant(1UL, DL, CarryType)); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry); } @@ -12221,7 +12221,7 @@ SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op, Subtarget); if (!IsAdd) CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp, - DAG.getAllOnesConstant(DL, CarryOp.getValueType())); + DAG.getConstant(1UL, DL, CarryOp.getValueType())); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp); } @@ -18472,7 +18472,7 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, DAG.getConstant(0, DL, MVT::i64), AddOrZ, DAG.getConstant(0, DL, CarryType)); SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1), - DAG.getAllOnesConstant(DL, CarryType)); + DAG.getConstant(1UL, DL, CarryType)); return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), Invert); } From ac7244adeea2b2f6f743634cbf12253ce619ca71 Mon Sep 17 00:00:00 2001 From: zhijian Date: Wed, 2 Apr 2025 17:57:03 -0400 Subject: [PATCH 5/5] change function from isPhysRegLiveAfter isPhysRegUsedAfter --- llvm/include/llvm/CodeGen/LivePhysRegs.h | 4 ++-- llvm/lib/CodeGen/LivePhysRegs.cpp | 4 +++- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LivePhysRegs.h b/llvm/include/llvm/CodeGen/LivePhysRegs.h index 232bd1f11600a..2a719571fde2d 100644 --- a/llvm/include/llvm/CodeGen/LivePhysRegs.h +++ b/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -195,8 +195,8 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB); -/// Check if physical register \p Reg is alive after \p MBI. -bool isPhysRegLiveAfter(Register Reg, MachineBasicBlock::iterator MBI); +/// Check if physical register \p Reg is used after \p MBI. +bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI); /// Convenience function for recomputing live-in's for a MBB. Returns true if /// any changes were made. diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index 0c28dfee9e984..bc711382420be 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -339,7 +339,9 @@ void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs, addLiveIns(MBB, LiveRegs); } -bool llvm::isPhysRegLiveAfter(Register Reg, MachineBasicBlock::iterator MBI) { +// Returns true if `Reg` is used after this iterator in the rest of the +// basic block or any successors of the basic block. +bool llvm::isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI) { assert(Reg.isPhysical() && "Apply to physical register only"); MachineBasicBlock *MBB = MBI->getParent(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0594d4245be5e..2cd35ddb86cef 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -13435,7 +13435,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, F->insert(It, copy0MBB); F->insert(It, sinkMBB); - if (isPhysRegLiveAfter(PPC::CARRY, MI.getIterator())) { + if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) { copy0MBB->addLiveIn(PPC::CARRY); sinkMBB->addLiveIn(PPC::CARRY); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2d59f5a4dbf0b..d883021fab5f2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35465,7 +35465,7 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, MF->insert(I, fallMBB); MF->insert(I, sinkMBB); - if (isPhysRegLiveAfter(X86::EFLAGS, MI)) { + if (isPhysRegUsedAfter(X86::EFLAGS, MI)) { mainMBB->addLiveIn(X86::EFLAGS); fallMBB->addLiveIn(X86::EFLAGS); sinkMBB->addLiveIn(X86::EFLAGS); @@ -35804,7 +35804,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI, static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock* BB, const TargetRegisterInfo* TRI) { - if (isPhysRegLiveAfter(X86::EFLAGS, SelectItr)) + if (isPhysRegUsedAfter(X86::EFLAGS, SelectItr)) return false; // We found a def, or hit the end of the basic block and EFLAGS wasn't live