[clang] [llvm] Add support for flag output operand "=@cc" for SystemZ. (PR #125970)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 16:23:33 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (anoopkg6)
<details>
<summary>Changes</summary>
Add support for the flag output operand "=@cc" for SystemZ, and optimize conditional branches for the 14 possible combinations of CC mask.
---
Patch is 616.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125970.diff
21 Files Affected:
- (modified) clang/lib/Basic/Targets/SystemZ.cpp (+11)
- (modified) clang/lib/Basic/Targets/SystemZ.h (+5)
- (modified) clang/lib/CodeGen/CGStmt.cpp (+8-2)
- (added) clang/test/CodeGen/inline-asm-systemz-flag-output.c (+149)
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+3)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+61-9)
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4)
- (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+598-2)
- (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.h (+14)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand.ll (+500)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_eq_noteq.ll (+939)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_not.ll (+779)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed.ll (+2427)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_eq_noteq.ll (+5248)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_not.ll (+2543)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor.ll (+1047)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_eq_noteq.ll (+854)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_not.ll (+806)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor.ll (+784)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_eq_noteq.ll (+1083)
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_not.ll (+778)
``````````diff
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 06f08db2eadd475..49f88b45220d0c4 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -90,6 +90,14 @@ bool SystemZTargetInfo::validateAsmConstraint(
case 'T': // Likewise, plus an index
Info.setAllowsMemory();
return true;
+ case '@':
+ // CC condition changes.
+ if (strlen(Name) >= 3 && *(Name + 1) == 'c' && *(Name + 2) == 'c') {
+ Name += 2;
+ Info.setAllowsRegister();
+ return true;
+ }
+ return false;
}
}
@@ -150,6 +158,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
+ // Inline assembly supports SystemZ flag outputs.
+ Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
+
Builder.defineMacro("__s390__");
Builder.defineMacro("__s390x__");
Builder.defineMacro("__zarch__");
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index ef9a07033a6e4ff..a6909ababdec001 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -118,6 +118,11 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
TargetInfo::ConstraintInfo &info) const override;
std::string convertConstraint(const char *&Constraint) const override {
+ if (strncmp(Constraint, "@cc", 3) == 0) {
+ std::string Converted = "{" + std::string(Constraint, 3) + "}";
+ Constraint += 3;
+ return Converted;
+ }
switch (Constraint[0]) {
case 'p': // Keep 'p' constraint.
return std::string("p");
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 41dc91c578c800a..27f7bb652895839 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2563,9 +2563,15 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
// Target must guarantee the Value `Tmp` here is lowered to a boolean
// value.
- llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
+ unsigned CCUpperBound = 2;
+ if (CGF.getTarget().getTriple().getArch() == llvm::Triple::systemz) {
+ // On this target CC value can be in range [0, 3].
+ CCUpperBound = 4;
+ }
+ llvm::Constant *CCUpperBoundConst =
+ llvm::ConstantInt::get(Tmp->getType(), CCUpperBound);
llvm::Value *IsBooleanValue =
- Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
+ Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, CCUpperBoundConst);
llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
Builder.CreateCall(FnAssume, IsBooleanValue);
}
diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
new file mode 100644
index 000000000000000..ab90e031df1f2b8
--- /dev/null
+++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
@@ -0,0 +1,149 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s
+// CHECK-LABEL: define dso_local signext i32 @foo_012(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT: store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT: store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK: [[LOR_LHS_FALSE]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT: br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK: [[LOR_RHS]]:
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 2
+// CHECK-NEXT: br label %[[LOR_END]]
+// CHECK: [[LOR_END]]:
+// CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT: ret i32 [[COND]]
+//
+int foo_012(int x) {
+ int cc;
+ asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+ return cc == 0 || cc == 1 || cc == 2 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_013(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]]
+// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT: store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT: store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK: [[LOR_LHS_FALSE]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT: br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK: [[LOR_RHS]]:
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT: br label %[[LOR_END]]
+// CHECK: [[LOR_END]]:
+// CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT: ret i32 [[COND]]
+//
+int foo_013(int x) {
+ int cc;
+ asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+ return cc == 0 || cc == 1 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_023(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]]
+// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT: store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT: store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK: [[LOR_LHS_FALSE]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT: br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK: [[LOR_RHS]]:
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT: br label %[[LOR_END]]
+// CHECK: [[LOR_END]]:
+// CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT: ret i32 [[COND]]
+//
+int foo_023(int x) {
+ int cc;
+ asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+ return cc == 0 || cc == 2 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_123(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*]]:
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META5:![0-9]+]]
+// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT: store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT: store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], 1
+// CHECK-NEXT: br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK: [[LOR_LHS_FALSE]]:
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT: br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK: [[LOR_RHS]]:
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT: br label %[[LOR_END]]
+// CHECK: [[LOR_END]]:
+// CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT: ret i32 [[COND]]
+//
+int foo_123(int x) {
+ int cc;
+ asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+ return cc == 1 || cc == 2 || cc == 3 ? 42 : 0;
+}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e0b638201a04740..cb136fe2f446b43 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5071,6 +5071,9 @@ class TargetLowering : public TargetLoweringBase {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
+ // Lower switch statement for flag output operand with SRL/IPM Sequence.
+ virtual bool canLowerSRL_IPM_Switch(SDValue Cond) const;
+
// Lower custom output constraints. If invalid, return SDValue().
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3b046aa25f54440..a32787bc882f175 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2831,8 +2831,37 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
Opcode = Instruction::And;
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
-
- if (Opcode &&
+ auto &TLI = DAG.getTargetLoweringInfo();
+ bool BrSrlIPM = FuncInfo.MF->getTarget().getTargetTriple().getArch() ==
+ Triple::ArchType::systemz;
+ // For Flag output operands SRL/IPM sequence, we want to avoid
+ // creating switch case, as it creates Basic Block and inhibits
+ // optimization in DAGCombiner for flag output operands.
+ const auto checkSRLIPM = [&TLI](const SDValue &Op) {
+ if (!Op.getNumOperands())
+ return false;
+ SDValue OpVal = Op.getOperand(0);
+ SDNode *N = OpVal.getNode();
+ if (N && N->getOpcode() == ISD::SRL)
+ return TLI.canLowerSRL_IPM_Switch(OpVal);
+ else if (N && OpVal.getNumOperands() &&
+ (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::OR)) {
+ SDValue OpVal1 = OpVal.getOperand(0);
+ SDNode *N1 = OpVal1.getNode();
+ if (N1 && N1->getOpcode() == ISD::SRL)
+ return TLI.canLowerSRL_IPM_Switch(OpVal1);
+ }
+ return false;
+ };
+ if (BrSrlIPM) {
+ if (NodeMap.count(BOp0) && NodeMap[BOp0].getNode()) {
+ BrSrlIPM &= checkSRLIPM(getValue(BOp0));
+ if (NodeMap.count(BOp1) && NodeMap[BOp1].getNode())
+ BrSrlIPM &= checkSRLIPM(getValue(BOp1));
+ } else
+ BrSrlIPM = false;
+ }
+ if (Opcode && !BrSrlIPM &&
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
!shouldKeepJumpConditionsTogether(
@@ -12043,18 +12072,41 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
const APInt &SmallValue = Small.Low->getValue();
const APInt &BigValue = Big.Low->getValue();
+ // Creating switch cases optimizing transformation inhibits DAGCombiner
+ // for SystemZ for flag output operands. DAGCombiner computes cumulative
+ // CCMask for flag output operands SRL/IPM sequence, we want to avoid
+ // creating switch case, as it creates Basic Block and inhibits
+ // optimization in DAGCombiner for flag output operands.
+ // cases like (CC == 0) || (CC == 2) || (CC == 3), or
+ // (CC == 0) || (CC == 1) ^ (CC == 3), there could potentially be
+ // more cases like this.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsSrlIPM = false;
+ if (NodeMap.count(Cond) && NodeMap[Cond].getNode())
+ IsSrlIPM = CurMF->getTarget().getTargetTriple().getArch() ==
+ Triple::ArchType::systemz &&
+ TLI.canLowerSRL_IPM_Switch(getValue(Cond));
// Check that there is only one bit different.
APInt CommonBit = BigValue ^ SmallValue;
- if (CommonBit.isPowerOf2()) {
+ if (CommonBit.isPowerOf2() || IsSrlIPM) {
SDValue CondLHS = getValue(Cond);
EVT VT = CondLHS.getValueType();
SDLoc DL = getCurSDLoc();
-
- SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
- DAG.getConstant(CommonBit, DL, VT));
- SDValue Cond = DAG.getSetCC(
- DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
- ISD::SETEQ);
+ SDValue Cond;
+
+ if (CommonBit.isPowerOf2()) {
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, DL, VT));
+ Cond = DAG.getSetCC(DL, MVT::i1, Or,
+ DAG.getConstant(BigValue | SmallValue, DL, VT),
+ ISD::SETEQ);
+ } else if (IsSrlIPM && BigValue == 3 && SmallValue == 0) {
+ SDValue SetCC =
+ DAG.getSetCC(DL, MVT::i32, CondLHS,
+ DAG.getConstant(SmallValue, DL, VT), ISD::SETEQ);
+ Cond = DAG.getSetCC(DL, MVT::i32, SetCC,
+ DAG.getConstant(BigValue, DL, VT), ISD::SETEQ);
+ }
// Update successor info.
// Both Small and Big will jump to Small.BB, so we sum up the
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8287565336b54d1..3d48adac509cb9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5563,6 +5563,10 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
return nullptr;
}
+bool TargetLowering::canLowerSRL_IPM_Switch(SDValue Cond) const {
+ return false;
+}
+
SDValue TargetLowering::LowerAsmOutputForConstraint(
SDValue &Chain, SDValue &Glue, const SDLoc &DL,
const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3999b54de81b657..259da48a3b22321 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1207,6 +1207,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
}
+ } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
+ if (StringRef("{@cc}").compare(Constraint) == 0)
+ return C_Other;
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -1389,6 +1392,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
SystemZMC::VR128Regs, 32);
}
+ if (Constraint[1] == '@') {
+ if (StringRef("{@cc}").compare(Constraint) == 0)
+ return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+ }
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
@@ -1421,6 +1428,35 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
}
+// Lower @cc targets via setcc.
+SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+ if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
+ return SDValue();
+
+ // Check that return type is valid.
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+ OpInfo.ConstraintVT.getSizeInBits() < 8)
+ report_fatal_error("Glue output operand is of invalid type");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(SystemZ::CC);
+
+ if (Glue.getNode()) {
+ Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
+ Chain = Glue.getValue(1);
+ } else
+ Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
+
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+ SDValue CC = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+
+ return CC;
+}
+
void SystemZTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
@@ -2485,6 +2521,21 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#undef CONV
}
+static unsigned CCMaskForSystemZCCVal(unsigned CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("invalid integer condition!");
+ case 0:
+ return SystemZ::CCMASK_CMP_EQ;
+ case 1:
+ return SystemZ::CCMASK_CMP_LT;
+ case 2:
+ return SystemZ::CCMASK_CMP_GT;
+ case 3:
+ return SystemZ::CCMASK_CMP_UO;
+ }
+}
+
// If C can be converted to a comparison against zero, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/125970
More information about the llvm-commits
mailing list