[clang] [llvm] Add support for flag output operand "=@cc" for SystemZ. (PR #125970)

Wed Feb 5 16:23:33 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (anoopkg6)

<details>
<summary>Changes</summary>

Add support for flag output operand "=@cc" for SystemZ and optimizing conditional branch for 14 possible combinations of CC mask.

---

Patch is 616.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125970.diff


21 Files Affected:

- (modified) clang/lib/Basic/Targets/SystemZ.cpp (+11) 
- (modified) clang/lib/Basic/Targets/SystemZ.h (+5) 
- (modified) clang/lib/CodeGen/CGStmt.cpp (+8-2) 
- (added) clang/test/CodeGen/inline-asm-systemz-flag-output.c (+149) 
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+3) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+61-9) 
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4) 
- (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+598-2) 
- (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.h (+14) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand.ll (+500) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_eq_noteq.ll (+939) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccand_not.ll (+779) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed.ll (+2427) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_eq_noteq.ll (+5248) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccmixed_not.ll (+2543) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor.ll (+1047) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_eq_noteq.ll (+854) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccor_not.ll (+806) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor.ll (+784) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_eq_noteq.ll (+1083) 
- (added) llvm/test/CodeGen/SystemZ/flag_output_operand_ccxor_not.ll (+778) 


``````````diff

diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 06f08db2eadd475..49f88b45220d0c4 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -90,6 +90,14 @@ bool SystemZTargetInfo::validateAsmConstraint(
   case 'T': // Likewise, plus an index
     Info.setAllowsMemory();
     return true;
+  case '@':
+    // CC condition changes.
+    if (strlen(Name) >= 3 && *(Name + 1) == 'c' && *(Name + 2) == 'c') {
+      Name += 2;
+      Info.setAllowsRegister();
+      return true;
+    }
+    return false;
   }
 }
 
@@ -150,6 +158,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
 
 void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
                                          MacroBuilder &Builder) const {
+  // Inline assembly supports SystemZ flag outputs.
+  Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
+
   Builder.defineMacro("__s390__");
   Builder.defineMacro("__s390x__");
   Builder.defineMacro("__zarch__");
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index ef9a07033a6e4ff..a6909ababdec001 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -118,6 +118,11 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
                              TargetInfo::ConstraintInfo &info) const override;
 
   std::string convertConstraint(const char *&Constraint) const override {
+    if (strncmp(Constraint, "@cc", 3) == 0) {
+      std::string Converted = "{" + std::string(Constraint, 3) + "}";
+      Constraint += 3;
+      return Converted;
+    }
     switch (Constraint[0]) {
     case 'p': // Keep 'p' constraint.
       return std::string("p");
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 41dc91c578c800a..27f7bb652895839 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2563,9 +2563,15 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
     if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
       // Target must guarantee the Value `Tmp` here is lowered to a boolean
       // value.
-      llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
+      unsigned CCUpperBound = 2;
+      if (CGF.getTarget().getTriple().getArch() == llvm::Triple::systemz) {
+        // On this target CC value can be in range [0, 3].
+        CCUpperBound = 4;
+      }
+      llvm::Constant *CCUpperBoundConst =
+          llvm::ConstantInt::get(Tmp->getType(), CCUpperBound);
       llvm::Value *IsBooleanValue =
-          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
+          Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, CCUpperBoundConst);
       llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
       Builder.CreateCall(FnAssume, IsBooleanValue);
     }
diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
new file mode 100644
index 000000000000000..ab90e031df1f2b8
--- /dev/null
+++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
@@ -0,0 +1,149 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s
+// CHECK-LABEL: define dso_local signext i32 @foo_012(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 2
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_012(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 2 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_013(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 1
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_013(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 1 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_023(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_023(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 0 || cc == 2 || cc == 3 ? 42 : 0;
+}
+
+// CHECK-LABEL: define dso_local signext i32 @foo_123(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CC:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i32 } asm sideeffect "ahi $0,42\0A", "=d,={@cc},0"(i32 [[TMP0]]) #[[ATTR2]], !srcloc [[META5:![0-9]+]]
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ASMRESULT]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT:    call void @llvm.assume(i1 [[TMP2]])
+// CHECK-NEXT:    store i32 [[ASMRESULT1]], ptr [[CC]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], 1
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LOR_END:.*]], label %[[LOR_LHS_FALSE:.*]]
+// CHECK:       [[LOR_LHS_FALSE]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP4]], 2
+// CHECK-NEXT:    br i1 [[CMP2]], label %[[LOR_END]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[TMP5]], 3
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ true, %[[LOR_LHS_FALSE]] ], [ true, %[[ENTRY]] ], [ [[CMP3]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP6]], i32 42, i32 0
+// CHECK-NEXT:    ret i32 [[COND]]
+//
+int foo_123(int x) {
+  int cc;
+  asm volatile ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+  return cc == 1 || cc == 2 || cc == 3 ? 42 : 0;
+}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e0b638201a04740..cb136fe2f446b43 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5071,6 +5071,9 @@ class TargetLowering : public TargetLoweringBase {
                                             std::vector<SDValue> &Ops,
                                             SelectionDAG &DAG) const;
 
+  // Lower switch statement for flag output operand with SRL/IPM Sequence.
+  virtual bool canLowerSRL_IPM_Switch(SDValue Cond) const;
+
   // Lower custom output constraints. If invalid, return SDValue().
   virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
                                               const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3b046aa25f54440..a32787bc882f175 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2831,8 +2831,37 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
       Opcode = Instruction::And;
     else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
       Opcode = Instruction::Or;
-
-    if (Opcode &&
+    auto &TLI = DAG.getTargetLoweringInfo();
+    bool BrSrlIPM = FuncInfo.MF->getTarget().getTargetTriple().getArch() ==
+                    Triple::ArchType::systemz;
+    // For Flag output operands SRL/IPM sequence, we want to avoid
+    // creating switch case, as it creates Basic Block and inhibits
+    // optimization in DAGCombiner for flag output operands.
+    const auto checkSRLIPM = [&TLI](const SDValue &Op) {
+      if (!Op.getNumOperands())
+        return false;
+      SDValue OpVal = Op.getOperand(0);
+      SDNode *N = OpVal.getNode();
+      if (N && N->getOpcode() == ISD::SRL)
+        return TLI.canLowerSRL_IPM_Switch(OpVal);
+      else if (N && OpVal.getNumOperands() &&
+               (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::OR)) {
+        SDValue OpVal1 = OpVal.getOperand(0);
+        SDNode *N1 = OpVal1.getNode();
+        if (N1 && N1->getOpcode() == ISD::SRL)
+          return TLI.canLowerSRL_IPM_Switch(OpVal1);
+      }
+      return false;
+    };
+    if (BrSrlIPM) {
+      if (NodeMap.count(BOp0) && NodeMap[BOp0].getNode()) {
+        BrSrlIPM &= checkSRLIPM(getValue(BOp0));
+        if (NodeMap.count(BOp1) && NodeMap[BOp1].getNode())
+          BrSrlIPM &= checkSRLIPM(getValue(BOp1));
+      } else
+        BrSrlIPM = false;
+    }
+    if (Opcode && !BrSrlIPM &&
         !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
           match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
         !shouldKeepJumpConditionsTogether(
@@ -12043,18 +12072,41 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
       const APInt &SmallValue = Small.Low->getValue();
       const APInt &BigValue = Big.Low->getValue();
 
+      // Creating switch cases optimizing tranformation inhibits DAGCombiner
+      // for SystemZ for flag output operands. DAGCobiner compute cumulative
+      // CCMask for flag output operands SRL/IPM sequence, we want to avoid
+      // creating switch case, as it creates Basic Block and inhibits
+      // optimization in DAGCombiner for flag output operands.
+      // cases like (CC == 0) || (CC == 2) || (CC == 3), or
+      // (CC == 0) || (CC == 1) ^ (CC == 3), there could potentially be
+      // more cases like this.
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      bool IsSrlIPM = false;
+      if (NodeMap.count(Cond) && NodeMap[Cond].getNode())
+        IsSrlIPM = CurMF->getTarget().getTargetTriple().getArch() ==
+                       Triple::ArchType::systemz &&
+                   TLI.canLowerSRL_IPM_Switch(getValue(Cond));
       // Check that there is only one bit different.
       APInt CommonBit = BigValue ^ SmallValue;
-      if (CommonBit.isPowerOf2()) {
+      if (CommonBit.isPowerOf2() || IsSrlIPM) {
         SDValue CondLHS = getValue(Cond);
         EVT VT = CondLHS.getValueType();
         SDLoc DL = getCurSDLoc();
-
-        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
-                                 DAG.getConstant(CommonBit, DL, VT));
-        SDValue Cond = DAG.getSetCC(
-            DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
-            ISD::SETEQ);
+        SDValue Cond;
+
+        if (CommonBit.isPowerOf2()) {
+          SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+                                   DAG.getConstant(CommonBit, DL, VT));
+          Cond = DAG.getSetCC(DL, MVT::i1, Or,
+                              DAG.getConstant(BigValue | SmallValue, DL, VT),
+                              ISD::SETEQ);
+        } else if (IsSrlIPM && BigValue == 3 && SmallValue == 0) {
+          SDValue SetCC =
+              DAG.getSetCC(DL, MVT::i32, CondLHS,
+                           DAG.getConstant(SmallValue, DL, VT), ISD::SETEQ);
+          Cond = DAG.getSetCC(DL, MVT::i32, SetCC,
+                              DAG.getConstant(BigValue, DL, VT), ISD::SETEQ);
+        }
 
         // Update successor info.
         // Both Small and Big will jump to Small.BB, so we sum up the
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8287565336b54d1..3d48adac509cb9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5563,6 +5563,10 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   return nullptr;
 }
 
+bool TargetLowering::canLowerSRL_IPM_Switch(SDValue Cond) const {
+  return false;
+}
+
 SDValue TargetLowering::LowerAsmOutputForConstraint(
     SDValue &Chain, SDValue &Glue, const SDLoc &DL,
     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3999b54de81b657..259da48a3b22321 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1207,6 +1207,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
     default:
       break;
     }
+  } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
+    if (StringRef("{@cc}").compare(Constraint) == 0)
+      return C_Other;
   }
   return TargetLowering::getConstraintType(Constraint);
 }
@@ -1389,6 +1392,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
       return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                  SystemZMC::VR128Regs, 32);
     }
+    if (Constraint[1] == '@') {
+      if (StringRef("{@cc}").compare(Constraint) == 0)
+        return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+    }
   }
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
@@ -1421,6 +1428,35 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
   return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
 }
 
+// Lower @cc targets via setcc.
+SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+  if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
+    return SDValue();
+
+  // Check that return type is valid.
+  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+      OpInfo.ConstraintVT.getSizeInBits() < 8)
+    report_fatal_error("Glue output operand is of invalid type");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.addLiveIn(SystemZ::CC);
+
+  if (Glue.getNode()) {
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
+    Chain = Glue.getValue(1);
+  } else
+    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
+
+  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+  SDValue CC = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+                           DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+
+  return CC;
+}
+
 void SystemZTargetLowering::LowerAsmOperandForConstraint(
     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
     SelectionDAG &DAG) const {
@@ -2485,6 +2521,21 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
 #undef CONV
 }
 
+static unsigned CCMaskForSystemZCCVal(unsigned CC) {
+  switch (CC) {
+  default:
+    llvm_unreachable("invalid integer condition!");
+  case 0:
+    return SystemZ::CCMASK_CMP_EQ;
+  case 1:
+    return SystemZ::CCMASK_CMP_LT;
+  case 2:
+    return SystemZ::CCMASK_CMP_GT;
+  case 3:
+    return SystemZ::CCMASK_CMP_UO;
+  }
+}
+
 // If C can be converted to a comparison against zero, ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/125970