[llvm] 9879e58 - [InlineAsm][AArch64] Add backend support for flag output parameters

Mingming Liu via llvm-commits <llvm-commits at lists.llvm.org>
Wed Apr 26 09:44:49 PDT 2023


Author: Mingming Liu
Date: 2023-04-26T09:18:41-07:00
New Revision: 9879e5865a8c5429ceaa180f433f1e3140d105ed

URL: https://github.com/llvm/llvm-project/commit/9879e5865a8c5429ceaa180f433f1e3140d105ed
DIFF: https://github.com/llvm/llvm-project/commit/9879e5865a8c5429ceaa180f433f1e3140d105ed.diff

LOG: [InlineAsm][AArch64] Add backend support for flag output parameters

- The set of supported flags is from https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands

Before:
- ARM64 GCC supports flag output constraints, while Clang doesn't parse the condition codes, as shown in https://gcc.godbolt.org/z/7jzMEK796
- LLVM ISel won't lower them either (as shown in https://gcc.godbolt.org/z/Pv4PPf56c)

After:
- Given flag output constraints in LLVM IR, the condition code is parsed and the flag output is lowered to 'cset', as sketched below.
- Clang parsing support is not added in this patch.
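
For context, here is roughly how a flag output operand is written in GCC-style
extended asm in C++. This is a hypothetical sketch: as noted above, Clang does
not yet parse this form for AArch64, so it only illustrates the GCC extension
whose LLVM IR encoding ("{@cchi}" etc., as in the tests below) this patch
learns to lower:

  #include <cstdint>

  // "=@cchi" requests the 'unsigned higher' condition flag left behind by
  // the asm; with this patch the matching IR constraint "{@cchi}" lowers
  // to 'cset w<n>, hi' instead of being rejected.
  bool sub3_leaves_higher(uint64_t a) {
    bool hi;
    __asm__("subs %0, %0, #3" : "+r"(a), "=@cchi"(hi));
    return hi;
  }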

Differential Revision: https://reviews.llvm.org/D149032

Added: 
    llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll

Modified: 
    llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e0357c50e555a..3925611f14858 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -391,10 +391,12 @@ bool InlineAsmLowering::lowerInlineAsm(
         Inst.addReg(SourceRegs[0]);
       } else {
         // Otherwise, this outputs to a register (directly for C_Register /
-        // C_RegisterClass. Find a register that we can use.
+        // C_RegisterClass/C_Other).
         assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
-               OpInfo.ConstraintType == TargetLowering::C_RegisterClass);
+               OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+               OpInfo.ConstraintType == TargetLowering::C_Other);
 
+        // Find a register that we can use.
         if (OpInfo.Regs.empty()) {
           LLVM_DEBUG(dbgs()
                      << "Couldn't allocate output register for constraint\n");

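A minimal standalone model of the check this hunk generalizes (the enum mirrors
TargetLowering::ConstraintType; the helper name is illustrative): AArch64
classifies "{@cc<cond>}" flag outputs as C_Other, yet they still materialize
through a register (NZCV), so the direct-register output path has to accept
them:

  enum class ConstraintType { C_Register, C_RegisterClass, C_Memory, C_Other };

  // After this patch the direct-register output path also admits C_Other,
  // covering flag outputs that are read back from NZCV rather than from an
  // allocatable register class.
  bool outputTakesRegisterPath(ConstraintType CT) {
    return CT == ConstraintType::C_Register ||
           CT == ConstraintType::C_RegisterClass ||
           CT == ConstraintType::C_Other;
  }
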
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cb54ee082e341..a5c19eb1bf126 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9864,6 +9864,72 @@ static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
   return P;
 }
 
+// The set of cc codes supported is from
+// https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
+static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint) {
+  AArch64CC::CondCode Cond = StringSwitch<AArch64CC::CondCode>(Constraint)
+                                 .Case("{@cchi}", AArch64CC::HI)
+                                 .Case("{@cccs}", AArch64CC::HS)
+                                 .Case("{@cclo}", AArch64CC::LO)
+                                 .Case("{@ccls}", AArch64CC::LS)
+                                 .Case("{@cccc}", AArch64CC::LO)
+                                 .Case("{@cceq}", AArch64CC::EQ)
+                                 .Case("{@ccgt}", AArch64CC::GT)
+                                 .Case("{@ccge}", AArch64CC::GE)
+                                 .Case("{@cclt}", AArch64CC::LT)
+                                 .Case("{@ccle}", AArch64CC::LE)
+                                 .Case("{@cchs}", AArch64CC::HS)
+                                 .Case("{@ccne}", AArch64CC::NE)
+                                 .Case("{@ccvc}", AArch64CC::VC)
+                                 .Case("{@ccpl}", AArch64CC::PL)
+                                 .Case("{@ccvs}", AArch64CC::VS)
+                                 .Case("{@ccmi}", AArch64CC::MI)
+                                 .Default(AArch64CC::Invalid);
+  return Cond;
+}
+
+/// Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR,
+/// WZR, invert(<cond>)'.
+static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL,
+                        SelectionDAG &DAG) {
+  return DAG.getNode(
+      AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
+      DAG.getConstant(0, DL, MVT::i32),
+      DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32), NZCV);
+}
+
+// Lower @cc flag output via getSETCC.
+SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+  AArch64CC::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
+  if (Cond == AArch64CC::Invalid)
+    return SDValue();
+  // The output variable should be a scalar integer.
+  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+      OpInfo.ConstraintVT.getSizeInBits() < 8)
+    report_fatal_error("Flag output operand is of invalid type");
+
+  // Get NZCV register. Only update chain when copyfrom is glued.
+  if (Glue.getNode()) {
+    Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue);
+    Chain = Glue.getValue(1);
+  } else
+    Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32);
+  // Extract CC code.
+  SDValue CC = getSETCC(Cond, Glue, DL, DAG);
+
+  SDValue Result;
+
+  // Truncate or ZERO_EXTEND based on value types.
+  if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
+    Result = DAG.getNode(ISD::TRUNCATE, DL, OpInfo.ConstraintVT, CC);
+  else
+    Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
+
+  return Result;
+}
+
 /// getConstraintType - Given a constraint letter, return the type of
 /// constraint it is for this target.
 AArch64TargetLowering::ConstraintType
@@ -9896,6 +9962,8 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
   } else if (parsePredicateConstraint(Constraint) !=
              PredicateConstraint::Invalid)
       return C_RegisterClass;
+  else if (parseConstraintCode(Constraint) != AArch64CC::Invalid)
+    return C_Other;
   return TargetLowering::getConstraintType(Constraint);
 }
 
@@ -9993,7 +10061,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
                         : std::make_pair(0U, &AArch64::PPRRegClass);
     }
   }
-  if (StringRef("{cc}").equals_insensitive(Constraint))
+  if (StringRef("{cc}").equals_insensitive(Constraint) ||
+      parseConstraintCode(Constraint) != AArch64CC::Invalid)
     return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
 
   // Use the default implementation in TargetLowering to convert the register

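Why the new getSETCC helper builds a CSINC node: on AArch64, 'cset Wd, cond'
is an alias for 'csinc Wd, wzr, wzr, invert(cond)'. A minimal executable model
of that equivalence (a sketch with illustrative names, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // CSINC semantics: Wd = cond ? Wn : Wm + 1.
  uint32_t csinc(uint32_t n, uint32_t m, bool cond) {
    return cond ? n : m + 1;
  }

  // CSET Wd, cond == CSINC Wd, WZR, WZR, invert(cond): with both source
  // operands wired to the zero register, the result is 1 exactly when the
  // inverted condition is false, i.e. when 'cond' holds.
  uint32_t cset(bool cond) {
    return csinc(/*Wn=WZR*/ 0, /*Wm=WZR*/ 0, /*cond=*/!cond);
  }

  int main() {
    assert(cset(true) == 1);
    assert(cset(false) == 0);
    return 0;
  }

The trailing TRUNCATE/ZERO_EXTEND in LowerAsmOutputForConstraint then resizes
this i32 result to the IR output type, which the earlier check restricts to a
scalar integer of at least 8 bits.
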
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 82653d473f152..bd034669426f7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1165,6 +1165,12 @@ class AArch64TargetLowering : public TargetLowering {
     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
   }
 
+  /// Handle lowering of flag assembly outputs.
+  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+                                      const SDLoc &DL,
+                                      const AsmOperandInfo &Constraint,
+                                      SelectionDAG &DAG) const override;
+
   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
   bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index fdde869224c11..0a2d695acb4e0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -154,6 +154,20 @@ define i32 @gc_intr() gc "statepoint-example" {
    ret i32 %ret
 }
 
+declare void @llvm.assume(i1)
+
+; FALLBACK-WITH-REPORT-ERR: <unknown>:0:0: unable to translate instruction: call: '  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)' (in function: inline_asm_with_output_constraint)
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for inline_asm_with_output_constraint
+; FALLBACK-WITH-REPORT-OUT-LABEL: inline_asm_with_output_constraint
+define i32 @inline_asm_with_output_constraint(i64 %a) {
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
 attributes #1 = { "target-features"="+sve" }
 attributes #2 = { "target-features"="+ls64" }
 

diff --git a/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll b/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll
new file mode 100644
index 0000000000000..c761758ee51d3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll
@@ -0,0 +1,259 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+define i32 @test_cchi(i64 %a) {
+; CHECK-LABEL: test_cchi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_cccs(i64 %a) {
+; CHECK-LABEL: test_cccs:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, hs
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cccs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_cclo(i64 %a) {
+; CHECK-LABEL: test_cclo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cclo},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccls(i64 %a) {
+; CHECK-LABEL: test_ccls:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, ls
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccls},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_cccc(i64 %a) {
+; CHECK-LABEL: test_cccc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cccc},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_cceq(i64 %a) {
+; CHECK-LABEL: test_cceq:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cceq},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccgt(i64 %a) {
+; CHECK-LABEL: test_ccgt:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccgt},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccge(i64 %a) {
+; CHECK-LABEL: test_ccge:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccge},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_cclt(i64 %a) {
+; CHECK-LABEL: test_cclt:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cclt},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccle(i64 %a) {
+; CHECK-LABEL: test_ccle:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccle},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_cchs(i64 %a) {
+; CHECK-LABEL: test_cchs:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, hs
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccne(i64 %a) {
+; CHECK-LABEL: test_ccne:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccne},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccvc(i64 %a) {
+; CHECK-LABEL: test_ccvc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, vc
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccvc},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccpl(i64 %a) {
+; CHECK-LABEL: test_ccpl:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, pl
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccpl},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccvs(i64 %a) {
+; CHECK-LABEL: test_ccvs:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, vs
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccvs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+define i32 @test_ccmi(i64 %a) {
+; CHECK-LABEL: test_ccmi:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    subs x0, x0, #3
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccmi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+  %asmresult1 = extractvalue { i64, i32 } %0, 1
+  %1 = icmp ult i32 %asmresult1, 2
+  tail call void @llvm.assume(i1 %1)
+  ret i32 %asmresult1
+}
+
+declare void @llvm.assume(i1)


More information about the llvm-commits mailing list