[llvm] 9879e58 - [InlineAsm][AArch64]Add backend support for flag output parameters
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 26 09:44:49 PDT 2023
Author: Mingming Liu
Date: 2023-04-26T09:18:41-07:00
New Revision: 9879e5865a8c5429ceaa180f433f1e3140d105ed
URL: https://github.com/llvm/llvm-project/commit/9879e5865a8c5429ceaa180f433f1e3140d105ed
DIFF: https://github.com/llvm/llvm-project/commit/9879e5865a8c5429ceaa180f433f1e3140d105ed.diff
LOG: [InlineAsm][AArch64]Add backend support for flag output parameters
- The set of flags is taken from https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
Before:
- ARM64 GCC supports flag output constraints, while Clang doesn't parse the condition codes, as shown in https://gcc.godbolt.org/z/7jzMEK796
- LLVM ISel won't lower them either (as shown in https://gcc.godbolt.org/z/Pv4PPf56c)
After:
- Given flag output constraints in LLVM IR, the condition code is parsed and each flag output is lowered to a 'cset' (see the IR sketch below).
- Clang parsing support is not added in this patch.
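For illustration, a minimal IR sketch mirroring the added test file (the
function name is illustrative; the "={@cchi}" output constraint asks the
backend to materialize the "unsigned higher" condition as a 0/1 value):

  define i32 @flag_out(i64 %a) {
  entry:
    %r = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
    %flag = extractvalue { i64, i32 } %r, 1
    ret i32 %flag   ; lowered to 'cset w0, hi' after the inline asm
  }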
Differential Revision: https://reviews.llvm.org/D149032
Added:
llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll
Modified:
llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e0357c50e555a..3925611f14858 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -391,10 +391,12 @@ bool InlineAsmLowering::lowerInlineAsm(
Inst.addReg(SourceRegs[0]);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass. Find a register that we can use.
+ // C_RegisterClass/C_Other.
assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_RegisterClass);
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Other);
+ // Find a register that we can use.
if (OpInfo.Regs.empty()) {
LLVM_DEBUG(dbgs()
<< "Couldn't allocate output register for constraint\n");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cb54ee082e341..a5c19eb1bf126 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9864,6 +9864,72 @@ static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
return P;
}
+// The set of condition codes supported is from
+// https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
+static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint) {
+ AArch64CC::CondCode Cond = StringSwitch<AArch64CC::CondCode>(Constraint)
+ .Case("{@cchi}", AArch64CC::HI)
+ .Case("{@cccs}", AArch64CC::HS)
+ .Case("{@cclo}", AArch64CC::LO)
+ .Case("{@ccls}", AArch64CC::LS)
+ .Case("{@cccc}", AArch64CC::LO)
+ .Case("{@cceq}", AArch64CC::EQ)
+ .Case("{@ccgt}", AArch64CC::GT)
+ .Case("{@ccge}", AArch64CC::GE)
+ .Case("{@cclt}", AArch64CC::LT)
+ .Case("{@ccle}", AArch64CC::LE)
+ .Case("{@cchs}", AArch64CC::HS)
+ .Case("{@ccne}", AArch64CC::NE)
+ .Case("{@ccvc}", AArch64CC::VC)
+ .Case("{@ccpl}", AArch64CC::PL)
+ .Case("{@ccvs}", AArch64CC::VS)
+ .Case("{@ccmi}", AArch64CC::MI)
+ .Default(AArch64CC::Invalid);
+ return Cond;
+}
+
+/// Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR,
+/// WZR, invert(<cond>)'.
+static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ return DAG.getNode(
+ AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32), NZCV);
+}
+
+// Lower @cc flag output via getSETCC.
+SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+ AArch64CC::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
+ if (Cond == AArch64CC::Invalid)
+ return SDValue();
+ // The output variable should be a scalar integer.
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+ OpInfo.ConstraintVT.getSizeInBits() < 8)
+ report_fatal_error("Flag output operand is of invalid type");
+
+ // Get NZCV register. Only update chain when copyfrom is glued.
+ if (Glue.getNode()) {
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue);
+ Chain = Glue.getValue(1);
+ } else
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32);
+ // Extract CC code.
+ SDValue CC = getSETCC(Cond, Glue, DL, DAG);
+
+ SDValue Result;
+
+ // Truncate or ZERO_EXTEND based on value types.
+ if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
+ Result = DAG.getNode(ISD::TRUNCATE, DL, OpInfo.ConstraintVT, CC);
+ else
+ Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
+
+ return Result;
+}
+
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
@@ -9896,6 +9962,8 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
} else if (parsePredicateConstraint(Constraint) !=
PredicateConstraint::Invalid)
return C_RegisterClass;
+ else if (parseConstraintCode(Constraint) != AArch64CC::Invalid)
+ return C_Other;
return TargetLowering::getConstraintType(Constraint);
}
@@ -9993,7 +10061,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
: std::make_pair(0U, &AArch64::PPRRegClass);
}
}
- if (StringRef("{cc}").equals_insensitive(Constraint))
+ if (StringRef("{cc}").equals_insensitive(Constraint) ||
+ parseConstraintCode(Constraint) != AArch64CC::Invalid)
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
// Use the default implementation in TargetLowering to convert the register
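As a hedged sketch (this variant is not in the added test file), a 64-bit
flag output would take the ZERO_EXTEND branch above, since the CSINC that
implements 'cset' produces an i32:

  define i64 @flag_out_i64(i64 %a) {
  entry:
    ; i64 output: ConstraintVT is wider than 32 bits, so the 0/1 CSINC
    ; result is zero-extended rather than truncated.
    %r = tail call { i64, i64 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
    %flag = extractvalue { i64, i64 } %r, 1
    ret i64 %flag
  }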
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 82653d473f152..bd034669426f7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1165,6 +1165,12 @@ class AArch64TargetLowering : public TargetLowering {
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ /// Handle Lowering flag assembly outputs.
+ SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+ const SDLoc &DL,
+ const AsmOperandInfo &Constraint,
+ SelectionDAG &DAG) const override;
+
bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index fdde869224c11..0a2d695acb4e0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -154,6 +154,20 @@ define i32 @gc_intr() gc "statepoint-example" {
ret i32 %ret
}
+declare void @llvm.assume(i1)
+
+; FALLBACK-WITH-REPORT-ERR: <unknown>:0:0: unable to translate instruction: call: ' %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)' (in function: inline_asm_with_output_constraint)
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for inline_asm_with_output_constraint
+; FALLBACK-WITH-REPORT-OUT-LABEL: inline_asm_with_output_constraint
+define i32 @inline_asm_with_output_constraint(i64 %a) {
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
attributes #1 = { "target-features"="+sve" }
attributes #2 = { "target-features"="+ls64" }
diff --git a/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll b/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll
new file mode 100644
index 0000000000000..c761758ee51d3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll
@@ -0,0 +1,259 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+define i32 @test_cchi(i64 %a) {
+; CHECK-LABEL: test_cchi:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_cccs(i64 %a) {
+; CHECK-LABEL: test_cccs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, hs
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cccs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_cclo(i64 %a) {
+; CHECK-LABEL: test_cclo:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cclo},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccls(i64 %a) {
+; CHECK-LABEL: test_ccls:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, ls
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccls},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_cccc(i64 %a) {
+; CHECK-LABEL: test_cccc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cccc},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_cceq(i64 %a) {
+; CHECK-LABEL: test_cceq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cceq},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccgt(i64 %a) {
+; CHECK-LABEL: test_ccgt:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccgt},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccge(i64 %a) {
+; CHECK-LABEL: test_ccge:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, ge
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccge},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_cclt(i64 %a) {
+; CHECK-LABEL: test_cclt:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cclt},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccle(i64 %a) {
+; CHECK-LABEL: test_ccle:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, le
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccle},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_cchs(i64 %a) {
+; CHECK-LABEL: test_cchs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, hs
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccne(i64 %a) {
+; CHECK-LABEL: test_ccne:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccne},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccvc(i64 %a) {
+; CHECK-LABEL: test_ccvc:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, vc
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccvc},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccpl(i64 %a) {
+; CHECK-LABEL: test_ccpl:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, pl
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccpl},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccvs(i64 %a) {
+; CHECK-LABEL: test_ccvs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, vs
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccvs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+define i32 @test_ccmi(i64 %a) {
+; CHECK-LABEL: test_ccmi:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: //APP
+; CHECK-NEXT: subs x0, x0, #3
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccmi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)
+ %asmresult1 = extractvalue { i64, i32 } %0, 1
+ %1 = icmp ult i32 %asmresult1, 2
+ tail call void @llvm.assume(i1 %1)
+ ret i32 %asmresult1
+}
+
+declare void @llvm.assume(i1)