[llvm] [AArch64][GlobalISel] Take abs scalar codegen closer to SDAG (PR #84886)
Madhur Amilkanthwar via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 18 07:48:12 PDT 2024
https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/84886
>From 49d59313f9d5a8999ba3e6890ca3799eca08166a Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Tue, 12 Mar 2024 22:49:29 +0530
Subject: [PATCH] [AArch64][GlobalISel] Take abs scalar codegen closer to SDAG
This patch improves codegen for scalar (<128bits) version
of llvm.abs intrinsic by using the existing non-XOR based lowering.
This takes the generated code closer to SDAG.
codegen with GISel for > 128 bit types is not very good
with these method so not doing so.
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 21 +++++++-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++-
.../AArch64/GlobalISel/legalize-abs.mir | 28 ++++++----
llvm/test/CodeGen/AArch64/abs.ll | 51 +++++++------------
5 files changed, 64 insertions(+), 45 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 5bb3692f0a46b4..284f434fbb9b0c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -429,6 +429,7 @@ class LegalizerHelper {
LegalizeResult lowerDIVREM(MachineInstr &MI);
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
+ LegalizeResult lowerAbsToCNeg(MachineInstr &MI);
LegalizeResult lowerVectorReduction(MachineInstr &MI);
LegalizeResult lowerMemcpyInline(MachineInstr &MI);
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index abe23af00a7890..7aa867ba71c3b5 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3914,8 +3914,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_SSHLSAT:
case G_USHLSAT:
return lowerShlSat(MI);
- case G_ABS:
+ case G_ABS: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isScalar() && LI.isLegalOrCustom({G_ICMP, {Ty, Ty}}))
+ return lowerAbsToCNeg(MI);
return lowerAbsToAddXor(MI);
+ }
case G_SELECT:
return lowerSelect(MI);
case G_IS_FPCLASS:
@@ -8215,9 +8219,22 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
// %res = G_SMAX %a, %v2
Register SrcReg = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(SrcReg);
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
+ MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DestReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(SrcReg);
auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
- MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
+ auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, Ty, SrcReg, Zero);
+ MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 2ae2923dfb353e..649a3b8651bab0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -515,6 +515,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_ICMP)
.legalFor({{s32, s32},
{s32, s64},
+ {s64, s64},
{s32, p0},
{v4s32, v4s32},
{v2s32, v2s32},
@@ -724,7 +725,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
+ .legalFor({{s32, s32}, {s64, s32}, {s64, s64}, {p0, s32}})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.clampScalar(1, s32, s32)
@@ -1010,6 +1011,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
ABSActions
.legalFor({s32, s64});
ABSActions.legalFor(PackedVectorAllTypeList)
+ .customIf([=](const LegalityQuery &Q) {
+ LLT SrcTy = Q.Types[0];
+ return SrcTy.isScalar(); })
.widenScalarIf(
[=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
[=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
@@ -1262,6 +1266,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
return legalizeDynStackAlloc(MI, Helper);
case TargetOpcode::G_PREFETCH:
return legalizePrefetch(MI, Helper);
+ case TargetOpcode::G_ABS:
+ return Helper.lowerAbsToCNeg(MI);
}
llvm_unreachable("expected switch to return");
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir
index 3123e304116fe5..305b7e8cf31ea4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir
@@ -8,11 +8,12 @@ body: |
bb.0:
; CHECK-LABEL: name: abs_s32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
- ; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY]]
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
+ ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
+ ;
; CHECK-CSSC-LABEL: name: abs_s32
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]]
@@ -28,11 +29,12 @@ body: |
bb.0:
; CHECK-LABEL: name: abs_s64
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[ASHR]]
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ADD]], [[ASHR]]
- ; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY]]
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[COPY]], [[SUB]]
+ ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
+ ;
; CHECK-CSSC-LABEL: name: abs_s64
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s64) = G_ABS [[COPY]]
@@ -55,6 +57,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s16>) = G_ABS [[COPY]]
; CHECK-NEXT: $d0 = COPY [[ABS]](<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
+ ;
; CHECK-CSSC-LABEL: name: abs_v4s16
; CHECK-CSSC: liveins: $d0
; CHECK-CSSC-NEXT: {{ $}}
@@ -82,6 +85,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s16>) = G_ABS [[COPY]]
; CHECK-NEXT: $q0 = COPY [[ABS]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
+ ;
; CHECK-CSSC-LABEL: name: abs_v8s16
; CHECK-CSSC: liveins: $q0
; CHECK-CSSC-NEXT: {{ $}}
@@ -109,6 +113,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<2 x s32>) = G_ABS [[COPY]]
; CHECK-NEXT: $d0 = COPY [[ABS]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
+ ;
; CHECK-CSSC-LABEL: name: abs_v2s32
; CHECK-CSSC: liveins: $d0
; CHECK-CSSC-NEXT: {{ $}}
@@ -136,6 +141,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s32>) = G_ABS [[COPY]]
; CHECK-NEXT: $q0 = COPY [[ABS]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
+ ;
; CHECK-CSSC-LABEL: name: abs_v4s32
; CHECK-CSSC: liveins: $q0
; CHECK-CSSC-NEXT: {{ $}}
@@ -163,6 +169,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s8>) = G_ABS [[COPY]]
; CHECK-NEXT: $d0 = COPY [[ABS]](<8 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
+ ;
; CHECK-CSSC-LABEL: name: abs_v4s8
; CHECK-CSSC: liveins: $d0
; CHECK-CSSC-NEXT: {{ $}}
@@ -190,6 +197,7 @@ body: |
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<16 x s8>) = G_ABS [[COPY]]
; CHECK-NEXT: $q0 = COPY [[ABS]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
+ ;
; CHECK-CSSC-LABEL: name: abs_v16s8
; CHECK-CSSC: liveins: $q0
; CHECK-CSSC-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index e00f70b94e3b42..5a837dc59cba09 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -15,9 +15,8 @@ define i8 @abs_i8(i8 %a){
; CHECK-GI-LABEL: abs_i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxtb w8, w0
-; CHECK-GI-NEXT: asr w8, w8, #7
-; CHECK-GI-NEXT: add w9, w0, w8
-; CHECK-GI-NEXT: eor w0, w9, w8
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: cneg w0, w0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i8 @llvm.abs.i8(i8 %a, i1 0)
@@ -36,9 +35,8 @@ define i16 @abs_i16(i16 %a){
; CHECK-GI-LABEL: abs_i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
-; CHECK-GI-NEXT: asr w8, w8, #15
-; CHECK-GI-NEXT: add w9, w0, w8
-; CHECK-GI-NEXT: eor w0, w9, w8
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: cneg w0, w0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i16 @llvm.abs.i16(i16 %a, i1 0)
@@ -55,9 +53,8 @@ define i32 @abs_i32(i32 %a){
;
; CHECK-GI-LABEL: abs_i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: asr w8, w0, #31
-; CHECK-GI-NEXT: add w9, w0, w8
-; CHECK-GI-NEXT: eor w0, w9, w8
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cneg w0, w0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i32 @llvm.abs.i32(i32 %a, i1 0)
@@ -74,9 +71,8 @@ define i64 @abs_i64(i64 %a){
;
; CHECK-GI-LABEL: abs_i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: asr x8, x0, #63
-; CHECK-GI-NEXT: add x9, x0, x8
-; CHECK-GI-NEXT: eor x0, x9, x8
+; CHECK-GI-NEXT: cmp x0, #0
+; CHECK-GI-NEXT: cneg x0, x0, le
; CHECK-GI-NEXT: ret
entry:
%res = call i64 @llvm.abs.i64(i64 %a, i1 0)
@@ -85,23 +81,14 @@ entry:
declare i64 @llvm.abs.i64(i64, i1)
define i128 @abs_i128(i128 %a){
-; CHECK-SD-LABEL: abs_i128:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: asr x8, x1, #63
-; CHECK-SD-NEXT: eor x9, x0, x8
-; CHECK-SD-NEXT: eor x10, x1, x8
-; CHECK-SD-NEXT: subs x0, x9, x8
-; CHECK-SD-NEXT: sbc x1, x10, x8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: abs_i128:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: asr x8, x1, #63
-; CHECK-GI-NEXT: adds x9, x0, x8
-; CHECK-GI-NEXT: adc x10, x1, x8
-; CHECK-GI-NEXT: eor x0, x9, x8
-; CHECK-GI-NEXT: eor x1, x10, x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: abs_i128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: eor x9, x0, x8
+; CHECK-NEXT: eor x10, x1, x8
+; CHECK-NEXT: subs x0, x9, x8
+; CHECK-NEXT: sbc x1, x10, x8
+; CHECK-NEXT: ret
entry:
%res = call i128 @llvm.abs.i128(i128 %a, i1 0)
ret i128 %res
@@ -248,9 +235,9 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
; CHECK-GI-LABEL: abs_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: asr w9, w8, #31
-; CHECK-GI-NEXT: add w8, w8, w9
-; CHECK-GI-NEXT: eor w8, w8, w9
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: cneg w8, w9, le
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
More information about the llvm-commits
mailing list