[llvm] [AArch64][GlobalISel] Take abs scalar codegen closer to SDAG (PR #84886)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 12 01:21:54 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: Madhur Amilkanthwar (madhur13490)

<details>
<summary>Changes</summary>

This patch improves codegen for scalar (<128bits) version
of llvm.abs intrinsic by using the existing non-XOR based lowering.
This takes the generated code closer to SDAG.

codegen with GISel for > 128 bit types is not very good
with these method so not doing so.


---
Full diff: https://github.com/llvm/llvm-project/pull/84886.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+8) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir (+18-10) 
- (modified) llvm/test/CodeGen/AArch64/abs.ll (+15-15) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 36adada2796531..6aa43f8b14a782 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1021,6 +1021,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0)
+      // Do SMax based lowering for < 128 bits.
+      .customIf(
+          [=](const LegalityQuery &Q) {
+            LLT SrcTy = Q.Types[0];
+            return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
+          })
       .lower();
 
   // For fadd reductions we have pairwise operations available. We treat the
@@ -1262,6 +1268,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
     return legalizeDynStackAlloc(MI, Helper);
   case TargetOpcode::G_PREFETCH:
     return legalizePrefetch(MI, Helper);
+  case TargetOpcode::G_ABS:
+    return Helper.lowerAbsToMaxNeg(MI);
   }
 
   llvm_unreachable("expected switch to return");
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir
index 3123e304116fe5..cd27d4498e0ef9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir
@@ -8,11 +8,12 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: abs_s32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
-    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
-    ; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY]]
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[SUB]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
+    ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
+    ;
     ; CHECK-CSSC-LABEL: name: abs_s32
     ; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]]
@@ -28,11 +29,12 @@ body:             |
   bb.0:
     ; CHECK-LABEL: name: abs_s64
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[ASHR]]
-    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ADD]], [[ASHR]]
-    ; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY]]
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[SUB]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
+    ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
+    ;
     ; CHECK-CSSC-LABEL: name: abs_s64
     ; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s64) = G_ABS [[COPY]]
@@ -55,6 +57,7 @@ body:             |
     ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s16>) = G_ABS [[COPY]]
     ; CHECK-NEXT: $d0 = COPY [[ABS]](<4 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; CHECK-CSSC-LABEL: name: abs_v4s16
     ; CHECK-CSSC: liveins: $d0
     ; CHECK-CSSC-NEXT: {{  $}}
@@ -82,6 +85,7 @@ body:             |
     ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s16>) = G_ABS [[COPY]]
     ; CHECK-NEXT: $q0 = COPY [[ABS]](<8 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ;
     ; CHECK-CSSC-LABEL: name: abs_v8s16
     ; CHECK-CSSC: liveins: $q0
     ; CHECK-CSSC-NEXT: {{  $}}
@@ -109,6 +113,7 @@ body:             |
     ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<2 x s32>) = G_ABS [[COPY]]
     ; CHECK-NEXT: $d0 = COPY [[ABS]](<2 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; CHECK-CSSC-LABEL: name: abs_v2s32
     ; CHECK-CSSC: liveins: $d0
     ; CHECK-CSSC-NEXT: {{  $}}
@@ -136,6 +141,7 @@ body:             |
     ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s32>) = G_ABS [[COPY]]
     ; CHECK-NEXT: $q0 = COPY [[ABS]](<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ;
     ; CHECK-CSSC-LABEL: name: abs_v4s32
     ; CHECK-CSSC: liveins: $q0
     ; CHECK-CSSC-NEXT: {{  $}}
@@ -163,6 +169,7 @@ body:             |
     ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s8>) = G_ABS [[COPY]]
     ; CHECK-NEXT: $d0 = COPY [[ABS]](<8 x s8>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; CHECK-CSSC-LABEL: name: abs_v4s8
     ; CHECK-CSSC: liveins: $d0
     ; CHECK-CSSC-NEXT: {{  $}}
@@ -190,6 +197,7 @@ body:             |
     ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<16 x s8>) = G_ABS [[COPY]]
     ; CHECK-NEXT: $q0 = COPY [[ABS]](<16 x s8>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ;
     ; CHECK-CSSC-LABEL: name: abs_v16s8
     ; CHECK-CSSC: liveins: $q0
     ; CHECK-CSSC-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index e00f70b94e3b42..bfedd2bce8c86c 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -15,9 +15,9 @@ define i8 @abs_i8(i8 %a){
 ; CHECK-GI-LABEL: abs_i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sxtb w8, w0
-; CHECK-GI-NEXT:    asr w8, w8, #7
-; CHECK-GI-NEXT:    add w9, w0, w8
-; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    neg w9, w0
+; CHECK-GI-NEXT:    cmp w8, w9, sxtb
+; CHECK-GI-NEXT:    cneg w0, w0, le
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call i8 @llvm.abs.i8(i8 %a, i1 0)
@@ -36,9 +36,9 @@ define i16 @abs_i16(i16 %a){
 ; CHECK-GI-LABEL: abs_i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    sxth w8, w0
-; CHECK-GI-NEXT:    asr w8, w8, #15
-; CHECK-GI-NEXT:    add w9, w0, w8
-; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    neg w9, w0
+; CHECK-GI-NEXT:    cmp w8, w9, sxth
+; CHECK-GI-NEXT:    cneg w0, w0, le
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call i16 @llvm.abs.i16(i16 %a, i1 0)
@@ -55,9 +55,9 @@ define i32 @abs_i32(i32 %a){
 ;
 ; CHECK-GI-LABEL: abs_i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    asr w8, w0, #31
-; CHECK-GI-NEXT:    add w9, w0, w8
-; CHECK-GI-NEXT:    eor w0, w9, w8
+; CHECK-GI-NEXT:    neg w8, w0
+; CHECK-GI-NEXT:    cmp w0, w8
+; CHECK-GI-NEXT:    cneg w0, w0, le
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call i32 @llvm.abs.i32(i32 %a, i1 0)
@@ -74,9 +74,9 @@ define i64 @abs_i64(i64 %a){
 ;
 ; CHECK-GI-LABEL: abs_i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    asr x8, x0, #63
-; CHECK-GI-NEXT:    add x9, x0, x8
-; CHECK-GI-NEXT:    eor x0, x9, x8
+; CHECK-GI-NEXT:    neg x8, x0
+; CHECK-GI-NEXT:    cmp x0, x8
+; CHECK-GI-NEXT:    cneg x0, x0, le
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call i64 @llvm.abs.i64(i64 %a, i1 0)
@@ -248,9 +248,9 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
 ; CHECK-GI-LABEL: abs_v1i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    asr w9, w8, #31
-; CHECK-GI-NEXT:    add w8, w8, w9
-; CHECK-GI-NEXT:    eor w8, w8, w9
+; CHECK-GI-NEXT:    neg w9, w8
+; CHECK-GI-NEXT:    cmp w8, w9
+; CHECK-GI-NEXT:    cneg w8, w8, le
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret

``````````

</details>


https://github.com/llvm/llvm-project/pull/84886


More information about the llvm-commits mailing list