[llvm] 41eba6c - [AArch64][GISel] Legalize non-power-of-two G_CTTZ

Archibald Elliott via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 13 05:38:46 PDT 2023


Author: Archibald Elliott
Date: 2023-04-13T13:38:40+01:00
New Revision: 41eba6cddec05fa41720ccf1eb5dd7a906050af0

URL: https://github.com/llvm/llvm-project/commit/41eba6cddec05fa41720ccf1eb5dd7a906050af0
DIFF: https://github.com/llvm/llvm-project/commit/41eba6cddec05fa41720ccf1eb5dd7a906050af0.diff

LOG: [AArch64][GISel] Legalize non-power-of-two G_CTTZ

The main change here is to add a `widenScalarToNextPow2` before the
`clampScalar`, so that a G_CTTZ on a non-power-of-two size between 32
and 64 bits (e.g. s35) is widened to an s64 G_CTTZ.

However, if the legalisation rules are keyed on TypeIdx 0 (the output),
the s65 testcase still crashes. I solved this by flipping the rules
around to key on TypeIdx 1 (the input) instead, with a
`scalarSameSizeAs` at the end to tie index 0 to index 1. This,
incidentally, is how the rules are written for `G_CTLZ`.

Differential Revision: https://reviews.llvm.org/D147602

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 51499d7ef1f83..f4aaa5886d6dd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -699,8 +699,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_CTTZ)
       .lowerIf(isVector(0))
-      .clampScalar(0, s32, s64)
-      .scalarSameSizeAs(1, 0)
+      .widenScalarToNextPow2(1, /*Min=*/32)
+      .clampScalar(1, s32, s64)
+      .scalarSameSizeAs(0, 1)
       .legalIf([=](const LegalityQuery &Query) {
         return (HasCSSC && typeInSet(0, {s32, s64})(Query));
       })

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
index 045d05e739c12..ef6af8fe22dc4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
@@ -16,8 +16,7 @@ body:             |
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
     ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ; CHECK-CSSC-LABEL: name: s8
     ; CHECK-CSSC: liveins: $w0
@@ -26,8 +25,7 @@ body:             |
     ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
     ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
-    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
-    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %val:_(s8) = G_IMPLICIT_DEF
     %cttz:_(s8) = G_CTTZ_ZERO_UNDEF %val(s8)
@@ -50,8 +48,7 @@ body:             |
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
     ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ; CHECK-CSSC-LABEL: name: s16
     ; CHECK-CSSC: liveins: $w0
@@ -60,8 +57,7 @@ body:             |
     ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
     ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
-    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
-    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %val:_(s16) = G_IMPLICIT_DEF
     %cttz:_(s16) = G_CTTZ_ZERO_UNDEF %val(s16)

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
index 6618ced662fda..17b80e03df7e2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
@@ -17,8 +17,7 @@ body:             |
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
     ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ; CHECK-CSSC-LABEL: name: s8
     ; CHECK-CSSC: liveins: $w0
@@ -27,8 +26,7 @@ body:             |
     ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
     ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
-    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
-    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %val:_(s8) = G_IMPLICIT_DEF
     %cttz:_(s8) = G_CTTZ %val(s8)
@@ -51,8 +49,7 @@ body:             |
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
     ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ; CHECK-CSSC-LABEL: name: s16
     ; CHECK-CSSC: liveins: $w0
@@ -61,8 +58,7 @@ body:             |
     ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
     ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
     ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
-    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
-    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %val:_(s16) = G_IMPLICIT_DEF
     %cttz:_(s16) = G_CTTZ %val(s16)
@@ -166,3 +162,104 @@ body:             |
     RET_ReallyLR implicit $q0
 
 ...
+---
+name:            s35
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: s35
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
+    ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    ; CHECK-CSSC-LABEL: name: s35
+    ; CHECK-CSSC: liveins: $x0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
+    ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+    ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
+    ; CHECK-CSSC-NEXT: $x0 = COPY [[CTTZ]](s64)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0
+    %1:_(s64) = COPY $x0
+    %0:_(s35) = G_TRUNC %1(s64)
+    %2:_(s35) = G_CTTZ %0(s35)
+    %3:_(s64) = G_ANYEXT %2(s35)
+    $x0 = COPY %3(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            s65
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: s65
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR1]]
+    ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C2]]
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
+    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+    ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
+    ; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE1]](s64)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTLZ1]]
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+    ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
+    ; CHECK-NEXT: $x1 = COPY [[SELECT1]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    ; CHECK-CSSC-LABEL: name: s65
+    ; CHECK-CSSC: liveins: $x0, $x1
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-CSSC-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-CSSC-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+    ; CHECK-CSSC-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[C1]]
+    ; CHECK-CSSC-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+    ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR1]](s64)
+    ; CHECK-CSSC-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK-CSSC-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTTZ]], [[C2]]
+    ; CHECK-CSSC-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
+    ; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+    ; CHECK-CSSC-NEXT: [[CTTZ1:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
+    ; CHECK-CSSC-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
+    ; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTTZ1]]
+    ; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+    ; CHECK-CSSC-NEXT: $x0 = COPY [[SELECT]](s64)
+    ; CHECK-CSSC-NEXT: $x1 = COPY [[SELECT1]](s64)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %1:_(s64) = COPY $x0
+    %2:_(s64) = COPY $x1
+    %3:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
+    %0:_(s65) = G_TRUNC %3(s128)
+    %4:_(s65) = G_CTTZ %0(s65)
+    %7:_(s128) = G_ANYEXT %4(s65)
+    %5:_(s64), %6:_(s64) = G_UNMERGE_VALUES %7(s128)
+    $x0 = COPY %5(s64)
+    $x1 = COPY %6(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...


        


More information about the llvm-commits mailing list