[llvm] 41eba6c - [AArch64][GISel] Legalize non-power-of-two G_CTTZ
Archibald Elliott via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 13 05:38:46 PDT 2023
Author: Archibald Elliott
Date: 2023-04-13T13:38:40+01:00
New Revision: 41eba6cddec05fa41720ccf1eb5dd7a906050af0
URL: https://github.com/llvm/llvm-project/commit/41eba6cddec05fa41720ccf1eb5dd7a906050af0
DIFF: https://github.com/llvm/llvm-project/commit/41eba6cddec05fa41720ccf1eb5dd7a906050af0.diff
LOG: [AArch64][GISel] Legalize non-power-of-two G_CTTZ
The main change here is to add a `widenScalarToNextPow2` before the
`clampScalar` so that non-power-of-two scalar sizes between 32 and 64 bits
are widened to s64 count-trailing-zeroes operations.
However, if the legalisation rules depend on TypeIdx 0 (the output), the
s65 testcase still crashes. I solved this by instead flipping the rules
around to be about TypeIdx 1 (the input), with a `scalarSameSizeAs` at the
end to tie index 0 to index 1. This, incidentally, is how the rules are
written for `G_CTLZ`.
Differential Revision: https://reviews.llvm.org/D147602
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 51499d7ef1f83..f4aaa5886d6dd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -699,8 +699,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CTTZ)
.lowerIf(isVector(0))
- .clampScalar(0, s32, s64)
- .scalarSameSizeAs(1, 0)
+ .widenScalarToNextPow2(1, /*Min=*/32)
+ .clampScalar(1, s32, s64)
+ .scalarSameSizeAs(0, 1)
.legalIf([=](const LegalityQuery &Query) {
return (HasCSSC && typeInSet(0, {s32, s64})(Query));
})
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
index 045d05e739c12..ef6af8fe22dc4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir
@@ -16,8 +16,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
- ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s8
; CHECK-CSSC: liveins: $w0
@@ -26,8 +25,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
- ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
- ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s8) = G_IMPLICIT_DEF
%cttz:_(s8) = G_CTTZ_ZERO_UNDEF %val(s8)
@@ -50,8 +48,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
- ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s16
; CHECK-CSSC: liveins: $w0
@@ -60,8 +57,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
- ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
- ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s16) = G_IMPLICIT_DEF
%cttz:_(s16) = G_CTTZ_ZERO_UNDEF %val(s16)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
index 6618ced662fda..17b80e03df7e2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
@@ -17,8 +17,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
- ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s8
; CHECK-CSSC: liveins: $w0
@@ -27,8 +26,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
- ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
- ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s8) = G_IMPLICIT_DEF
%cttz:_(s8) = G_CTTZ %val(s8)
@@ -51,8 +49,7 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32)
- ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-CSSC-LABEL: name: s16
; CHECK-CSSC: liveins: $w0
@@ -61,8 +58,7 @@ body: |
; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536
; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]]
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32)
- ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32)
- ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-CSSC-NEXT: $w0 = COPY [[CTTZ]](s32)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
%val:_(s16) = G_IMPLICIT_DEF
%cttz:_(s16) = G_CTTZ %val(s16)
@@ -166,3 +162,104 @@ body: |
RET_ReallyLR implicit $q0
...
+---
+name: s35
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: s35
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
+ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ ; CHECK-CSSC-LABEL: name: s35
+ ; CHECK-CSSC: liveins: $x0
+ ; CHECK-CSSC-NEXT: {{ $}}
+ ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
+ ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+ ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
+ ; CHECK-CSSC-NEXT: $x0 = COPY [[CTTZ]](s64)
+ ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0
+ %1:_(s64) = COPY $x0
+ %0:_(s35) = G_TRUNC %1(s64)
+ %2:_(s35) = G_CTTZ %0(s35)
+ %3:_(s64) = G_ANYEXT %2(s35)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: s65
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: s65
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR1]]
+ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+ ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
+ ; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE1]](s64)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTLZ1]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+ ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
+ ; CHECK-NEXT: $x1 = COPY [[SELECT1]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ ; CHECK-CSSC-LABEL: name: s65
+ ; CHECK-CSSC: liveins: $x0, $x1
+ ; CHECK-CSSC-NEXT: {{ $}}
+ ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-CSSC-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-CSSC-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-CSSC-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[C]]
+ ; CHECK-CSSC-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[C1]]
+ ; CHECK-CSSC-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+ ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR1]](s64)
+ ; CHECK-CSSC-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+ ; CHECK-CSSC-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTTZ]], [[C2]]
+ ; CHECK-CSSC-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
+ ; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+ ; CHECK-CSSC-NEXT: [[CTTZ1:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
+ ; CHECK-CSSC-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
+ ; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTTZ1]]
+ ; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+ ; CHECK-CSSC-NEXT: $x0 = COPY [[SELECT]](s64)
+ ; CHECK-CSSC-NEXT: $x1 = COPY [[SELECT1]](s64)
+ ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %1:_(s64) = COPY $x0
+ %2:_(s64) = COPY $x1
+ %3:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
+ %0:_(s65) = G_TRUNC %3(s128)
+ %4:_(s65) = G_CTTZ %0(s65)
+ %7:_(s128) = G_ANYEXT %4(s65)
+ %5:_(s64), %6:_(s64) = G_UNMERGE_VALUES %7(s128)
+ $x0 = COPY %5(s64)
+ $x1 = COPY %6(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
More information about the llvm-commits mailing list