[llvm] [GlobalISel] Use aext in ctlz_zero_undef widenScalar expansion (PR #181506)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 15 06:54:43 PST 2026
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/181506
>From da9a2d35d046f4da643432f191f9ea91f7b8ea17 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Sat, 14 Feb 2026 20:48:39 +0000
Subject: [PATCH 1/2] [GlobalISel] Use aext in ctlz_zero_undef expansion
---
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 41b79ccb4e4d1..ffd0814f86010 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2816,6 +2816,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (Opcode) {
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: // undef bits shifted out below
ExtOpc = TargetOpcode::G_ANYEXT;
break;
case TargetOpcode::G_CTLS:
>From 2b3f663d248e726d9051c7376107cd6cf81eb934 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Sun, 15 Feb 2026 14:54:29 +0000
Subject: [PATCH 2/2] update tests
---
.../GlobalISel/legalize-ctlz-zero-undef.mir | 28 +++++++++----------
.../ARM/GlobalISel/arm-legalize-bitcounts.mir | 4 +--
.../GlobalISel/LegalizerHelperTest.cpp | 2 +-
3 files changed, 15 insertions(+), 19 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
index 68587630e2195..f0f62dd2f98be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
@@ -83,9 +83,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32_]], [[C1]]
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
@@ -148,14 +148,14 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[AMDGPU_FFBH_U32:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; CHECK-NEXT: [[AMDGPU_FFBH_U321:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL2]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U321]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32_]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32_1]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
@@ -176,9 +176,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[FFBH:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[SHL]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FFBH]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_FFBH_U32_]], [[C1]]
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s7) = G_TRUNC %0
@@ -198,10 +198,8 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[C1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL]](s64)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir
index 7cbe5de22debc..6b8cb4c6c5a1a 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir
@@ -145,14 +145,12 @@ body: |
liveins: $r0
; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0
- ; CHECK: [[BITMASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[X]], [[BITMASK]]
%0(s32) = COPY $r0
%1(s8) = G_TRUNC %0(s32)
; Check that the operation is performed for 32 bits
; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SHL [[X32]], [[BITDIFF]]
+ ; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SHL [[X]], [[BITDIFF]]
; LIBCALLS-NOT: G_CTLZ
; LIBCALLS: ADJCALLSTACKDOWN
; LIBCALLS: $r0 = COPY [[R32]]
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index f80082fdd46d0..b4f27919e7ea5 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -609,7 +609,7 @@ TEST_F(AArch64GISelMITest, WidenBitCountingCTLZZeroUndef) {
auto CheckStr = R"(
CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC
- CHECK: [[Zext:%[0-9]+]]:_(s16) = G_ZEXT [[Trunc]]
+ CHECK: [[Zext:%[0-9]+]]:_(s16) = G_ANYEXT [[Trunc]]
CHECK: [[Cst8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
CHECK: [[Shl:%[0-9]+]]:_(s16) = G_SHL [[Zext]]:_, [[Cst8]]:_
CHECK: [[CtlzZu:%[0-9]+]]:_(s16) = G_CTLZ_ZERO_UNDEF [[Shl]]
More information about the llvm-commits mailing list