[llvm] [AMDGPU] Correct bitshift legality transformation for small vectors (PR #140940)

Wed May 21 10:49:50 PDT 2025

https://github.com/zGoldthorpe created https://github.com/llvm/llvm-project/pull/140940

Fix for a bug found by the AMD fuzzing project.

The legaliser previously tried to widen a small vector such as `<4 x i1>` to a single `i16` during the legalisation of bitshifts, because the rule was written without consideration for vector operands. This patch adds a guard that restricts the transformation to scalar value types, allowing other legalisation transformations to step in for vectors.

>From 12bdc90cfb12e0eed63b34c20087826d1225a441 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Wed, 21 May 2025 12:22:50 -0500
Subject: [PATCH] Correct bitshift legalisation

---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  2 +-
 .../test/CodeGen/AMDGPU/widen-vector-shift.ll | 24 +++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/widen-vector-shift.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 667c466a998e0..eeb05f0acebed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1765,7 +1765,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
         // 32-bit amount.
         const LLT ValTy = Query.Types[0];
         const LLT AmountTy = Query.Types[1];
-        return ValTy.getSizeInBits() <= 16 &&
+        return ValTy.isScalar() && ValTy.getSizeInBits() <= 16 &&
                AmountTy.getSizeInBits() < 16;
       }, changeTo(1, S16));
     Shifts.maxScalarIf(typeIs(0, S16), 1, S16);
diff --git a/llvm/test/CodeGen/AMDGPU/widen-vector-shift.ll b/llvm/test/CodeGen/AMDGPU/widen-vector-shift.ll
new file mode 100644
index 0000000000000..1d40038abe911
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/widen-vector-shift.ll
@@ -0,0 +1,24 @@
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -O0 -print-after=legalizer %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK-LABEL: widen_ashr_i4:
+define amdgpu_kernel void @widen_ashr_i4(
+    ptr addrspace(1) %res, i4 %a, i4 %b) {
+; CHECK: G_ASHR %{{[0-9]+}}:_, %{{[0-9]+}}:_(s16)
+entry:
+  %res.val = ashr i4 %a, %b
+  store i4 %res.val, ptr addrspace(1) %res
+  ret void
+}
+
+; CHECK-LABEL: widen_ashr_v4i1:
+define amdgpu_kernel void @widen_ashr_v4i1(
+    ptr addrspace(1) %res, <4 x i1> %a, <4 x i1> %b) {
+; CHECK: G_ASHR %{{[0-9]+}}:_, %{{[0-9]+}}:_(s16)
+; CHECK: G_ASHR %{{[0-9]+}}:_, %{{[0-9]+}}:_(s16)
+; CHECK: G_ASHR %{{[0-9]+}}:_, %{{[0-9]+}}:_(s16)
+; CHECK: G_ASHR %{{[0-9]+}}:_, %{{[0-9]+}}:_(s16)
+entry:
+  %res.val = ashr <4 x i1> %a, %b
+  store <4 x i1> %res.val, ptr addrspace(1) %res
+  ret void
+}