[llvm] [AMDGCN] Use ZExt when handling indices in insertelement (PR #85718)

Mon Mar 18 16:46:49 PDT 2024

https://github.com/DataCorrupted created https://github.com/llvm/llvm-project/pull/85718

When `i1 true` is used as an index, SExt extends it to i32 -1. This would cause the BitVector to overflow.
The language reference specifies that the index shall be treated as an unsigned number; this patch uses ZExt instead of SExt so that the index is handled accordingly. (https://llvm.org/docs/LangRef.html#insertelement-instruction)

This patch fixes #85717


>From fe2d2aa7a21788d4d3ae92557d1c2790adb75cf6 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong96 at gmail.com>
Date: Mon, 18 Mar 2024 23:42:49 +0000
Subject: [PATCH] [AMDGCN] Use ZExt when handling indices in insertment element
 instruction.

This patch fixes #85717

Signed-off-by: Peter Rong <PeterRong96 at gmail.com>
---
 .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    |  4 ++--
 llvm/test/CodeGen/AMDGPU/pr85717.ll           | 24 +++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/pr85717.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f1cc4b524460e2..bddf3d958a1ae6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1749,7 +1749,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
     // Non constant index/out of bounds index -> folding is unlikely.
     // The latter is more of a sanity check because canonical IR should just
     // have replaced those with poison.
-    if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
+    if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
       return false;
 
     const auto *VecSrc = IE->getOperand(0);
@@ -1761,7 +1761,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
       return false;
 
     CurVal = VecSrc;
-    EltsCovered.set(Idx->getSExtValue());
+    EltsCovered.set(Idx->getZExtValue());
 
     // All elements covered.
     if (EltsCovered.all())
diff --git a/llvm/test/CodeGen/AMDGPU/pr85717.ll b/llvm/test/CodeGen/AMDGPU/pr85717.ll
new file mode 100644
index 00000000000000..d0483125d9355e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pr85717.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+;RUN: llc -mtriple=amdgcn -mcpu=gfx1036 < %s | FileCheck %s
+define void @test(i1 %Bool, ptr %Ptr, <32 x float> %Vec1, <32 x float> %Vec2) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %BB0
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT:  .LBB0_1: ; %BB1
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+BB0:
+  %I = insertelement <32 x float> %Vec1, float 4.200000e+01, i1 true
+  br label %BB1
+
+BB1:                                              ; preds = %BB0, %BB1, %BB2
+  %PHI = phi <32 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
+  store <32 x float> %PHI, ptr %Ptr, align 128
+  br i1 %Bool, label %BB1, label %BB2
+
+BB2:                                               ; preds = %BB1
+  br label %BB1
+}