[llvm] [AMDGCN] Use ZExt when handling indices in insertment element (PR #85718)

Mon Mar 18 16:47:22 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Peter Rong (DataCorrupted)

<details>
<summary>Changes</summary>

When i1 true is used as an index, SExt extends it to i32 -1. This would cause BitVector to overflow.
The language manual have specified that the index shall be treated as an unsigned number, this patch fixes that. (https://llvm.org/docs/LangRef.html#insertelement-instruction)

This patch fixes #85717


---
Full diff: https://github.com/llvm/llvm-project/pull/85718.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+2-2) 
- (added) llvm/test/CodeGen/AMDGPU/pr85717.ll (+24) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f1cc4b524460e2..bddf3d958a1ae6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1749,7 +1749,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
     // Non constant index/out of bounds index -> folding is unlikely.
     // The latter is more of a sanity check because canonical IR should just
     // have replaced those with poison.
-    if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
+    if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
       return false;
 
     const auto *VecSrc = IE->getOperand(0);
@@ -1761,7 +1761,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
       return false;
 
     CurVal = VecSrc;
-    EltsCovered.set(Idx->getSExtValue());
+    EltsCovered.set(Idx->getZExtValue());
 
     // All elements covered.
     if (EltsCovered.all())
diff --git a/llvm/test/CodeGen/AMDGPU/pr85717.ll b/llvm/test/CodeGen/AMDGPU/pr85717.ll
new file mode 100644
index 00000000000000..d0483125d9355e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pr85717.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+;RUN: llc -mtriple=amdgcn -mcpu=gfx1036 < %s | FileCheck %s
+define void @test(i1 %Bool, ptr %Ptr, <32 x float> %Vec1, <32 x float> %Vec2) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %BB0
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT:  .LBB0_1: ; %BB1
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+BB0:
+  %I = insertelement <32 x float> %Vec1, float 4.200000e+01, i1 true
+  br label %BB1
+
+BB1:                                              ; preds = %BB0, %BB1, %BB2
+  %PHI = phi <32 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
+  store <32 x float> %PHI, ptr %Ptr, align 128
+  br i1 %Bool, label %BB1, label %BB2
+
+BB2:                                               ; preds = %BB1
+  br label %BB1
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/85718