[llvm] [AMDGCN] Use ZExt when handling indices in insertelement (PR #85718)

Tue Mar 19 00:09:08 PDT 2024

https://github.com/DataCorrupted updated https://github.com/llvm/llvm-project/pull/85718

>From fe2d2aa7a21788d4d3ae92557d1c2790adb75cf6 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong96 at gmail.com>
Date: Mon, 18 Mar 2024 23:42:49 +0000
Subject: [PATCH 1/2] [AMDGCN] Use ZExt when handling indices in
 insertelement instruction.

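This patch fixes #85717.

A constant insertelement index such as `i1 true` sign-extends to -1, which
slips past the `>= getNumElements()` bounds check and is then passed to
`EltsCovered.set()`. Reading the index with getZExtValue() instead yields 1
for `i1 true`, so both the bounds check and the bit index operate on the
unsigned value.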

Signed-off-by: Peter Rong <PeterRong96 at gmail.com>
---
 .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    |  4 ++--
 llvm/test/CodeGen/AMDGPU/pr85717.ll           | 24 +++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/pr85717.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f1cc4b524460e2..bddf3d958a1ae6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1749,7 +1749,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
     // Non constant index/out of bounds index -> folding is unlikely.
     // The latter is more of a sanity check because canonical IR should just
     // have replaced those with poison.
-    if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
+    if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
       return false;
 
     const auto *VecSrc = IE->getOperand(0);
@@ -1761,7 +1761,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
       return false;
 
     CurVal = VecSrc;
-    EltsCovered.set(Idx->getSExtValue());
+    EltsCovered.set(Idx->getZExtValue());
 
     // All elements covered.
     if (EltsCovered.all())
diff --git a/llvm/test/CodeGen/AMDGPU/pr85717.ll b/llvm/test/CodeGen/AMDGPU/pr85717.ll
new file mode 100644
index 00000000000000..d0483125d9355e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pr85717.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+;RUN: llc -mtriple=amdgcn -mcpu=gfx1036 < %s | FileCheck %s
+define void @test(i1 %Bool, ptr %Ptr, <32 x float> %Vec1, <32 x float> %Vec2) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %BB0
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT:  .LBB0_1: ; %BB1
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+BB0:
+  %I = insertelement <32 x float> %Vec1, float 4.200000e+01, i1 true
+  br label %BB1
+
+BB1:                                              ; preds = %BB0, %BB1, %BB2
+  %PHI = phi <32 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
+  store <32 x float> %PHI, ptr %Ptr, align 128
+  br i1 %Bool, label %BB1, label %BB2
+
+BB2:                                               ; preds = %BB1
+  br label %BB1
+}

>From 282d7967edd0ece35f9ff6f9ad12d0b90e239ba3 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong96 at gmail.com>
Date: Tue, 19 Mar 2024 07:08:39 +0000
Subject: [PATCH 2/2] [AMDGPU] Move test to amdgpu-codegenprepare-break-large-phis.ll

Signed-off-by: Peter Rong <PeterRong96 at gmail.com>
---
 .../amdgpu-codegenprepare-break-large-phis.ll | 51 +++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/pr85717.ll           | 24 ---------
 2 files changed, 51 insertions(+), 24 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/pr85717.ll

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll
index 192bf7c249817b..a1781b5144d455 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll
@@ -1197,3 +1197,54 @@ reallyfinally:
   store <5 x double> %val, ptr %out, align 1
   ret void
 }
+
+define void @test(i1 %Bool, ptr %Ptr, <4 x float> %Vec1, <4 x float> %Vec2) {
+; OPT-LABEL: @test(
+; OPT-NEXT:  BB0:
+; OPT-NEXT:    [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
+; OPT-NEXT:    br label [[BB1:%.*]]
+; OPT:       BB1:
+; OPT-NEXT:    [[TMP0:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE0:%.*]], [[BB2:%.*]] ], [ [[LARGEPHI_EXTRACTSLICE1:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0:%.*]] ]
+; OPT-NEXT:    [[TMP1:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE3:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE4:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
+; OPT-NEXT:    [[TMP2:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE6:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE7:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
+; OPT-NEXT:    [[TMP3:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE9:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE10:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
+; OPT-NEXT:    [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; OPT-NEXT:    [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE0]], float [[TMP1]], i64 1
+; OPT-NEXT:    [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE1]], float [[TMP2]], i64 2
+; OPT-NEXT:    [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE2]], float [[TMP3]], i64 3
+; OPT-NEXT:    store <4 x float> [[LARGEPHI_INSERTSLICE3]], ptr [[PTR:%.*]], align 128
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE1]] = extractelement <4 x float> [[VEC2:%.*]], i64 0
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE4]] = extractelement <4 x float> [[VEC2]], i64 1
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE7]] = extractelement <4 x float> [[VEC2]], i64 2
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE10]] = extractelement <4 x float> [[VEC2]], i64 3
+; OPT-NEXT:    br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
+; OPT:       BB2:
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE0]] = extractelement <4 x float> [[I]], i64 0
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE3]] = extractelement <4 x float> [[I]], i64 1
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE6]] = extractelement <4 x float> [[I]], i64 2
+; OPT-NEXT:    [[LARGEPHI_EXTRACTSLICE9]] = extractelement <4 x float> [[I]], i64 3
+; OPT-NEXT:    br label [[BB1]]
+;
+; NOOPT-LABEL: @test(
+; NOOPT-NEXT:  BB0:
+; NOOPT-NEXT:    [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
+; NOOPT-NEXT:    br label [[BB1:%.*]]
+; NOOPT:       BB1:
+; NOOPT-NEXT:    [[PHI:%.*]] = phi <4 x float> [ [[I]], [[BB2:%.*]] ], [ [[VEC2:%.*]], [[BB1]] ], [ zeroinitializer, [[BB0:%.*]] ]
+; NOOPT-NEXT:    store <4 x float> [[PHI]], ptr [[PTR:%.*]], align 128
+; NOOPT-NEXT:    br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
+; NOOPT:       BB2:
+; NOOPT-NEXT:    br label [[BB1]]
+;
+BB0:
+  %I = insertelement <4 x float> %Vec1, float 4.200000e+01, i1 true
+  br label %BB1
+
+BB1:                                              ; preds = %BB0, %BB1, %BB2
+  %PHI = phi <4 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
+  store <4 x float> %PHI, ptr %Ptr, align 128
+  br i1 %Bool, label %BB1, label %BB2
+
+BB2:                                               ; preds = %BB1
+  br label %BB1
+}
diff --git a/llvm/test/CodeGen/AMDGPU/pr85717.ll b/llvm/test/CodeGen/AMDGPU/pr85717.ll
deleted file mode 100644
index d0483125d9355e..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/pr85717.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-;RUN: llc -mtriple=amdgcn -mcpu=gfx1036 < %s | FileCheck %s
-define void @test(i1 %Bool, ptr %Ptr, <32 x float> %Vec1, <32 x float> %Vec2) {
-; CHECK-LABEL: test:
-; CHECK:       ; %bb.0: ; %BB0
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 vcc_lo, exec_lo
-; CHECK-NEXT:  .LBB0_1: ; %BB1
-; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
-; CHECK-NEXT:  ; %bb.2: ; %DummyReturnBlock
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
-BB0:
-  %I = insertelement <32 x float> %Vec1, float 4.200000e+01, i1 true
-  br label %BB1
-
-BB1:                                              ; preds = %BB0, %BB1, %BB2
-  %PHI = phi <32 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
-  store <32 x float> %PHI, ptr %Ptr, align 128
-  br i1 %Bool, label %BB1, label %BB2
-
-BB2:                                               ; preds = %BB1
-  br label %BB1
-}