[llvm] [AMDGCN] Use ZExt when handling indices in insertelement (PR #85718)
Peter Rong via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 00:09:08 PDT 2024
https://github.com/DataCorrupted updated https://github.com/llvm/llvm-project/pull/85718
>From fe2d2aa7a21788d4d3ae92557d1c2790adb75cf6 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong96 at gmail.com>
Date: Mon, 18 Mar 2024 23:42:49 +0000
Subject: [PATCH 1/2] [AMDGCN] Use ZExt when handling indices in
insertelement instruction.
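This patch fixes #85717. An insertelement index of type i1 with value true
sign-extends to -1, which is not a valid element index; reading the index
with getZExtValue() instead yields the intended index 1.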
Signed-off-by: Peter Rong <PeterRong96 at gmail.com>
---
.../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 4 ++--
llvm/test/CodeGen/AMDGPU/pr85717.ll | 24 +++++++++++++++++++
2 files changed, 26 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/pr85717.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f1cc4b524460e2..bddf3d958a1ae6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1749,7 +1749,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
// Non constant index/out of bounds index -> folding is unlikely.
// The latter is more of a sanity check because canonical IR should just
// have replaced those with poison.
- if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
+ if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
return false;
const auto *VecSrc = IE->getOperand(0);
@@ -1761,7 +1761,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
return false;
CurVal = VecSrc;
- EltsCovered.set(Idx->getSExtValue());
+ EltsCovered.set(Idx->getZExtValue());
// All elements covered.
if (EltsCovered.all())
diff --git a/llvm/test/CodeGen/AMDGPU/pr85717.ll b/llvm/test/CodeGen/AMDGPU/pr85717.ll
new file mode 100644
index 00000000000000..d0483125d9355e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pr85717.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+;RUN: llc -mtriple=amdgcn -mcpu=gfx1036 < %s | FileCheck %s
+define void @test(i1 %Bool, ptr %Ptr, <32 x float> %Vec1, <32 x float> %Vec2) {
+; CHECK-LABEL: test:
+; CHECK: ; %bb.0: ; %BB0
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT: .LBB0_1: ; %BB1
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT: ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+BB0:
+ %I = insertelement <32 x float> %Vec1, float 4.200000e+01, i1 true
+ br label %BB1
+
+BB1: ; preds = %BB0, %BB1, %BB2
+ %PHI = phi <32 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
+ store <32 x float> %PHI, ptr %Ptr, align 128
+ br i1 %Bool, label %BB1, label %BB2
+
+BB2: ; preds = %BB1
+ br label %BB1
+}
>From 282d7967edd0ece35f9ff6f9ad12d0b90e239ba3 Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong96 at gmail.com>
Date: Tue, 19 Mar 2024 07:08:39 +0000
Subject: [PATCH 2/2] [AMDGPU] Move test to amdgpu-codegenprepare-break-large-phis.ll
Signed-off-by: Peter Rong <PeterRong96 at gmail.com>
---
.../amdgpu-codegenprepare-break-large-phis.ll | 51 +++++++++++++++++++
llvm/test/CodeGen/AMDGPU/pr85717.ll | 24 ---------
2 files changed, 51 insertions(+), 24 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/pr85717.ll
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll
index 192bf7c249817b..a1781b5144d455 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll
@@ -1197,3 +1197,54 @@ reallyfinally:
store <5 x double> %val, ptr %out, align 1
ret void
}
+
+define void @test(i1 %Bool, ptr %Ptr, <4 x float> %Vec1, <4 x float> %Vec2) {
+; OPT-LABEL: @test(
+; OPT-NEXT: BB0:
+; OPT-NEXT: [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
+; OPT-NEXT: br label [[BB1:%.*]]
+; OPT: BB1:
+; OPT-NEXT: [[TMP0:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE0:%.*]], [[BB2:%.*]] ], [ [[LARGEPHI_EXTRACTSLICE1:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0:%.*]] ]
+; OPT-NEXT: [[TMP1:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE3:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE4:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
+; OPT-NEXT: [[TMP2:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE6:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE7:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
+; OPT-NEXT: [[TMP3:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE9:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE10:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
+; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE0]], float [[TMP1]], i64 1
+; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE1]], float [[TMP2]], i64 2
+; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE2]], float [[TMP3]], i64 3
+; OPT-NEXT: store <4 x float> [[LARGEPHI_INSERTSLICE3]], ptr [[PTR:%.*]], align 128
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1]] = extractelement <4 x float> [[VEC2:%.*]], i64 0
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4]] = extractelement <4 x float> [[VEC2]], i64 1
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7]] = extractelement <4 x float> [[VEC2]], i64 2
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10]] = extractelement <4 x float> [[VEC2]], i64 3
+; OPT-NEXT: br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
+; OPT: BB2:
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0]] = extractelement <4 x float> [[I]], i64 0
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3]] = extractelement <4 x float> [[I]], i64 1
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6]] = extractelement <4 x float> [[I]], i64 2
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9]] = extractelement <4 x float> [[I]], i64 3
+; OPT-NEXT: br label [[BB1]]
+;
+; NOOPT-LABEL: @test(
+; NOOPT-NEXT: BB0:
+; NOOPT-NEXT: [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
+; NOOPT-NEXT: br label [[BB1:%.*]]
+; NOOPT: BB1:
+; NOOPT-NEXT: [[PHI:%.*]] = phi <4 x float> [ [[I]], [[BB2:%.*]] ], [ [[VEC2:%.*]], [[BB1]] ], [ zeroinitializer, [[BB0:%.*]] ]
+; NOOPT-NEXT: store <4 x float> [[PHI]], ptr [[PTR:%.*]], align 128
+; NOOPT-NEXT: br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
+; NOOPT: BB2:
+; NOOPT-NEXT: br label [[BB1]]
+;
+BB0:
+ %I = insertelement <4 x float> %Vec1, float 4.200000e+01, i1 true
+ br label %BB1
+
+BB1: ; preds = %BB0, %BB1, %BB2
+ %PHI = phi <4 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
+ store <4 x float> %PHI, ptr %Ptr, align 128
+ br i1 %Bool, label %BB1, label %BB2
+
+BB2: ; preds = %BB1
+ br label %BB1
+}
diff --git a/llvm/test/CodeGen/AMDGPU/pr85717.ll b/llvm/test/CodeGen/AMDGPU/pr85717.ll
deleted file mode 100644
index d0483125d9355e..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/pr85717.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-;RUN: llc -mtriple=amdgcn -mcpu=gfx1036 < %s | FileCheck %s
-define void @test(i1 %Bool, ptr %Ptr, <32 x float> %Vec1, <32 x float> %Vec2) {
-; CHECK-LABEL: test:
-; CHECK: ; %bb.0: ; %BB0
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 vcc_lo, exec_lo
-; CHECK-NEXT: .LBB0_1: ; %BB1
-; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: s_cbranch_vccnz .LBB0_1
-; CHECK-NEXT: ; %bb.2: ; %DummyReturnBlock
-; CHECK-NEXT: s_setpc_b64 s[30:31]
-BB0:
- %I = insertelement <32 x float> %Vec1, float 4.200000e+01, i1 true
- br label %BB1
-
-BB1: ; preds = %BB0, %BB1, %BB2
- %PHI = phi <32 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
- store <32 x float> %PHI, ptr %Ptr, align 128
- br i1 %Bool, label %BB1, label %BB2
-
-BB2: ; preds = %BB1
- br label %BB1
-}
More information about the llvm-commits
mailing list