[flang-commits] [lld] [libunwind] [libcxx] [compiler-rt] [llvm] [flang] [clang-tools-extra] [lldb] [clang] [AMDGPU] - Add constant folding for s_quadmask (PR #72381)
Jessica Del via flang-commits
flang-commits at lists.llvm.org
Fri Nov 17 03:39:12 PST 2023
https://github.com/OutOfCache updated https://github.com/llvm/llvm-project/pull/72381
>From 00d0f99207242befc8022031ccd8faf573cbf014 Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Tue, 14 Nov 2023 22:17:26 +0100
Subject: [PATCH 1/5] [AMDGPU] - Add constant folding for s_quadmask
If the input is a constant, we can constant-fold the `s_quadmask`
intrinsic.
---
llvm/lib/Analysis/ConstantFolding.cpp | 14 ++++++++++++++
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll | 12 ++++--------
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 966a65ac26b8017..40b5938fcda0c2a 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1533,6 +1533,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_s_quadmask:
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
@@ -2422,6 +2423,19 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), Val);
}
+
+ case Intrinsic::amdgcn_s_quadmask: {
+ uint64_t Val = Op->getZExtValue();
+ uint64_t QuadMask = 0;
+ for (unsigned i = 0; i < Op->getBitWidth() / 4; ++i, Val >>= 4) {
+ if (!(Val & 0xF))
+ continue;
+
+ QuadMask |= (1 << i);
+ }
+ return ConstantInt::get(Ty, QuadMask);
+ }
+
default:
return nullptr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
index 65443a6efa789d9..0f500c0999ad9a8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
@@ -9,11 +9,10 @@ define i32 @test_quadmask_constant_i32() {
; GFX11-LABEL: test_quadmask_constant_i32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_quadmask_b32 s0, 0x85fe3a92
-; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xcb
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
- %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 u0x85FE3A92)
+ %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 u0x85003092)
ret i32 %qm
}
@@ -50,13 +49,10 @@ define i64 @test_quadmask_constant_i64() {
; GFX11-LABEL: test_quadmask_constant_i64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0x85fe3a92
-; GFX11-NEXT: s_mov_b32 s1, 0x67de48fc
-; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1]
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: v_dual_mov_b32 v0, 0xe3e6 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
- %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 u0x67DE48FC85FE3A92)
+ %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 u0x67D000FC85F00A90)
ret i64 %qm
}
>From 144c4dc164ec137e518cfd647c116373e7a61b8f Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Wed, 15 Nov 2023 15:59:56 +0100
Subject: [PATCH 2/5] fixup! [AMDGPU] - Add constant folding for s_quadmask
---
llvm/lib/Analysis/ConstantFolding.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 40b5938fcda0c2a..39bbb04fbcf26cc 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2427,11 +2427,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::amdgcn_s_quadmask: {
uint64_t Val = Op->getZExtValue();
uint64_t QuadMask = 0;
- for (unsigned i = 0; i < Op->getBitWidth() / 4; ++i, Val >>= 4) {
+ for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
if (!(Val & 0xF))
continue;
- QuadMask |= (1 << i);
+ QuadMask |= (1 << I);
}
return ConstantInt::get(Ty, QuadMask);
}
>From 65bb0b1164ff9b7491589cb88decb1d135504c1b Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Thu, 16 Nov 2023 11:22:52 +0100
Subject: [PATCH 3/5] fixup! Merge branch 'main' into quadmask-folding
---
llvm/lib/Analysis/ConstantFolding.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 64d088ea7a46404..2771a3d574f7799 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2425,7 +2425,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), Val);
}
-
case Intrinsic::amdgcn_s_quadmask: {
uint64_t Val = Op->getZExtValue();
uint64_t QuadMask = 0;
@@ -2436,6 +2435,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
QuadMask |= (1 << I);
}
return ConstantInt::get(Ty, QuadMask);
+ }
case Intrinsic::amdgcn_s_bitreplicate: {
uint64_t Val = Op->getZExtValue();
@@ -2446,7 +2446,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
Val = Val | Val << 1;
return ConstantInt::get(Ty, Val);
-
}
default:
>From 667e6a5577e79c283c97400d5e0bb38fe5d32ad9 Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Fri, 17 Nov 2023 12:15:11 +0100
Subject: [PATCH 4/5] fixup! [AMDGPU] - Add constant folding for s_quadmask
---
.../CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll | 46 +++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
index 0f500c0999ad9a8..ab45f0b82442010 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
@@ -5,6 +5,52 @@
declare i32 @llvm.amdgcn.s.quadmask.i32(i32)
declare i64 @llvm.amdgcn.s.quadmask.i64(i64)
+define i32 @test_quadmask_constant_zero_i32() {
+; GFX11-LABEL: test_quadmask_constant_zero_i32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 0)
+ ret i32 %qm
+}
+
+define i32 @test_quadmask_constant_neg_one_i32() {
+; GFX11-LABEL: test_quadmask_constant_neg_one_i32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xff
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 -1)
+ ret i32 %qm
+}
+
+define i32 @test_quadmask_constant_undef_i32() {
+; GFX11-LABEL: test_quadmask_constant_undef_i32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_quadmask_b32 s0, s0
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 undef)
+ ret i32 %qm
+}
+
+define i32 @test_quadmask_constant_poison_i32() {
+; GFX11-LABEL: test_quadmask_constant_poison_i32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_quadmask_b32 s0, s0
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 poison)
+ ret i32 %qm
+}
+
define i32 @test_quadmask_constant_i32() {
; GFX11-LABEL: test_quadmask_constant_i32:
; GFX11: ; %bb.0: ; %entry
>From f2c3247239f3089fc84f55a348d2b31c7e953353 Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Fri, 17 Nov 2023 12:38:43 +0100
Subject: [PATCH 5/5] fixup! [AMDGPU] - Add constant folding for s_quadmask
---
.../CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll | 46 +++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
index ab45f0b82442010..d7a84743e78c5b1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
@@ -102,6 +102,52 @@ entry:
ret i64 %qm
}
+define i64 @test_quadmask_constant_zero_i64() {
+; GFX11-LABEL: test_quadmask_constant_zero_i64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 0)
+ ret i64 %qm
+}
+
+define i64 @test_quadmask_constant_neg_one_i64() {
+; GFX11-LABEL: test_quadmask_constant_neg_one_i64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_dual_mov_b32 v0, 0xffff :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 -1)
+ ret i64 %qm
+}
+
+define i64 @test_quadmask_constant_undef_i64() {
+; GFX11-LABEL: test_quadmask_constant_undef_i64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1]
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 undef)
+ ret i64 %qm
+}
+
+define i64 @test_quadmask_constant_poison_i64() {
+; GFX11-LABEL: test_quadmask_constant_poison_i64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_quadmask_b64 s[0:1], s[0:1]
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 poison)
+ ret i64 %qm
+}
+
define amdgpu_cs void @test_quadmask_sgpr_i64(i64 inreg %mask, ptr addrspace(1) %out) {
; GFX11-LABEL: test_quadmask_sgpr_i64:
; GFX11: ; %bb.0: ; %entry
More information about the flang-commits
mailing list