[llvm] [AMDGPU] - Add constant folding for s_bitreplicate (PR #72366)
Jessica Del via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 15 08:52:16 PST 2023
https://github.com/OutOfCache updated https://github.com/llvm/llvm-project/pull/72366
>From 44b4463d42b206f69114859fb04a5caeadcd3312 Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Tue, 14 Nov 2023 21:44:56 +0100
Subject: [PATCH 1/4] [AMDGPU] - Add constant folding for s_bitreplicate
If the input operand is a constant, constant fold the s_bitreplicate
intrinsic: each bit of the 32-bit input is duplicated into two adjacent
bits of the 64-bit result.
---
llvm/lib/Analysis/ConstantFolding.cpp | 18 ++++++++++++++++++
.../CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll | 4 ++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 966a65ac26b8017..e401efa6c67c2da 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1533,6 +1533,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_s_bitreplicate:
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
@@ -2422,6 +2423,23 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), Val);
}
+
+ case Intrinsic::amdgcn_s_bitreplicate: {
+ uint64_t Val = Op->getZExtValue();
+ uint64_t ReplicatedVal = 0;
+ uint64_t ReplicatedOnes = 0b11;
+ // Input operand is always b32
+ for (unsigned i = 0; i < 32; ++i, ReplicatedOnes <<= 2, Val >>= 1) {
+ uint64_t Bit = Val & 1;
+
+ if (!Bit)
+ continue;
+
+ ReplicatedVal |= ReplicatedOnes;
+ }
+ return ConstantInt::get(Ty, ReplicatedVal);
+ }
+
default:
return nullptr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll
index 027c9ef5e7cc349..0937280e6352c80 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll
@@ -8,8 +8,8 @@ define i64 @test_s_bitreplicate_constant() {
; GFX11-LABEL: test_s_bitreplicate_constant:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_bitreplicate_b64_b32 s[0:1], 0x85fe3a92
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xfccc30c
+; GFX11-NEXT: v_mov_b32_e32 v1, 0xc033fffc
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%br = call i64 @llvm.amdgcn.s.bitreplicate(i32 u0x85FE3A92)
>From 22a11897a9e28de46ac7f25acde940bb1759b4c4 Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Wed, 15 Nov 2023 11:25:11 +0100
Subject: [PATCH 2/4] fixup! [AMDGPU] - Add constant folding for s_bitreplicate
---
llvm/lib/Analysis/ConstantFolding.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index e401efa6c67c2da..7521e45fe27bd30 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2430,9 +2430,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
uint64_t ReplicatedOnes = 0b11;
// Input operand is always b32
for (unsigned i = 0; i < 32; ++i, ReplicatedOnes <<= 2, Val >>= 1) {
- uint64_t Bit = Val & 1;
-
- if (!Bit)
+ if (!(Val & 1))
continue;
ReplicatedVal |= ReplicatedOnes;
>From 8c69df448cd1ce23dbce678627c22c1a6f0d9573 Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Wed, 15 Nov 2023 16:42:29 +0100
Subject: [PATCH 3/4] fixup! [AMDGPU] - Add constant folding for s_bitreplicate
---
llvm/lib/Analysis/ConstantFolding.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7521e45fe27bd30..3c0a45c8964f25b 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2426,16 +2426,13 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::amdgcn_s_bitreplicate: {
uint64_t Val = Op->getZExtValue();
- uint64_t ReplicatedVal = 0;
- uint64_t ReplicatedOnes = 0b11;
- // Input operand is always b32
- for (unsigned i = 0; i < 32; ++i, ReplicatedOnes <<= 2, Val >>= 1) {
- if (!(Val & 1))
- continue;
-
- ReplicatedVal |= ReplicatedOnes;
- }
- return ConstantInt::get(Ty, ReplicatedVal);
+ Val = (Val & 0x000000000000FFFF) | (Val & 0x00000000FFFF0000) << 16;
+ Val = (Val & 0x000000FF000000FF) | (Val & 0x0000FF000000FF00) << 8;
+ Val = (Val & 0x000F000F000F000F) | (Val & 0x00F000F000F000F0) << 4;
+ Val = (Val & 0x0303030303030303) | (Val & 0x0C0C0C0C0C0C0C0C) << 2;
+ Val = (Val & 0x5555555555555555) | (Val & 0xAAAAAAAAAAAAAAAA) << 1;
+ Val = Val | Val << 1;
+ return ConstantInt::get(Ty, Val);
}
default:
>From a6e5a613e535bd467880a911905a172fab539036 Mon Sep 17 00:00:00 2001
From: Jessica Del <Jessica.Del at amd.com>
Date: Wed, 15 Nov 2023 17:50:57 +0100
Subject: [PATCH 4/4] fixup! [AMDGPU] - Add constant folding for s_bitreplicate
---
llvm/lib/Analysis/ConstantFolding.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 3c0a45c8964f25b..5b385e9df24183f 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2426,11 +2426,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::amdgcn_s_bitreplicate: {
uint64_t Val = Op->getZExtValue();
- Val = (Val & 0x000000000000FFFF) | (Val & 0x00000000FFFF0000) << 16;
- Val = (Val & 0x000000FF000000FF) | (Val & 0x0000FF000000FF00) << 8;
- Val = (Val & 0x000F000F000F000F) | (Val & 0x00F000F000F000F0) << 4;
- Val = (Val & 0x0303030303030303) | (Val & 0x0C0C0C0C0C0C0C0C) << 2;
- Val = (Val & 0x5555555555555555) | (Val & 0xAAAAAAAAAAAAAAAA) << 1;
+ Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
+ Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
+ Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
+ Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
+ Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
Val = Val | Val << 1;
return ConstantInt::get(Ty, Val);
}
More information about the llvm-commits mailing list