[llvm] b1e039f - [AMDGPU] - Add constant folding for s_quadmask (#72381)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 17 06:24:27 PST 2023


Author: Jessica Del
Date: 2023-11-17T15:24:23+01:00
New Revision: b1e039f3b757dbcf4928e075e2120070e9d92a85

URL: https://github.com/llvm/llvm-project/commit/b1e039f3b757dbcf4928e075e2120070e9d92a85
DIFF: https://github.com/llvm/llvm-project/commit/b1e039f3b757dbcf4928e075e2120070e9d92a85.diff

LOG: [AMDGPU] - Add constant folding for s_quadmask (#72381)

If the input is a constant we can constant fold the `s_quadmask`
intrinsic.

Added: 
    

Modified: 
    llvm/lib/Analysis/ConstantFolding.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 5b385e9df24183f..2771a3d574f7799 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1533,6 +1533,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::amdgcn_perm:
   case Intrinsic::amdgcn_wave_reduce_umin:
   case Intrinsic::amdgcn_wave_reduce_umax:
+  case Intrinsic::amdgcn_s_quadmask:
   case Intrinsic::amdgcn_s_bitreplicate:
   case Intrinsic::arm_mve_vctp8:
   case Intrinsic::arm_mve_vctp16:
@@ -2424,6 +2425,18 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       return ConstantFP::get(Ty->getContext(), Val);
     }
 
+    case Intrinsic::amdgcn_s_quadmask: {
+      uint64_t Val = Op->getZExtValue();
+      uint64_t QuadMask = 0;
+      for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
+        if (!(Val & 0xF))
+          continue;
+
+        QuadMask |= (1 << I);
+      }
+      return ConstantInt::get(Ty, QuadMask);
+    }
+
     case Intrinsic::amdgcn_s_bitreplicate: {
       uint64_t Val = Op->getZExtValue();
       Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
index 65443a6efa789d9..d7a84743e78c5b1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
@@ -5,15 +5,60 @@
 declare i32 @llvm.amdgcn.s.quadmask.i32(i32)
 declare i64 @llvm.amdgcn.s.quadmask.i64(i64)
 
+define i32 @test_quadmask_constant_zero_i32() {
+; GFX11-LABEL: test_quadmask_constant_zero_i32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 0)
+  ret i32 %qm
+}
+
+define i32 @test_quadmask_constant_neg_one_i32() {
+; GFX11-LABEL: test_quadmask_constant_neg_one_i32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0xff
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 -1)
+  ret i32 %qm
+}
+
+define i32 @test_quadmask_constant_undef_i32() {
+; GFX11-LABEL: test_quadmask_constant_undef_i32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_quadmask_b32 s0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 undef)
+  ret i32 %qm
+}
+
+define i32 @test_quadmask_constant_poison_i32() {
+; GFX11-LABEL: test_quadmask_constant_poison_i32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_quadmask_b32 s0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 poison)
+  ret i32 %qm
+}
+
 define i32 @test_quadmask_constant_i32() {
 ; GFX11-LABEL: test_quadmask_constant_i32:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_quadmask_b32 s0, 0x85fe3a92
-; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0xcb
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 u0x85FE3A92)
+  %qm = call i32 @llvm.amdgcn.s.quadmask.i32(i32 u0x85003092)
   ret i32 %qm
 }
 
@@ -50,13 +95,56 @@ define i64 @test_quadmask_constant_i64() {
 ; GFX11-LABEL: test_quadmask_constant_i64:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s0, 0x85fe3a92
-; GFX11-NEXT:    s_mov_b32 s1, 0x67de48fc
+; GFX11-NEXT:    v_dual_mov_b32 v0, 0xe3e6 :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 u0x67D000FC85F00A90)
+  ret i64 %qm
+}
+
+define i64 @test_quadmask_constant_zero_i64() {
+; GFX11-LABEL: test_quadmask_constant_zero_i64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 0)
+  ret i64 %qm
+}
+
+define i64 @test_quadmask_constant_neg_one_i64() {
+; GFX11-LABEL: test_quadmask_constant_neg_one_i64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_dual_mov_b32 v0, 0xffff :: v_dual_mov_b32 v1, 0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 -1)
+  ret i64 %qm
+}
+
+define i64 @test_quadmask_constant_undef_i64() {
+; GFX11-LABEL: test_quadmask_constant_undef_i64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_quadmask_b64 s[0:1], s[0:1]
+; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 undef)
+  ret i64 %qm
+}
+
+define i64 @test_quadmask_constant_poison_i64() {
+; GFX11-LABEL: test_quadmask_constant_poison_i64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_quadmask_b64 s[0:1], s[0:1]
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 u0x67DE48FC85FE3A92)
+  %qm = call i64 @llvm.amdgcn.s.quadmask.i64(i64 poison)
   ret i64 %qm
 }
 


        


More information about the llvm-commits mailing list