[llvm] 22d295f - [AMDGPU] Constant fold Intrinsic::amdgcn_perm

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon May 10 16:39:13 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-05-10T16:23:11-07:00
New Revision: 22d295f6953c07129837703c811fdda83775e75e

URL: https://github.com/llvm/llvm-project/commit/22d295f6953c07129837703c811fdda83775e75e
DIFF: https://github.com/llvm/llvm-project/commit/22d295f6953c07129837703c811fdda83775e75e.diff

LOG: [AMDGPU] Constant fold Intrinsic::amdgcn_perm

Differential Revision: https://reviews.llvm.org/D102203

Added: 
    llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll

Modified: 
    llvm/lib/Analysis/ConstantFolding.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index d53c02fe34c39..0b97b823ce048 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1470,6 +1470,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::vector_reduce_umin:
   case Intrinsic::vector_reduce_umax:
   // Target intrinsics
+  case Intrinsic::amdgcn_perm:
   case Intrinsic::arm_mve_vctp8:
   case Intrinsic::arm_mve_vctp16:
   case Intrinsic::arm_mve_vctp32:
@@ -2702,6 +2703,46 @@ static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
   }
 }
 
+static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
+                                                 Type *Ty) { // Folds llvm.amdgcn.perm to a 32-bit constant or undef; returns nullptr if it cannot.
+  const APInt *C0, *C1, *C2; // Operands 0 and 1 are the byte sources; operand 2 holds four selector bytes.
+  if (!getConstIntOrUndef(Operands[0], C0) ||
+      !getConstIntOrUndef(Operands[1], C1) ||
+      !getConstIntOrUndef(Operands[2], C2))
+    return nullptr; // getConstIntOrUndef rejected an operand, so no fold is attempted.
+  // A null C2 is handled as an undef selector (presumably how getConstIntOrUndef reports undef): the result is undef.
+  if (!C2)
+    return UndefValue::get(Ty);
+  // Build the result one byte at a time: selector byte I/8 decides result byte I/8.
+  APInt Val(32, 0);
+  unsigned NumUndefBytes = 0;
+  for (unsigned I = 0; I < 32; I += 8) {
+    unsigned Sel = C2->extractBitsAsZExtValue(8, I);
+    unsigned B = 0; // Stays 0 if the selected source turns out to be null (undef).
+    // Selector values: >= 13 gives 0xff, 12 gives 0x00, 0..11 read from a source operand.
+    if (Sel >= 13)
+      B = 0xff;
+    else if (Sel == 12)
+      B = 0x00;
+    else { // Sel 4-7 and 10-11 read operand 0 (C0); Sel 0-3 and 8-9 read operand 1 (C1).
+      const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
+      if (!Src)
+        ++NumUndefBytes; // Null source: count it and leave B as 0.
+      else if (Sel < 8) // Sel 0-7: copy byte (Sel & 3) of the chosen source.
+        B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
+      else // Sel 8-11: replicate bit 15 (even Sel) or bit 31 (odd Sel) across the whole byte.
+        B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
+    }
+
+    Val.insertBits(B, I, 8);
+  }
+  // Fold to undef only when all four bytes came from a null source; otherwise those bytes remain 0.
+  if (NumUndefBytes == 4)
+    return UndefValue::get(Ty);
+
+  return ConstantInt::get(Ty, Val);
+}
+
 static Constant *ConstantFoldScalarCall3(StringRef Name,
                                          Intrinsic::ID IntrinsicID,
                                          Type *Ty,
@@ -2817,6 +2858,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
     return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
   }
 
+  if (IntrinsicID == Intrinsic::amdgcn_perm)
+    return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
+
   return nullptr;
 }
 

diff  --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll
new file mode 100644
index 0000000000000..7483b54cbb297
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+declare i32 @llvm.amdgcn.perm(i32, i32, i32)
+
+; src1 = 0x19203a4b (421542475), src2 = 0x5c6d7e8f (1550679695). Value names read <first operand><second operand>_<selector in hex>: s1/s2 are these constants, u1/u2 are undef; the last three calls spell other constants out in hex.
+define void @test(i32* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    store volatile i32 undef, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    store volatile i32 -1887539876, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 2121096267, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 1262100505, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 1550679695, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 421542475, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 545143439, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 16711935, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 16711935, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 436174336, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 16711680, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 -1, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 -1, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 -1, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 undef, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 421542475, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 1550679695, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 undef, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 143, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 0, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 255, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 1550679552, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 75, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 0, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 255, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 65535, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 421542400, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 -16776961, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 255, i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 -16777216, i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  %s1s2_u = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 undef) ; undef selector: expected to fold to undef
+  store volatile i32 %s1s2_u, i32* %p
+  %s1s2_0x00010203 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 66051) ; selectors 0-3 pick bytes of the second operand (here byte-reversed)
+  store volatile i32 %s1s2_0x00010203, i32* %p
+  %s1s2_0x01020304 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 16909060)
+  store volatile i32 %s1s2_0x01020304, i32* %p
+  %s1s2_0x04050607 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 67438087) ; selectors 4-7 pick bytes of the first operand (here byte-reversed)
+  store volatile i32 %s1s2_0x04050607, i32* %p
+  %s1s2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 50462976)
+  store volatile i32 %s1s2_0x03020100, i32* %p
+  %s1s2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 117835012)
+  store volatile i32 %s1s2_0x07060504, i32* %p
+  %s1s2_0x06010500 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 100730112)
+  store volatile i32 %s1s2_0x06010500, i32* %p
+  %s1s2_0x0c0f0c0f = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 202312719) ; selector 0x0c yields a 0x00 byte, 0x0f yields a 0xff byte
+  store volatile i32 %s1s2_0x0c0f0c0f, i32* %p
+  %u1u2_0x0c0f0c0f = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 202312719) ; same selector with undef sources: constant bytes do not depend on the sources
+  store volatile i32 %u1u2_0x0c0f0c0f, i32* %p
+  %s1s2_0x070d010c = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 118292748)
+  store volatile i32 %s1s2_0x070d010c, i32* %p
+  %u1u2_0x070d010c = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 118292748) ; bytes read from undef sources come out as 0 when not all four bytes are undef
+  store volatile i32 %u1u2_0x070d010c, i32* %p
+  %s1s2_0x80818283 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 2155971203) ; every selector byte is >= 13, so every result byte is 0xff
+  store volatile i32 %s1s2_0x80818283, i32* %p
+  %u1u2_0x80818283 = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 2155971203)
+  store volatile i32 %u1u2_0x80818283, i32* %p
+  %u1u2_0x0e0e0e0e = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 235802126)
+  store volatile i32 %u1u2_0x0e0e0e0e, i32* %p
+  %u1s2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835012) ; all four bytes select the undef first operand: expected to fold to undef
+  store volatile i32 %u1s2_0x07060504, i32* %p
+  %s1u2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 117835012)
+  store volatile i32 %s1u2_0x07060504, i32* %p
+  %u1s2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 50462976)
+  store volatile i32 %u1s2_0x03020100, i32* %p
+  %s1u2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462976) ; all four bytes select the undef second operand: expected to fold to undef
+  store volatile i32 %s1u2_0x03020100, i32* %p
+  %u1s2_0x07060500 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835008)
+  store volatile i32 %u1s2_0x07060500, i32* %p
+  %u1s2_0x0706050c = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835020)
+  store volatile i32 %u1s2_0x0706050c, i32* %p
+  %u1s2_0x0706050d = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835021)
+  store volatile i32 %u1s2_0x0706050d, i32* %p
+  %u1s2_0x03020104 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 50462980)
+  store volatile i32 %u1s2_0x03020104, i32* %p
+  %s1u2_0x03020104 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462980)
+  store volatile i32 %s1u2_0x03020104, i32* %p
+  %s1u2_0x0302010c = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462988)
+  store volatile i32 %s1u2_0x0302010c, i32* %p
+  %s1u2_0x0302010e = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462990)
+  store volatile i32 %s1u2_0x0302010e, i32* %p
+  %s1u2_0x03020f0e = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50466574)
+  store volatile i32 %s1u2_0x03020f0e, i32* %p
+  %s1u2_0x07060500 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 117835008)
+  store volatile i32 %s1u2_0x07060500, i32* %p
+  %_0x81000100_0x01008100_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 2164261120, i32 16810240, i32 185207048) ; selectors 8-11 replicate bit 15 or bit 31 of a source across the result byte
+  store volatile i32 %_0x81000100_0x01008100_0x0b0a0908, i32* %p
+  %_u1_0x01008100_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 undef, i32 16810240, i32 185207048)
+  store volatile i32 %_u1_0x01008100_0x0b0a0908, i32* %p
+  %_0x81000100_u2_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 2164261120, i32 undef, i32 185207048)
+  store volatile i32 %_0x81000100_u2_0x0b0a0908, i32* %p
+  ret void
+}


        


More information about the llvm-commits mailing list