[llvm] 22d295f - [AMDGPU] Constant fold Intrinsic::amdgcn_perm
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon May 10 16:39:13 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-05-10T16:23:11-07:00
New Revision: 22d295f6953c07129837703c811fdda83775e75e
URL: https://github.com/llvm/llvm-project/commit/22d295f6953c07129837703c811fdda83775e75e
DIFF: https://github.com/llvm/llvm-project/commit/22d295f6953c07129837703c811fdda83775e75e.diff
LOG: [AMDGPU] Constant fold Intrinsic::amdgcn_perm
Differential Revision: https://reviews.llvm.org/D102203
Added:
llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll
Modified:
llvm/lib/Analysis/ConstantFolding.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index d53c02fe34c39..0b97b823ce048 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1470,6 +1470,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_umax:
// Target intrinsics
+ case Intrinsic::amdgcn_perm:
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
@@ -2702,6 +2703,46 @@ static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
}
}
+static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
+ Type *Ty) {
+ const APInt *C0, *C1, *C2;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1) ||
+ !getConstIntOrUndef(Operands[2], C2))
+ return nullptr;
+
+ if (!C2)
+ return UndefValue::get(Ty);
+
+ APInt Val(32, 0);
+ unsigned NumUndefBytes = 0;
+ for (unsigned I = 0; I < 32; I += 8) {
+ unsigned Sel = C2->extractBitsAsZExtValue(8, I);
+ unsigned B = 0;
+
+ if (Sel >= 13)
+ B = 0xff;
+ else if (Sel == 12)
+ B = 0x00;
+ else {
+ const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
+ if (!Src)
+ ++NumUndefBytes;
+ else if (Sel < 8)
+ B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
+ else
+ B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
+ }
+
+ Val.insertBits(B, I, 8);
+ }
+
+ if (NumUndefBytes == 4)
+ return UndefValue::get(Ty);
+
+ return ConstantInt::get(Ty, Val);
+}
+
static Constant *ConstantFoldScalarCall3(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
@@ -2817,6 +2858,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
}
+ if (IntrinsicID == Intrinsic::amdgcn_perm)
+ return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll
new file mode 100644
index 0000000000000..7483b54cbb297
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/perm.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+declare i32 @llvm.amdgcn.perm(i32, i32, i32)
+
+; src1 = 0x19203a4b (421542475), src2 = 0x5c6d7e8f (1550679695)
+define void @test(i32* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: store volatile i32 undef, i32* [[P:%.*]], align 4
+; CHECK-NEXT: store volatile i32 -1887539876, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 2121096267, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 1262100505, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 1550679695, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 421542475, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 545143439, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 16711935, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 16711935, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 436174336, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 16711680, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 undef, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 421542475, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 1550679695, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 undef, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 143, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 255, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 1550679552, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 75, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 255, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 65535, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 421542400, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 -16776961, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 255, i32* [[P]], align 4
+; CHECK-NEXT: store volatile i32 -16777216, i32* [[P]], align 4
+; CHECK-NEXT: ret void
+;
+ %s1s2_u = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 undef)
+ store volatile i32 %s1s2_u, i32* %p
+ %s1s2_0x00010203 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 66051)
+ store volatile i32 %s1s2_0x00010203, i32* %p
+ %s1s2_0x01020304 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 16909060)
+ store volatile i32 %s1s2_0x01020304, i32* %p
+ %s1s2_0x04050607 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 67438087)
+ store volatile i32 %s1s2_0x04050607, i32* %p
+ %s1s2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 50462976)
+ store volatile i32 %s1s2_0x03020100, i32* %p
+ %s1s2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 117835012)
+ store volatile i32 %s1s2_0x07060504, i32* %p
+ %s1s2_0x06010500 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 100730112)
+ store volatile i32 %s1s2_0x06010500, i32* %p
+ %s1s2_0x0c0f0c0f = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 202312719)
+ store volatile i32 %s1s2_0x0c0f0c0f, i32* %p
+ %u1u2_0x0c0f0c0f = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 202312719)
+ store volatile i32 %u1u2_0x0c0f0c0f, i32* %p
+ %s1s2_0x070d010c = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 118292748)
+ store volatile i32 %s1s2_0x070d010c, i32* %p
+ %u1u2_0x070d010c = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 118292748)
+ store volatile i32 %u1u2_0x070d010c, i32* %p
+ %s1s2_0x80818283 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 1550679695, i32 2155971203)
+ store volatile i32 %s1s2_0x80818283, i32* %p
+ %u1u2_0x80818283 = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 2155971203)
+ store volatile i32 %u1u2_0x80818283, i32* %p
+ %u1u2_0x0e0e0e0e = call i32 @llvm.amdgcn.perm(i32 undef, i32 undef, i32 235802126)
+ store volatile i32 %u1u2_0x0e0e0e0e, i32* %p
+ %u1s2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835012)
+ store volatile i32 %u1s2_0x07060504, i32* %p
+ %s1u2_0x07060504 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 117835012)
+ store volatile i32 %s1u2_0x07060504, i32* %p
+ %u1s2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 50462976)
+ store volatile i32 %u1s2_0x03020100, i32* %p
+ %s1u2_0x03020100 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462976)
+ store volatile i32 %s1u2_0x03020100, i32* %p
+ %u1s2_0x07060500 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835008)
+ store volatile i32 %u1s2_0x07060500, i32* %p
+ %u1s2_0x0706050c = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835020)
+ store volatile i32 %u1s2_0x0706050c, i32* %p
+ %u1s2_0x0706050d = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 117835021)
+ store volatile i32 %u1s2_0x0706050d, i32* %p
+ %u1s2_0x03020104 = call i32 @llvm.amdgcn.perm(i32 undef, i32 1550679695, i32 50462980)
+ store volatile i32 %u1s2_0x03020104, i32* %p
+ %s1u2_0x03020104 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462980)
+ store volatile i32 %s1u2_0x03020104, i32* %p
+ %s1u2_0x0302010c = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462988)
+ store volatile i32 %s1u2_0x0302010c, i32* %p
+ %s1u2_0x0302010e = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50462990)
+ store volatile i32 %s1u2_0x0302010e, i32* %p
+ %s1u2_0x03020f0e = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 50466574)
+ store volatile i32 %s1u2_0x03020f0e, i32* %p
+ %s1u2_0x07060500 = call i32 @llvm.amdgcn.perm(i32 421542475, i32 undef, i32 117835008)
+ store volatile i32 %s1u2_0x07060500, i32* %p
+ %_0x81000100_0x01008100_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 2164261120, i32 16810240, i32 185207048)
+ store volatile i32 %_0x81000100_0x01008100_0x0b0a0908, i32* %p
+ %_u1_0x01008100_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 undef, i32 16810240, i32 185207048)
+ store volatile i32 %_u1_0x01008100_0x0b0a0908, i32* %p
+ %_0x81000100_u2_0x0b0a0908 = call i32 @llvm.amdgcn.perm(i32 2164261120, i32 undef, i32 185207048)
+ store volatile i32 %_0x81000100_u2_0x0b0a0908, i32* %p
+ ret void
+}
More information about the llvm-commits
mailing list