[llvm] 7d3aace - AMDGPU: Add flag to control mem intrinsic expansion

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 3 14:26:22 PST 2020


Author: Matt Arsenault
Date: 2020-02-03T14:26:01-08:00
New Revision: 7d3aace3f52f6b3f87aac432aa41ae1cdeb348eb

URL: https://github.com/llvm/llvm-project/commit/7d3aace3f52f6b3f87aac432aa41ae1cdeb348eb
DIFF: https://github.com/llvm/llvm-project/commit/7d3aace3f52f6b3f87aac432aa41ae1cdeb348eb.diff

LOG: AMDGPU: Add flag to control mem intrinsic expansion

GlobalISel doesn't implement the expansion of the memory intrinsics
yet, so add a flag to force expanding them in IR, making it possible
to keep them from reaching GlobalISel for a while.

Added: 
    llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 15032969890e..54c15e4e4d39 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -22,7 +22,15 @@ using namespace llvm;
 
 namespace {
 
-const unsigned MaxStaticSize = 1024;
+static int MaxStaticSize;
+
+static cl::opt<int, true> MemIntrinsicExpandSizeThresholdOpt(
+  "amdgpu-mem-intrinsic-expand-size",
+  cl::desc("Set minimum mem intrinsic size to expand in IR"),
+  cl::location(MaxStaticSize),
+  cl::init(1024),
+  cl::Hidden);
+
 
 class AMDGPULowerIntrinsics : public ModulePass {
 private:
@@ -57,7 +65,7 @@ INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
 // require splitting based on alignment)
 static bool shouldExpandOperationWithSize(Value *Size) {
   ConstantInt *CI = dyn_cast<ConstantInt>(Size);
-  return !CI || (CI->getZExtValue() > MaxStaticSize);
+  return !CI || (CI->getSExtValue() > MaxStaticSize);
 }
 
 bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
new file mode 100644
index 000000000000..a83715d90a38
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=8 %s | FileCheck -check-prefix=OPT8 %s
+; RUN: opt -S -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=4 %s | FileCheck -check-prefix=OPT4 %s
+; RUN: opt -S -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=0 %s | FileCheck -check-prefix=OPT0 %s
+; RUN: opt -S -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefix=OPT_NEG %s
+
+; Test the -amdgpu-mem-intrinsic-expand-size flag works.
+
+; Make sure we can always eliminate the intrinsic, even at 0.
+define amdgpu_kernel void @memset_size_0(i8 addrspace(1)* %dst, i8 %val) {
+; OPT8-LABEL: @memset_size_0(
+; OPT8-NEXT:    call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
+; OPT8-NEXT:    ret void
+;
+; OPT4-LABEL: @memset_size_0(
+; OPT4-NEXT:    call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
+; OPT4-NEXT:    ret void
+;
+; OPT0-LABEL: @memset_size_0(
+; OPT0-NEXT:    call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 0, i1 false)
+; OPT0-NEXT:    ret void
+;
+; OPT_NEG-LABEL: @memset_size_0(
+; OPT_NEG-NEXT:    br i1 true, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
+; OPT_NEG:       loadstoreloop:
+; OPT_NEG-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
+; OPT_NEG-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
+; OPT_NEG-NEXT:    store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT_NEG-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; OPT_NEG-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 0
+; OPT_NEG-NEXT:    br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
+; OPT_NEG:       split:
+; OPT_NEG-NEXT:    ret void
+;
+  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 0, i1 false)
+  ret void
+}
+
+define amdgpu_kernel void @memset_size_4(i8 addrspace(1)* %dst, i8 %val) {
+; OPT8-LABEL: @memset_size_4(
+; OPT8-NEXT:    call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 4, i1 false)
+; OPT8-NEXT:    ret void
+;
+; OPT4-LABEL: @memset_size_4(
+; OPT4-NEXT:    call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 4, i1 false)
+; OPT4-NEXT:    ret void
+;
+; OPT0-LABEL: @memset_size_4(
+; OPT0-NEXT:    br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
+; OPT0:       loadstoreloop:
+; OPT0-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
+; OPT0-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
+; OPT0-NEXT:    store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT0-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; OPT0-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 4
+; OPT0-NEXT:    br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
+; OPT0:       split:
+; OPT0-NEXT:    ret void
+;
+; OPT_NEG-LABEL: @memset_size_4(
+; OPT_NEG-NEXT:    br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
+; OPT_NEG:       loadstoreloop:
+; OPT_NEG-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
+; OPT_NEG-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
+; OPT_NEG-NEXT:    store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT_NEG-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; OPT_NEG-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 4
+; OPT_NEG-NEXT:    br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
+; OPT_NEG:       split:
+; OPT_NEG-NEXT:    ret void
+;
+  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 4, i1 false)
+  ret void
+}
+
+define amdgpu_kernel void @memset_size_8(i8 addrspace(1)* %dst, i8 %val) {
+; OPT8-LABEL: @memset_size_8(
+; OPT8-NEXT:    call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 8, i1 false)
+; OPT8-NEXT:    ret void
+;
+; OPT4-LABEL: @memset_size_8(
+; OPT4-NEXT:    br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
+; OPT4:       loadstoreloop:
+; OPT4-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
+; OPT4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
+; OPT4-NEXT:    store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT4-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; OPT4-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 8
+; OPT4-NEXT:    br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
+; OPT4:       split:
+; OPT4-NEXT:    ret void
+;
+; OPT0-LABEL: @memset_size_8(
+; OPT0-NEXT:    br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
+; OPT0:       loadstoreloop:
+; OPT0-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
+; OPT0-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
+; OPT0-NEXT:    store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT0-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; OPT0-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 8
+; OPT0-NEXT:    br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
+; OPT0:       split:
+; OPT0-NEXT:    ret void
+;
+; OPT_NEG-LABEL: @memset_size_8(
+; OPT_NEG-NEXT:    br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
+; OPT_NEG:       loadstoreloop:
+; OPT_NEG-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ]
+; OPT_NEG-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]]
+; OPT_NEG-NEXT:    store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]]
+; OPT_NEG-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; OPT_NEG-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 8
+; OPT_NEG-NEXT:    br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]]
+; OPT_NEG:       split:
+; OPT_NEG-NEXT:    ret void
+;
+  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8, i64, i1 immarg) #0
+
+attributes #0 = { argmemonly nounwind willreturn writeonly }


        


More information about the llvm-commits mailing list