[PATCH] D57681: [InstCombine] Cleanup the TFE/LWE check in AMDGPU SimplifyDemanded
Nicolai Hähnle via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 4 05:03:59 PST 2019
nhaehnle created this revision.
nhaehnle added reviewers: hliao, dstuttard, arsenm.
Herald added subscribers: t-tye, tpr, yaxunl, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
nhaehnle added a reviewer: msearles.
The fix added in r352904 is not quite correct, or rather misleading:
1. When the texfailctrl (TFC) argument was non-constant, the fix assumed non-TFE/LWE, which is incorrect.
2. Regardless, this code path cannot even be hit for correct TFE/LWE-enabled calls, because such calls return a struct rather than a plain vector. A test case for a TFE-enabled call is added for completeness.
Change-Id: I92d314dbc67a2670f6d7adaab765ef45f56a49cf
Repository:
rL LLVM
https://reviews.llvm.org/D57681
Files:
lib/Transforms/InstCombine/InstCombineInternal.h
lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
Index: test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
===================================================================
--- test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -2404,6 +2404,21 @@
declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
+; --------------------------------------------------------------------
+; TFE / LWE
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
+; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
+ %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+ %rgba = extractvalue { <4 x float>, i32 } %data, 0
+ %elt0 = extractelement <4 x float> %rgba, i32 0
+ ret float %elt0
+}
+
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32, i32, <8 x i32>, i32, i32) #1
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -966,25 +966,16 @@
}
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
+/// struct returns.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DMaskIdx,
- int TFCIdx) {
+ int DMaskIdx) {
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
- // Need to change to new instruction format
- bool TFELWEEnabled = false;
- if (TFCIdx > 0) {
- if (ConstantInt *TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx)))
- TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE
- || TFC->getZExtValue() & 0x2; // LWE
- }
-
- if (TFELWEEnabled)
- return nullptr; // TFE not yet supported
-
ConstantInt *NewDMask = nullptr;
if (DMaskIdx < 0) {
@@ -1648,9 +1639,15 @@
case Intrinsic::amdgcn_struct_buffer_load_format:
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
- if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(
- II, DemandedElts, 0, II->getNumArgOperands() - 2);
+ if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) {
+ LLVM_DEBUG(
+ Value *TFC = II->getArgOperand(II->getNumOperands() - 2);
+ assert(!isa<ConstantInt>(TFC) ||
+ dyn_cast<ConstantInt>(TFC)->getZExtValue() == 0);
+ );
+
+ return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
+ }
break;
}
Index: lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- lib/Transforms/InstCombine/InstCombineInternal.h
+++ lib/Transforms/InstCombine/InstCombineInternal.h
@@ -800,8 +800,7 @@
Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DmaskIdx = -1,
- int TFCIdx = -1);
+ int DmaskIdx = -1);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D57681.185028.patch
Type: text/x-patch
Size: 4312 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190204/8a812b52/attachment.bin>
More information about the llvm-commits
mailing list