[PATCH] D68865: [InstCombine][AMDGPU] Fix crash with v3i16/v3f16 buffer intrinsics

Wed Oct 16 04:18:08 PDT 2019

This revision was automatically updated to reflect the committed changes.
Closed by commit rG79769a4475b3: [InstCombine][AMDGPU] Fix crash with v3i16/v3f16 buffer intrinsics (authored by piotr).
Herald added a subscriber: hiraditya.

Changed prior to commit:
  https://reviews.llvm.org/D68865?vs=224833&id=225192#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68865/new/

https://reviews.llvm.org/D68865

Files:
  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
  llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll


Index: llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
===================================================================

--- llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -1474,6 +1474,51 @@
 
 declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
 
+; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
+; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 3
+; CHECK-NEXT: ret half %elt1
+define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
+  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+  %elt1 = extractelement <4 x half> %data, i32 3
+  ret half %elt1
+}
+
+; FIXME: Enable load shortening when full support for v3f16 has been added (should expect call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16).
+; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
+; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 2
+; CHECK-NEXT: ret half %elt1
+define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
+  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+  %elt1 = extractelement <4 x half> %data, i32 2
+  ret half %elt1
+}
+
+; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
+; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x half> %data, i32 1
+; CHECK-NEXT: ret half %elt1
+define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
+  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+  %elt1 = extractelement <4 x half> %data, i32 1
+  ret half %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
+; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+; CHECK-NEXT: ret half %data
+define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
+  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
+  %elt1 = extractelement <4 x half> %data, i32 0
+  ret half %elt1
+}
+
+declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1
+declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1
+declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1
+declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.struct.tbuffer.load
 ; --------------------------------------------------------------------
Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -971,6 +971,13 @@
 Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                            APInt DemandedElts,
                                                            int DMaskIdx) {
+
+  // FIXME: Allow v3i16/v3f16 in buffer intrinsics when the types are fully supported.
+  if (DMaskIdx < 0 &&
+      II->getType()->getScalarSizeInBits() != 32 &&
+      DemandedElts.getActiveBits() == 3)
+    return nullptr;
+
   unsigned VWidth = II->getType()->getVectorNumElements();
   if (VWidth == 1)
     return nullptr;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D68865.225192.patch
Type: text/x-patch
Size: 4207 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191016/5e406a89/attachment.bin>