[PATCH] D60457: [CodeGen] Fixed de-optimization of legalize subvector extract

Tue Apr 9 05:50:08 PDT 2019

tpr created this revision.
Herald added subscribers: llvm-commits, nhaehnle, jvesely.
Herald added a project: LLVM.

The recent introduction of v3i32 etc. as MVTs, and their use in AMDGPU
3-dword memory instructions, caused a de-optimization for code that
performs such a load and then bitcasts the result via a vector of i8:
because v12i8 is not an MVT, the bitcast is legalized by widening it,
which previously sent the value via memory.

This commit adds the ability to widen a bitcast using extract_subvector
on the result, so the value does not need to go via memory.

Change-Id: Ie4abb7760547e54a2445961992eafc78e80d4b64


Repository:
  rL LLVM

https://reviews.llvm.org/D60457

Files:
  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
  test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll


Index: test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 <%s -stop-after=amdgpu-isel | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: body:
+; GCN-NOT: %stack
+
+define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg noalias dereferenceable(18446744073709551615) %arg) {
+main_body:
+  %tmp25 = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> undef, i32 undef, i32 0, i32 0)
+  %tmp26 = shufflevector <3 x float> %tmp25, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %tmp27 = bitcast <4 x float> %tmp26 to <16 x i8>
+  %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
+  %tmp30 = extractelement <3 x i32> %tmp29, i32 0
+  %tmp31 = extractelement <3 x i32> %tmp29, i32 1
+  %tmp32 = extractelement <3 x i32> %tmp29, i32 2
+  %tmp33 = call i32 @llvm.bitreverse.i32(i32 %tmp30) #5
+  %tmp34 = call i32 @llvm.bitreverse.i32(i32 %tmp31) #5
+  %tmp35 = call i32 @llvm.bitreverse.i32(i32 %tmp32) #5
+  %tmp36 = insertelement <2 x i32> undef, i32 %tmp33, i32 0
+  %tmp37 = insertelement <2 x i32> %tmp36, i32 %tmp34, i32 1
+  %tmp38 = getelementptr [0 x i8], [0 x i8] addrspace(6)* %arg, i32 0, i32 16
+  %tmp39 = bitcast i8 addrspace(6)* %tmp38 to <4 x i32> addrspace(6)*
+  %tmp40 = load <4 x i32>, <4 x i32> addrspace(6)* %tmp39, align 16
+  %tmp41 = bitcast <2 x i32> %tmp37 to <2 x float>
+  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %tmp41, <4 x i32> undef, i32 undef, i32 0, i32 0) #3
+  %tmp43 = bitcast i32 %tmp35 to float
+  call void @llvm.amdgcn.raw.buffer.store.f32(float %tmp43, <4 x i32> undef, i32 undef, i32 0, i32 0) #3
+  ret void
+}
+
+declare i32 @llvm.bitreverse.i32(i32)
+declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
+declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg)
+
Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4063,6 +4063,20 @@
     }
   }
 
+  if (VT.isVector()) {
+    EVT EltVT = VT.getVectorElementType();
+    unsigned EltSize = EltVT.getSizeInBits();
+    if (InWidenSize % EltSize == 0) {
+      unsigned NewNumElts = InWidenSize / EltSize;
+      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
+      if (TLI.isTypeLegal(NewVT)) {
+        SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
+            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      }
+    }
+  }
+
   return CreateStackStoreLoad(InOp, VT);
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60457.194299.patch
Type: text/x-patch
Size: 3147 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190409/771eb549/attachment.bin>