[llvm] 9e1d2af - AMDGPU/GlobalISel: Don't use vector G_EXTRACT in arg lowering
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 13:50:33 PST 2020
Author: Matt Arsenault
Date: 2020-03-04T16:49:01-05:00
New Revision: 9e1d2afc13c291fed0610bdfe89eb678890f7b88
URL: https://github.com/llvm/llvm-project/commit/9e1d2afc13c291fed0610bdfe89eb678890f7b88
DIFF: https://github.com/llvm/llvm-project/commit/9e1d2afc13c291fed0610bdfe89eb678890f7b88.diff
LOG: AMDGPU/GlobalISel: Don't use vector G_EXTRACT in arg lowering
Create a wider source vector and unmerge it with dead defs, as the
legalizer does. The legalization handling for G_EXTRACT is incomplete,
and it's preferable to keep everything in 32-bit pieces.
We should probably start moving these functions into utils, since we
have a growing number of places that do almost the same thing.
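To make the shape of the new lowering easier to follow, here is a minimal
standalone sketch of the LCM padding arithmetic that the
mergeVectorRegsToResultRegs helper added below relies on. It is not LLVM
code (plain integers stand in for LLT, and the program is made up purely
for illustration); it just models how the v3s16-from-v2s16 case rounds up:
two real <2 x s16> pieces plus one undef piece concatenate to <6 x s16>,
which then unmerges into one live <3 x s16> plus one dead def.

    // Standalone sketch (hypothetical, not LLVM code) of the LCM-based
    // padding used by mergeVectorRegsToResultRegs: only the arithmetic.
    #include <cstdio>
    #include <numeric>

    int main() {
      const int DstElts  = 3; // destination type, e.g. <3 x s16>
      const int PartElts = 2; // argument piece type, e.g. <2 x s16>

      // For same-element-type vectors the LCM type reduces to the LCM of
      // the element counts: <6 x s16> for the v3s16/v2s16 case.
      const int LCMElts = std::lcm(DstElts, PartElts);

      // Number of <2 x s16> sources fed to G_CONCAT_VECTORS: the real
      // pieces first, the remainder filled with one G_IMPLICIT_DEF value.
      const int NumWide = LCMElts / PartElts;

      // Number of <3 x s16> results of G_UNMERGE_VALUES: only the first
      // is used, the rest are dead defs.
      const int NumDst = LCMElts / DstElts;

      std::printf("concat %d x <%d x s16> -> <%d x s16>, "
                  "unmerge into %d x <%d x s16>\n",
                  NumWide, PartElts, LCMElts, NumDst, DstElts);
      return 0;
    }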
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index afbd89b2847b..ec0f38afc48b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -480,6 +480,43 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
return true;
}
+/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
+static MachineInstrBuilder mergeVectorRegsToResultRegs(
+ MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT LLTy = MRI.getType(DstRegs[0]);
+ LLT PartLLT = MRI.getType(SrcRegs[0]);
+
+ // Deal with v3s16 split into v2s16
+ LLT LCMTy = getLCMType(LLTy, PartLLT);
+ if (LCMTy == LLTy) {
+ // Common case where no padding is needed.
+ assert(DstRegs.size() == 1);
+ return B.buildConcatVectors(DstRegs[0], SrcRegs);
+ }
+
+ const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
+ Register Undef = B.buildUndef(PartLLT).getReg(0);
+
+ // Build vector of undefs.
+ SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
+
+ // Replace the first sources with the real registers.
+ std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+
+ auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
+ int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
+
+ SmallVector<Register, 8> PadDstRegs(NumDst);
+ std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
+
+ // Create the excess dead defs for the unmerge.
+ for (int I = DstRegs.size(); I != NumDst; ++I)
+ PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+
+ return B.buildUnmerge(PadDstRegs, Widened);
+}
+
// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &B,
ArrayRef<Register> OrigRegs,
@@ -492,22 +529,9 @@ static void packSplitRegsToOrigType(MachineIRBuilder &B,
}
if (LLTy.isVector() && PartLLT.isVector()) {
+ assert(OrigRegs.size() == 1);
assert(LLTy.getElementType() == PartLLT.getElementType());
-
- int DstElts = LLTy.getNumElements();
- int PartElts = PartLLT.getNumElements();
- if (DstElts % PartElts == 0)
- B.buildConcatVectors(OrigRegs[0], Regs);
- else {
- // Deal with v3s16 split into v2s16
- assert(PartElts == 2 && DstElts % 2 != 0);
- int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
-
- LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
- auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs);
- B.buildExtract(OrigRegs[0], RoundedConcat, 0);
- }
-
+ mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
return;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
index eeb4380cbc00..b058c6fe502b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -814,10 +814,11 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[EXTRACT]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK: S_SETPC_B64_return [[COPY3]]
store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
@@ -848,10 +849,11 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>)
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[EXTRACT]](<5 x s16>), [[DEF]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; CHECK: S_SETPC_B64_return [[COPY4]]
store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
@@ -898,6 +900,58 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {
ret void
}
+; <2 x i16> pieces that start spilling to the stack.
+; FIXME: load of 2 would be sufficient for last piece
+define void @void_func_v65i16(<65 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v65i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store 130 into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
+ ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
+ ; CHECK: S_SETPC_B64_return [[COPY33]]
+ store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef
+ ret void
+}
+
define void @void_func_v2f32(<2 x float> %arg0) #0 {
; CHECK-LABEL: name: void_func_v2f32
; CHECK: bb.1 (%ir-block.0):
@@ -1191,10 +1245,11 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[EXTRACT]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK: S_SETPC_B64_return [[COPY3]]
store <3 x half> %arg0, <3 x half> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
index a2b28175b4ac..7279a61af629 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
@@ -105,15 +105,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
- ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
- ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; UNPACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
+ ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
@@ -137,9 +138,10 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; PACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
- ; PACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
- ; PACKED: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.store.2d), [[EXTRACT]](<3 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
+ ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; PACKED: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.store.2d), [[UV]](<3 x s16>), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void