[llvm] f48fe2c - GlobalISel: Fix casted unmerge of G_CONCAT_VECTORS
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 13 19:37:19 PDT 2020
Author: Matt Arsenault
Date: 2020-04-13T22:03:05-04:00
New Revision: f48fe2c36e80e4d3820b5b56f5ad8c9bdbe813c4
URL: https://github.com/llvm/llvm-project/commit/f48fe2c36e80e4d3820b5b56f5ad8c9bdbe813c4
DIFF: https://github.com/llvm/llvm-project/commit/f48fe2c36e80e4d3820b5b56f5ad8c9bdbe813c4.diff
LOG: GlobalISel: Fix casted unmerge of G_CONCAT_VECTORS
This assumed a scalarizing unmerge, and would fail an assertion if the
unmerge was to smaller vector types.
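For reference, a minimal MIR sketch of the pattern that previously tripped the
assertion (it mirrors the first test case added below): the G_UNMERGE_VALUES
splits the sign-extended G_CONCAT_VECTORS result into <2 x s16> pieces rather
than scalars, so the combiner now unmerges each concat source into
correspondingly smaller vectors before doing the conversion.

    %8:_(<2 x s8>) = G_BUILD_VECTOR %4, %5
    %9:_(<2 x s8>) = G_BUILD_VECTOR %6, %7
    %10:_(<4 x s8>) = G_CONCAT_VECTORS %8, %9
    %11:_(<4 x s16>) = G_SEXT %10
    %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %11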
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
llvm/include/llvm/Support/LowLevelTypeImpl.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
llvm/unittests/CodeGen/LowLevelTypeTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 06c85409f396..ea78f61eea26 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -459,18 +459,35 @@ class LegalizationArtifactCombiner {
DstRegs.push_back(MI.getOperand(DefIdx).getReg());
if (ConvertOp) {
- SmallVector<Register, 2> TmpRegs;
- // This is a vector that is being scalarized and casted. Extract to
- // the element type, and do the conversion on the scalars.
- LLT MergeEltTy =
- MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
- for (unsigned j = 0; j < NumMergeRegs; ++j)
- TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy));
+ LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+
+ // This is a vector that is being split and cast. Extract to the
+ // element type, and do the conversion on the scalars (or smaller
+ // vectors).
+ LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);
+
+ // Handle split to smaller vectors, with conversions.
+ // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
+ // %3(<8 x s16>) = G_SEXT %2
+ // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
+ //
+ // =>
+ //
+ // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
+ // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
+ // %4(<2 x s16>) = G_SEXT %8
+ // %5(<2 x s16>) = G_SEXT %9
+ // %6(<2 x s16>) = G_SEXT %10
+ // %7(<2 x s16>) = G_SEXT %11
+
+ SmallVector<Register, 4> TmpRegs(NewNumDefs);
+ for (unsigned k = 0; k < NewNumDefs; ++k)
+ TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);
Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());
- for (unsigned j = 0; j < NumMergeRegs; ++j)
- Builder.buildInstr(ConvertOp, {DstRegs[j]}, {TmpRegs[j]});
+ for (unsigned k = 0; k < NewNumDefs; ++k)
+ Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
} else {
Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
}
diff --git a/llvm/include/llvm/Support/LowLevelTypeImpl.h b/llvm/include/llvm/Support/LowLevelTypeImpl.h
index 6b0e8d3898e6..a9aea17d3c00 100644
--- a/llvm/include/llvm/Support/LowLevelTypeImpl.h
+++ b/llvm/include/llvm/Support/LowLevelTypeImpl.h
@@ -143,6 +143,20 @@ class LLT {
return LLT::scalarOrVector(NewNumElts, getScalarType());
}
+ /// Return a type that is \p Factor times smaller. Reduces the number of
+ /// elements if this is a vector, or the bitwidth for scalar/pointers. Does
+ /// not attempt to handle cases that aren't evenly divisible.
+ LLT divide(int Factor) const {
+ assert(Factor != 1);
+ if (isVector()) {
+ assert(getNumElements() % Factor == 0);
+ return scalarOrVector(getNumElements() / Factor, getElementType());
+ }
+
+ assert(getSizeInBits() % Factor == 0);
+ return scalar(getSizeInBits() / Factor);
+ }
+
bool isByteSized() const { return (getSizeInBits() & 7) == 0; }
unsigned getScalarSizeInBits() const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index db88b4ffb580..64bb54cac5c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1317,12 +1317,6 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
return 0;
}
-static LLT divideLLT(LLT Ty, int Factor) {
- if (Ty.isVector())
- return LLT::vector(Ty.getNumElements() / Factor, Ty.getElementType());
- return LLT::scalar(Ty.getSizeInBits() / Factor);
-}
-
bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -1347,7 +1341,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
int NumLoads = 1;
if (LoadSize == 256 || LoadSize == 512) {
NumLoads = LoadSize / 128;
- Ty = divideLLT(Ty, NumLoads);
+ Ty = Ty.divide(NumLoads);
}
// Use the alignment to ensure that the required offsets will fit into the
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
index 5c3a7d6ffe44..56db891f9b02 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
@@ -574,3 +574,329 @@ body: |
%4:_(<3 x s16>), %5:_(<3 x s16>), %6:_(<3 x s16>), %7:_(<3 x s16>) = G_UNMERGE_VALUES %3
S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7
...
+
+---
+name: unmerge_v2s16_from_v4s16_sext_v4s8_concat_vectors_v2s8
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; CHECK-LABEL: name: unmerge_v2s16_from_v4s16_sext_v4s8_concat_vectors_v2s8
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32)
+ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32)
+ ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 8
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32)
+ ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 8
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32)
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG3]](s32)
+ ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(s8) = G_TRUNC %0
+ %5:_(s8) = G_TRUNC %1
+ %6:_(s8) = G_TRUNC %2
+ %7:_(s8) = G_TRUNC %3
+ %8:_(<2 x s8>) = G_BUILD_VECTOR %4, %5
+ %9:_(<2 x s8>) = G_BUILD_VECTOR %6, %7
+ %10:_(<4 x s8>) = G_CONCAT_VECTORS %8, %9
+ %11:_(<4 x s16>) = G_SEXT %10
+ %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %11
+ S_ENDPGM 0, implicit %12, implicit %13
+...
+
+---
+name: unmerge_v2s16_from_v8s16_sext_v8s8_concat_vectors_v4s8
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+
+ ; CHECK-LABEL: name: unmerge_v2s16_from_v8s16_sext_v8s8_concat_vectors_v4s8
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 8
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
+ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
+ ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY16]], 8
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
+ ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY17]], 8
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32)
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C]]
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG3]](s32)
+ ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C]]
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32)
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32)
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32)
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY24]], 8
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY21]](s32)
+ ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY25]], 8
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32)
+ ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C]]
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32)
+ ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C]]
+ ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+ ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY22]](s32)
+ ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY28]], 8
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY23]](s32)
+ ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY29]], 8
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG6]](s32)
+ ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C]]
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG7]](s32)
+ ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C]]
+ ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+ ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(s32) = COPY $vgpr4
+ %5:_(s32) = COPY $vgpr5
+ %6:_(s32) = COPY $vgpr6
+ %7:_(s32) = COPY $vgpr7
+ %8:_(s8) = G_TRUNC %0
+ %9:_(s8) = G_TRUNC %1
+ %10:_(s8) = G_TRUNC %2
+ %11:_(s8) = G_TRUNC %3
+ %12:_(s8) = G_TRUNC %4
+ %13:_(s8) = G_TRUNC %5
+ %14:_(s8) = G_TRUNC %6
+ %15:_(s8) = G_TRUNC %7
+ %16:_(<4 x s8>) = G_BUILD_VECTOR %8, %9, %10, %11
+ %17:_(<4 x s8>) = G_BUILD_VECTOR %12, %13, %14, %15
+ %18:_(<8 x s8>) = G_CONCAT_VECTORS %16, %17
+ %19:_(<8 x s16>) = G_SEXT %18
+ %20:_(<2 x s16>), %21:_(<2 x s16>), %22:_(<2 x s16>), %23:_(<2 x s16>) = G_UNMERGE_VALUES %19
+ S_ENDPGM 0, implicit %20, implicit %21, implicit %22, implicit %23
+...
+
+---
+name: unmerge_v2s16_from_v16s16_sext_v16s8_concat_vectors_v8s8
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+
+ ; CHECK-LABEL: name: unmerge_v2s16_from_v16s16_sext_v16s8_concat_vectors_v8s8
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32)
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32)
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32)
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY24]], 8
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY25]], 8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C]]
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY28]], 8
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY29]], 8
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32)
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C]]
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG3]](s32)
+ ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C]]
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY32]], 8
+ ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY21]](s32)
+ ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY33]], 8
+ ; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32)
+ ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C]]
+ ; CHECK: [[COPY35:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32)
+ ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C]]
+ ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+ ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK: [[COPY36:%[0-9]+]]:_(s32) = COPY [[COPY22]](s32)
+ ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY36]], 8
+ ; CHECK: [[COPY37:%[0-9]+]]:_(s32) = COPY [[COPY23]](s32)
+ ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY37]], 8
+ ; CHECK: [[COPY38:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG6]](s32)
+ ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY38]], [[C]]
+ ; CHECK: [[COPY39:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG7]](s32)
+ ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY39]], [[C]]
+ ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+ ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK: [[COPY40:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
+ ; CHECK: [[COPY41:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
+ ; CHECK: [[COPY42:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
+ ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
+ ; CHECK: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; CHECK: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; CHECK: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; CHECK: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY40]](s32)
+ ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY48]], 8
+ ; CHECK: [[COPY49:%[0-9]+]]:_(s32) = COPY [[COPY41]](s32)
+ ; CHECK: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY49]], 8
+ ; CHECK: [[COPY50:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG8]](s32)
+ ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY50]], [[C]]
+ ; CHECK: [[COPY51:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG9]](s32)
+ ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY51]], [[C]]
+ ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+ ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK: [[COPY52:%[0-9]+]]:_(s32) = COPY [[COPY42]](s32)
+ ; CHECK: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY52]], 8
+ ; CHECK: [[COPY53:%[0-9]+]]:_(s32) = COPY [[COPY43]](s32)
+ ; CHECK: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY53]], 8
+ ; CHECK: [[COPY54:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG10]](s32)
+ ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY54]], [[C]]
+ ; CHECK: [[COPY55:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG11]](s32)
+ ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY55]], [[C]]
+ ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+ ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+ ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK: [[COPY56:%[0-9]+]]:_(s32) = COPY [[COPY44]](s32)
+ ; CHECK: [[SEXT_INREG12:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY56]], 8
+ ; CHECK: [[COPY57:%[0-9]+]]:_(s32) = COPY [[COPY45]](s32)
+ ; CHECK: [[SEXT_INREG13:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY57]], 8
+ ; CHECK: [[COPY58:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG12]](s32)
+ ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY58]], [[C]]
+ ; CHECK: [[COPY59:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG13]](s32)
+ ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY59]], [[C]]
+ ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+ ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+ ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK: [[COPY60:%[0-9]+]]:_(s32) = COPY [[COPY46]](s32)
+ ; CHECK: [[SEXT_INREG14:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY60]], 8
+ ; CHECK: [[COPY61:%[0-9]+]]:_(s32) = COPY [[COPY47]](s32)
+ ; CHECK: [[SEXT_INREG15:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY61]], 8
+ ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG14]](s32)
+ ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY62]], [[C]]
+ ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG15]](s32)
+ ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY63]], [[C]]
+ ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32)
+ ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]]
+ ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>), implicit [[BITCAST4]](<2 x s16>), implicit [[BITCAST5]](<2 x s16>), implicit [[BITCAST6]](<2 x s16>), implicit [[BITCAST7]](<2 x s16>)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(s32) = COPY $vgpr4
+ %5:_(s32) = COPY $vgpr5
+ %6:_(s32) = COPY $vgpr6
+ %7:_(s32) = COPY $vgpr7
+ %8:_(s32) = COPY $vgpr8
+ %9:_(s32) = COPY $vgpr9
+ %10:_(s32) = COPY $vgpr10
+ %11:_(s32) = COPY $vgpr11
+ %12:_(s32) = COPY $vgpr12
+ %13:_(s32) = COPY $vgpr13
+ %14:_(s32) = COPY $vgpr14
+ %15:_(s32) = COPY $vgpr15
+ %16:_(s8) = G_TRUNC %0
+ %17:_(s8) = G_TRUNC %1
+ %18:_(s8) = G_TRUNC %2
+ %19:_(s8) = G_TRUNC %3
+ %20:_(s8) = G_TRUNC %4
+ %21:_(s8) = G_TRUNC %5
+ %22:_(s8) = G_TRUNC %6
+ %23:_(s8) = G_TRUNC %7
+ %24:_(s8) = G_TRUNC %8
+ %25:_(s8) = G_TRUNC %9
+ %26:_(s8) = G_TRUNC %10
+ %27:_(s8) = G_TRUNC %11
+ %28:_(s8) = G_TRUNC %12
+ %29:_(s8) = G_TRUNC %13
+ %30:_(s8) = G_TRUNC %14
+ %31:_(s8) = G_TRUNC %15
+ %32:_(<8 x s8>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23
+ %33:_(<8 x s8>) = G_BUILD_VECTOR %24, %25, %26, %27, %28, %29, %30, %31
+ %34:_(<16 x s8>) = G_CONCAT_VECTORS %32, %33
+ %35:_(<16 x s16>) = G_SEXT %34
+ %36:_(<2 x s16>), %37:_(<2 x s16>), %38:_(<2 x s16>), %39:_(<2 x s16>), %40:_(<2 x s16>), %41:_(<2 x s16>), %42:_(<2 x s16>), %43:_(<2 x s16>) = G_UNMERGE_VALUES %35
+ S_ENDPGM 0, implicit %36, implicit %37, implicit %38, implicit %39, implicit %40, implicit %41, implicit %42, implicit %43
+...
diff --git a/llvm/unittests/CodeGen/LowLevelTypeTest.cpp b/llvm/unittests/CodeGen/LowLevelTypeTest.cpp
index 2094be181f8d..7ba678dad0cb 100644
--- a/llvm/unittests/CodeGen/LowLevelTypeTest.cpp
+++ b/llvm/unittests/CodeGen/LowLevelTypeTest.cpp
@@ -238,4 +238,24 @@ TEST(LowLevelTypeTest, Invalid) {
ASSERT_FALSE(Ty.isVector());
}
+TEST(LowLevelTypeTest, Divide) {
+ // Test basic scalar->scalar cases.
+ EXPECT_EQ(LLT::scalar(16), LLT::scalar(32).divide(2));
+ EXPECT_EQ(LLT::scalar(8), LLT::scalar(32).divide(4));
+ EXPECT_EQ(LLT::scalar(8), LLT::scalar(32).divide(4));
+
+ // Test pointer->scalar
+ EXPECT_EQ(LLT::scalar(32), LLT::pointer(0, 64).divide(2));
+
+ // Test dividing vectors.
+ EXPECT_EQ(LLT::scalar(32), LLT::vector(2, 32).divide(2));
+ EXPECT_EQ(LLT::vector(2, 32), LLT::vector(4, 32).divide(2));
+
+ // Test vector of pointers
+ EXPECT_EQ(LLT::pointer(1, 64),
+ LLT::vector(4, LLT::pointer(1, 64)).divide(4));
+ EXPECT_EQ(LLT::vector(2, LLT::pointer(1, 64)),
+ LLT::vector(4, LLT::pointer(1, 64)).divide(2));
+}
+
}
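A usage note on the new helper: LLT::divide replaces the local divideLLT in
AMDGPURegisterBankInfo.cpp and is also what the artifact combiner now uses to
compute the per-piece type. Below is a small standalone C++ sketch of its
behavior (assuming the in-tree LLVM headers; the values mirror the
s-buffer-load split above, where a 256-bit load is broken into two 128-bit
loads):

    #include "llvm/Support/LowLevelTypeImpl.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      // A 256-bit <8 x s32> load result split into NumLoads = 256/128 pieces.
      LLT Ty = LLT::vector(8, 32);
      int NumLoads = 256 / 128;
      LLT PieceTy = Ty.divide(NumLoads);          // <4 x s32>
      assert(PieceTy == LLT::vector(4, 32));

      // Scalars and pointers divide on bit width rather than element count;
      // a divided pointer comes back as a plain scalar.
      assert(LLT::scalar(64).divide(2) == LLT::scalar(32));
      assert(LLT::pointer(0, 64).divide(2) == LLT::scalar(32));
      return 0;
    }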