[llvm] 69cc9f3 - AMDGPU/GlobalISel: Legalize llvm.amdgcn.s.buffer.load
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 09:01:43 PST 2020
Author: Matt Arsenault
Date: 2020-02-05T12:01:34-05:00
New Revision: 69cc9f3046cedcebabe2293de0acba408d181eb4
URL: https://github.com/llvm/llvm-project/commit/69cc9f3046cedcebabe2293de0acba408d181eb4
DIFF: https://github.com/llvm/llvm-project/commit/69cc9f3046cedcebabe2293de0acba408d181eb4.diff
LOG: AMDGPU/GlobalISel: Legalize llvm.amdgcn.s.buffer.load
The 96-bit results need to be widened.
I find the interaction between LegalizerHelper and MIRBuilder somewhat
awkward. The custom legalization is called by the LegalizerHelper, but
then does not have access to the helper. You have to construct a new
helper, which then does not own the MachineIRBuilder, but does modify
it. Maybe custom legalization should be passed the helper?
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c3f4a71fa0ac..6f2e1ae51cfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -42,6 +42,20 @@ using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;
+// Return Ty with its element count rounded up to a power of two.
+static LLT getPow2VectorType(LLT Ty) {
+  const unsigned RoundedNumElts = 1 << Log2_32_Ceil(Ty.getNumElements());
+  // Only the element count is replaced; the result is derived from Ty itself.
+  return Ty.changeNumElements(RoundedNumElts);
+}
+
+// Scalar type whose width is Ty's size in bits rounded up to a power of two.
+static LLT getPow2ScalarType(LLT Ty) {
+  const unsigned RoundedBits = 1 << Log2_32_Ceil(Ty.getSizeInBits());
+  // The result is always built as a scalar, regardless of what Ty was.
+  return LLT::scalar(RoundedBits);
+}
+
static LegalityPredicate isMultiple32(unsigned TypeIdx,
unsigned MaxSize = 1024) {
return [=](const LegalityQuery &Query) {
@@ -2959,6 +2973,33 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
return true;
}
+bool AMDGPULegalizerInfo::legalizeSBufferLoad(
+    MachineInstr &MI, MachineIRBuilder &B,
+    GISelChangeObserver &Observer) const {
+  // Operand 0 is the load result; its type decides whether to widen.
+  const LLT ResultTy = B.getMRI()->getType(MI.getOperand(0).getReg());
+
+  // There are no 96-bit result scalar loads, but widening to 128-bit should
+  // always be legal. We may need to restore this to a 96-bit result if it turns
+  // out this needs to be converted to a vector load during RegBankSelect.
+  if (isPowerOf2_32(ResultTy.getSizeInBits()))
+    return true; // Power-of-two sized results are left untouched.
+
+  // The helper is handed B by reference, so point B at MI before using it.
+  LegalizerHelper Helper(B.getMF(), *this, Observer, B);
+  B.setInstr(MI);
+
+  // Bracket the in-place rewrite of MI's result with observer notifications.
+  Observer.changingInstr(MI);
+  if (ResultTy.isVector()) {
+    Helper.moreElementsVectorDst(MI, getPow2VectorType(ResultTy), 0);
+  } else {
+    Helper.widenScalarDst(MI, getPow2ScalarType(ResultTy), 0);
+  }
+  Observer.changedInstr(MI);
+  return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineIRBuilder &B,
GISelChangeObserver &Observer) const {
@@ -3075,6 +3116,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::amdgcn_s_buffer_load:
+ return legalizeSBufferLoad(MI, B, Observer);
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
return legalizeBufferStore(MI, MRI, B, false, false);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 74a244f36d45..421641e19fb0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -138,6 +138,10 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
GISelChangeObserver &Observer,
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const;
+ bool legalizeSBufferLoad(
+ MachineInstr &MI, MachineIRBuilder &B,
+ GISelChangeObserver &Observer) const;
+
bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
bool IsInc) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir
new file mode 100644
index 000000000000..2fe45e424642
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: s_buffer_load_v3s32
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GCN-LABEL: name: s_buffer_load_v3s32
+ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN: [[INT:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
+ ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[INT]](<4 x s32>), 0
+ ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: s_buffer_load_v3p3
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GCN-LABEL: name: s_buffer_load_v3p3
+ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN: [[INT:%[0-9]+]]:_(<4 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
+ ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x p3>) = G_EXTRACT [[INT]](<4 x p3>), 0
+ ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x p3>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: s_buffer_load_v6s16
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GCN-LABEL: name: s_buffer_load_v6s16
+ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
+ ; GCN: [[EXTRACT:%[0-9]+]]:_(<6 x s16>) = G_EXTRACT [[INT]](<8 x s16>), 0
+ ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<6 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: s_buffer_load_v6s32
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GCN-LABEL: name: s_buffer_load_v6s32
+ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN: [[INT:%[0-9]+]]:_(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
+ ; GCN: [[EXTRACT:%[0-9]+]]:_(<6 x s32>) = G_EXTRACT [[INT]](<8 x s32>), 0
+ ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s32>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: s_buffer_load_v3s64
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GCN-LABEL: name: s_buffer_load_v3s64
+ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN: [[INT:%[0-9]+]]:_(<4 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
+ ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[INT]](<4 x s64>), 0
+ ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s64>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: s_buffer_load_v12s8
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GCN-LABEL: name: s_buffer_load_v12s8
+ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN: [[INT:%[0-9]+]]:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
+ ; GCN: [[EXTRACT:%[0-9]+]]:_(<12 x s8>) = G_EXTRACT [[INT]](<16 x s8>), 0
+ ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<12 x s8>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: s_buffer_load_s96
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; Scalar (non-vector) 96-bit result: widened to s128, then truncated back.
+ ; GCN-LABEL: name: s_buffer_load_s96
+ ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN: [[INT:%[0-9]+]]:_(s128) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
+ ; GCN: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[INT]](s128)
+ ; GCN: S_ENDPGM 0, implicit [[TRUNC]](s96)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(s96) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
+ S_ENDPGM 0, implicit %2
+
+...
More information about the llvm-commits
mailing list