[llvm] 0426c2d - Reapply "AMDGPU: Cleanup and fix SMRD offset handling"
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 06:01:41 PST 2020
Author: Matt Arsenault
Date: 2020-01-31T06:01:28-08:00
New Revision: 0426c2d07d5f2c862e78e3b4d88399ba4ba7b6d3
URL: https://github.com/llvm/llvm-project/commit/0426c2d07d5f2c862e78e3b4d88399ba4ba7b6d3
DIFF: https://github.com/llvm/llvm-project/commit/0426c2d07d5f2c862e78e3b4d88399ba4ba7b6d3.diff
LOG: Reapply "AMDGPU: Cleanup and fix SMRD offset handling"
This reverts commit 6a4acb9d809aaadb9304a7a2f3382d958a6c2adf.
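The practical effect of the patch: the isLegalSMRDImmOffset/getSMRDEncodedOffset pair is merged into Optional-returning helpers, so callers can no longer consume an encoded offset without first checking that it is legal, and byte offsets that are not dword-aligned are now rejected on subtargets whose offset field is in dwords (SI/CI) instead of being silently truncated by the >>2 conversion. A minimal standalone sketch of the new helper semantics, using std::optional in place of llvm::Optional and reducing the subtarget to a single flag (this is not the in-tree code; getSMRDEncodedLiteralOffset32 is analogous, with an isUInt<32> range check instead of the 8/20-bit one):

#include <cstdint>
#include <optional>

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

// Byte offset -> units of the hardware offset field. IsGFX8Plus models
// hasSMEMByteOffset(): GCN3 (GFX8/VI) and GFX10 encode bytes, SI/CI dwords.
static uint64_t convertSMRDOffsetUnits(bool IsGFX8Plus, uint64_t ByteOffset) {
  return IsGFX8Plus ? ByteOffset : ByteOffset >> 2;
}

// Encodable immediate, or nullopt if the offset is misaligned (on
// dword-offset subtargets) or out of range for the immediate field
// (20-bit bytes on GFX8+, 8-bit dwords on SI/CI).
static std::optional<int64_t> getSMRDEncodedOffset(bool IsGFX8Plus,
                                                   int64_t ByteOffset) {
  if (!IsGFX8Plus && !isDwordAligned(ByteOffset))
    return std::nullopt;
  int64_t Enc = (int64_t)convertSMRDOffsetUnits(IsGFX8Plus, ByteOffset);
  bool Legal = IsGFX8Plus ? (Enc >= 0 && Enc < (1 << 20))
                          : (Enc >= 0 && Enc < (1 << 8));
  return Legal ? std::optional<int64_t>(Enc) : std::nullopt;
}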
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index d887ed66a915..62fc6f56a0cf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1771,26 +1771,31 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDLoc SL(ByteOffsetNode);
GCNSubtarget::Generation Gen = Subtarget->getGeneration();
- int64_t ByteOffset = C->getSExtValue();
- int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
-
- if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
- Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+ uint64_t ByteOffset = C->getZExtValue();
+ Optional<int64_t> EncodedOffset =
+ AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
+ if (EncodedOffset) {
+ Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
Imm = true;
return true;
}
- if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
+ if (Gen == AMDGPUSubtarget::SEA_ISLANDS) {
+ EncodedOffset =
+ AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
+ if (EncodedOffset) {
+ Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+ return true;
+ }
+ }
+
+ if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
return false;
- if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
- // 32-bit Immediates are supported on Sea Islands.
- Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
- } else {
- SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
- Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
- C32Bit), 0);
- }
+ SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
+ Offset = SDValue(
+ CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
+
Imm = false;
return true;
}
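The rewritten SelectSMRDOffset tries three forms in order: the subtarget's native immediate encoding; on SEA_ISLANDS only, the 32-bit literal dword offset (the S_LOAD_*_IMM_ci form); and finally an offset materialized into an SGPR with S_MOV_B32. A small runnable sketch of that fallback order, with the subtarget reduced to two flags and selectSMRDOffsetForm a hypothetical name (not the in-tree code); the demo values mirror the s_buffer_load_imm_neg4 checks added below:

#include <cassert>
#include <cstdint>
#include <string>

// SeaIslands models Gen == SEA_ISLANDS; ByteOffsets models
// hasSMEMByteOffset() (GFX8+/GFX10).
static std::string selectSMRDOffsetForm(bool SeaIslands, bool ByteOffsets,
                                        uint64_t ByteOffset) {
  if (ByteOffsets) {                     // GFX8+: 20-bit byte immediate
    if (ByteOffset < (1u << 20))
      return "imm";
  } else if ((ByteOffset & 3) == 0) {    // SI/CI: 8-bit dword immediate
    uint64_t Dwords = ByteOffset >> 2;
    if (Dwords < (1u << 8))
      return "imm";
    if (SeaIslands && Dwords <= 0xffffffffu)
      return "literal32";                // CI-only S_LOAD_*_IMM_ci form
  }
  // Fall back to S_MOV_B32 + the SGPR-offset form if the value fits in
  // 32 bits, unsigned or signed.
  int64_t S = (int64_t)ByteOffset;
  if (ByteOffset <= 0xffffffffu || (S >= INT32_MIN && S <= INT32_MAX))
    return "sgpr";
  return "unselectable";
}

int main() {
  // An i32 offset of -4 reaches the selector zero-extended to 0xfffffffc.
  assert(selectSMRDOffsetForm(false, false, 0xfffffffc) == "sgpr");     // SI: s_mov_b32 -4
  assert(selectSMRDOffsetForm(true, false, 0xfffffffc) == "literal32"); // CI: 0x3fffffff dwords
  assert(selectSMRDOffsetForm(false, true, 0xfffffffc) == "sgpr");      // VI: !isUInt<20>
}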
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 90136f562c2c..55ed81d10009 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2107,15 +2107,14 @@ AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
return None;
const GEPInfo &GEPInfo = AddrInfo[0];
-
- if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
+ Optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+ if (!EncodedImm)
return None;
unsigned PtrReg = GEPInfo.SgprParts[0];
- int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
}};
}
@@ -2129,13 +2128,14 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
const GEPInfo &GEPInfo = AddrInfo[0];
unsigned PtrReg = GEPInfo.SgprParts[0];
- int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
- if (!isUInt<32>(EncodedImm))
+ Optional<int64_t> EncodedImm =
+ AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
+ if (!EncodedImm)
return None;
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
}};
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 8364665dda04..d0c713d07d87 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -587,7 +587,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
16, 4);
unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
- unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
+ unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
.addReg(Rsrc01)
.addImm(EncodedOffset) // offset
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index ee207586b0d0..806f8af9d511 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -501,7 +501,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
: 4;
break;
case S_BUFFER_LOAD_IMM:
- EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4);
+ EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4);
break;
default:
EltSize = 4;
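The two non-selector callers above pass offsets that are always legal (0 or 16 in SIFrameLowering, 4 in SILoadStoreOptimizer), so they only need the byte-to-dword unit conversion and move to the new convertSMRDOffsetUnits rather than the now-Optional getSMRDEncodedOffset.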
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 8b21b9346987..14f9586da769 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1247,16 +1247,43 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
return isGCN3Encoding(ST) || isGFX10(ST);
}
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+static bool isLegalSMRDEncodedImmOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset) {
+ return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
+ : isUInt<8>(EncodedOffset);
+}
+
+static bool isDwordAligned(uint64_t ByteOffset) {
+ return (ByteOffset & 3) == 0;
+}
+
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
+ uint64_t ByteOffset) {
if (hasSMEMByteOffset(ST))
return ByteOffset;
+
+ assert(isDwordAligned(ByteOffset));
return ByteOffset >> 2;
}
-bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
- int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
- return (hasSMEMByteOffset(ST)) ?
- isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+ int64_t ByteOffset) {
+ if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
+ return None;
+
+ int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+ return isLegalSMRDEncodedImmOffset(ST, EncodedOffset) ?
+ Optional<int64_t>(EncodedOffset) : None;
+}
+
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+ int64_t ByteOffset) {
+ if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
+ return None;
+
+ assert(isCI(ST));
+ int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+ return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
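A worked example of the new legality rules, matching the s_buffer_load_imm_1024 test added below: a byte offset of 1024 is dword-aligned and converts to 256 dwords on SI/CI. isUInt<8>(256) fails (255 is the maximum), so SI has no immediate form and must materialize 0x400 in an SGPR; CI falls back to the 32-bit literal and encodes 0x100; VI encodes bytes directly, and isUInt<20>(1024) holds, so it emits the immediate 0x400. By contrast, 1020 bytes is 255 dwords, the largest value that still fits the SI/CI 8-bit field (see s_buffer_load_imm_1020).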
// Given Imm, split it into the values to put into the SOffset and ImmOffset
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a5bada2890d2..859f83206d03 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -648,9 +648,19 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
bool isArgPassedInSGPR(const Argument *Arg);
-/// \returns The encoding that will be used for \p ByteOffset in the SMRD
-/// offset field.
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
+/// offsets.
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
+
+/// \returns The encoding that will be used for \p ByteOffset in the SMRD offset
+/// field, or None if it won't fit. This is useful on all subtargets.
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+ int64_t ByteOffset);
+
+/// \returns The encoding that can be used for a 32-bit literal offset in an
+/// SMRD instruction. This is only useful on CI.
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+ int64_t ByteOffset);
/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index 720b0de8280d..74f18b7e7c94 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -788,8 +788,9 @@ body: |
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262143, 0, 0 :: (load 4, addrspace 4)
- ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
+ ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
@@ -872,8 +873,9 @@ body: |
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 268435455, 0, 0 :: (load 4, addrspace 4)
- ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
+ ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
+ ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
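These two GFX7 test changes show the functional fix: 1048575 (0xfffff) and 1073741823 (0x3fffffff) both have their low two bits set, so they are not dword-aligned and cannot be represented in S_LOAD_DWORD_IMM_ci's dword-unit offset field. The old code encoded 1048575 as 1048575 >> 2 = 262143, silently dropping the low bits and effectively loading from byte offset 1048572; the Optional-returning helper now rejects the offset, and the selector materializes it with S_MOV_B32 and selects S_LOAD_DWORD_SGPR instead.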
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 1cdf1d391f2e..1a3cc72fe5b2 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -368,9 +368,16 @@ done:
; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
+; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
+; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+
+; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
+; VI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
+; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+
+; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffffff{{$}}
+
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) {
entry:
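The new CI check is the profitable side of the same change: the sunk constant offset here is 4294967295 * 4 = 0x3fffffffc bytes (computed on SI and VI by the s_add_u32/s_addc_u32 pair with lo = -4, hi = 3). That offset is dword-aligned, and 0x3fffffffc >> 2 = 0xffffffff fits the 32-bit literal, so CI folds the entire offset into the load instead of doing the 64-bit address arithmetic.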
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
index 4c25ebb617b5..f35bb54e955c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -1,9 +1,11 @@
-;RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI,SICI
+; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,CI,SICI
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
;GCN-LABEL: {{^}}s_buffer_load_imm:
;GCN-NOT: s_waitcnt;
;SI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
+;CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
;VI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x4
define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
main_body:
@@ -38,6 +40,7 @@ main_body:
;GCN-LABEL: {{^}}s_buffer_loadx2_imm:
;GCN-NOT: s_waitcnt;
;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
+;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
main_body:
@@ -78,6 +81,7 @@ main_body:
;GCN-LABEL: {{^}}s_buffer_loadx3_imm:
;GCN-NOT: s_waitcnt;
;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
+;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
main_body:
@@ -107,6 +111,7 @@ main_body:
;GCN-LABEL: {{^}}s_buffer_loadx3_index_divergent:
;GCN-NOT: s_waitcnt;
;SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;CI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
;VI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
main_body:
@@ -122,6 +127,7 @@ main_body:
;GCN-LABEL: {{^}}s_buffer_loadx4_imm:
;GCN-NOT: s_waitcnt;
;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
+;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0xc8
define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
main_body:
@@ -168,6 +174,7 @@ main_body:
;GCN-LABEL: {{^}}s_buffer_load_imm_mergex2:
;GCN-NOT: s_waitcnt;
;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
+;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x4
define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
main_body:
@@ -182,6 +189,7 @@ main_body:
;GCN-LABEL: {{^}}s_buffer_load_imm_mergex4:
;GCN-NOT: s_waitcnt;
;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
+;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x8
define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
main_body:
@@ -236,6 +244,214 @@ bb1: ; preds = %main_body
ret void
}
+; GCN-LABEL: {{^}}s_buffer_load_imm_neg1:
+; GCN: s_mov_b32 [[K:s[0-9]+]], -1{{$}}
+; GCN: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_neg4:
+; SI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x3fffffff{{$}}
+
+; VI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_neg8:
+; SI: s_mov_b32 [[K:s[0-9]+]], -8{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x3ffffffe{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_bit31:
+; SI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x20000000{{$}}
+
+; VI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_bit30:
+; SI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x10000000{{$}}
+
+; VI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_bit29:
+; SI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x8000000{{$}}
+
+; VI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_bit21:
+; SI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}
+
+; VI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_bit20:
+; SI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x40000{{$}}
+
+; VI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit20:
+; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x3ffc0000{{$}}
+
+; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_bit19:
+; SI: s_mov_b32 [[K:s[0-9]+]], 0x80000{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x20000{{$}}
+
+; VI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit19:
+; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x3ffe0000{{$}}
+
+; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_255:
+; SICI: s_movk_i32 [[K:s[0-9]+]], 0xff{{$}}
+; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; VI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_256:
+; SICI: s_buffer_load_dword s0, s[0:3], 0x40{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_1016:
+; SICI: s_buffer_load_dword s0, s[0:3], 0xfe{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], 0x3f8{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_1020:
+; SICI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], 0x3fc{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_1021:
+; SICI: s_movk_i32 [[K:s[0-9]+]], 0x3fd{{$}}
+; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_1024:
+; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
+
+; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_1025:
+; SICI: s_movk_i32 [[K:s[0-9]+]], 0x401{{$}}
+; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; VI: s_buffer_load_dword s0, s[0:3], 0x401{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
+ %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}s_buffer_load_imm_1028:
+; SI: s_movk_i32 [[K:s[0-9]+]], 0x404{{$}}
+; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
+
+; CI: s_buffer_load_dword s0, s[0:3], 0x101{{$}}
+; VI: s_buffer_load_dword s0, s[0:3], 0x404{{$}}
+define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
+  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
+  ret i32 %load
+}
+
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)