[llvm] [AMDGPU] Fix negative immediate offset for unbuffered smem loads (PR #79553)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 15:32:34 PST 2024
https://github.com/vangthao95 updated https://github.com/llvm/llvm-project/pull/79553
>From 48b758ade80749278c1df6be280f46fafe359503 Mon Sep 17 00:00:00 2001
From: Vang Thao <Vang.Thao at amd.com>
Date: Fri, 26 Jan 2024 06:50:16 +0000
Subject: [PATCH 1/2] [AMDGPU] Fix negative immediate offset for unbuffered
smem loads
For unbuffered smem loads, It is illegal and undefined for the immediate offset
to be negative if the resulting IOFFSET + (SGPR[Offset] or M0 or zero) is
negative. As a workaround for this issue, if there is no SGPR[Offset] and the
immediate offset is negative, subtract the absolute value of the immediate
offset from the base address. Then change the immediate offset to 0.
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 +
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 49 ++++-
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 14 +-
.../AMDGPU/AMDGPUInstructionSelector.cpp | 35 ++-
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 8 +-
llvm/lib/Target/AMDGPU/SMInstructions.td | 3 +-
.../GlobalISel/inst-select-load-constant.mir | 203 +++++++++++++++++-
.../AMDGPU/cgp-addressing-modes-smem.ll | 3 +-
.../AMDGPU/gfx12_scalar_subword_loads.ll | 6 +-
llvm/test/CodeGen/AMDGPU/global-saddr-load.ll | 21 +-
10 files changed, 319 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 152f495a452ba..0017f51ee5d92 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -116,6 +116,10 @@ def gi_smrd_sgpr_imm :
GIComplexOperandMatcher<s64, "selectSmrdSgprImm">,
GIComplexPatternEquiv<SMRDSgprImm>;
+def gi_smrd_prefetch_imm :
+ GIComplexOperandMatcher<s64, "selectSmrdPrefetchImm">,
+ GIComplexPatternEquiv<SMRDPrefetchImm>;
+
def gi_flat_offset :
GIComplexOperandMatcher<s64, "selectFlatOffset">,
GIComplexPatternEquiv<FlatOffset>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5278b552a6551..73374c6f9eddd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2071,13 +2071,16 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
// true, match only 32-bit immediate offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
SDValue *SOffset, SDValue *Offset,
- bool Imm32Only,
- bool IsBuffer) const {
+ bool Imm32Only, bool IsBuffer,
+ bool IsPrefetch,
+ bool HasSOffset) const {
if (SOffset && Offset) {
assert(!Imm32Only && !IsBuffer);
SDValue B;
- return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
- SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
+ return SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false,
+ IsPrefetch, true) &&
+ SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false,
+ IsPrefetch, true);
}
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
@@ -2096,12 +2099,39 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
}
if (!N0 || !N1)
return false;
+
+ bool Selected = false;
if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
SBase = N0;
- return true;
+ Selected = true;
}
+
if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
SBase = N1;
+ Selected = true;
+ }
+
+ if (Selected) {
+ // For unbuffered smem loads, it is illegal and undefined for the Immediate
+ // Offset to be negative if the resulting (Offset + (M0 or SOffset or zero)
+ // is negative. Handle the case where the Immediate Offset is negative and
+ // there is no SOffset.
+ //
+ // FIXME: Also handle M0 or SOffset case?
+ if (Offset && !HasSOffset && !IsBuffer && !IsPrefetch &&
+ Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset)) {
+ if (C->getSExtValue() < 0) {
+ SDLoc SL(SBase);
+ *Offset = CurDAG->getTargetConstant(std::abs(C->getSExtValue()), SL,
+ MVT::i32);
+ const SDValue Ops[] = {SBase, *Offset};
+ SBase = SDValue(
+ CurDAG->getMachineNode(AMDGPU::S_SUB_U64, SL, MVT::i64, Ops), 0);
+ *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ }
+ }
+ }
return true;
}
return false;
@@ -2109,8 +2139,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
SDValue *SOffset, SDValue *Offset,
- bool Imm32Only) const {
- if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
+ bool Imm32Only, bool IsPrefetch) const {
+ if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only, IsPrefetch)) {
SBase = Expand32BitAddress(SBase);
return true;
}
@@ -2169,6 +2199,11 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
/* IsBuffer */ true);
}
+bool AMDGPUDAGToDAGISel::SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const {
+ return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset, false, true);
+}
+
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
SDValue &Base,
SDValue &Offset) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 3b42d88df0c24..5328ba985474d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -194,11 +194,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue *Offset, bool Imm32Only = false,
bool IsBuffer = false) const;
SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
- SDValue *Offset, bool Imm32Only = false,
- bool IsBuffer = false) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
- SDValue *Offset, bool Imm32Only = false) const;
+ bool SelectSMRDBaseOffset(SDValue Addr, SDValue & SBase, SDValue * SOffset,
+ SDValue * Offset, bool Imm32Only = false,
+ bool IsBuffer = false, bool IsPrefetch = false,
+ bool HasSOffset = false) const;
+ bool SelectSMRD(SDValue Addr, SDValue & SBase, SDValue * SOffset,
+ SDValue * Offset, bool Imm32Only = false,
+ bool IsPrefetch = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
@@ -208,6 +210,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
SDValue &Offset) const;
+ bool SelectSMRDPrefetchImm(SDValue Addr, SDValue & SBase, SDValue & Offset)
+ const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f255d098b631c..c7cc701c63dc5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4221,7 +4221,8 @@ AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
Register &Base,
Register *SOffset,
- int64_t *Offset) const {
+ int64_t *Offset,
+ bool IsPrefetch) const {
MachineInstr *MI = Root.getParent();
MachineBasicBlock *MBB = MI->getParent();
@@ -4257,6 +4258,27 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
Base = GEPI.SgprParts[0];
*Offset = *EncodedImm;
+ // For unbuffered smem loads, it is illegal and undefined for the Immediate
+ // Offset to be negative if the resulting (Offset + (M0 or SOffset or zero)
+ // is negative. Handle the case where the Immediate Offset is negative and
+ // there is no SOffset.
+ //
+ // FIXME: Also handle M0 or SOffset case?
+ if (!IsPrefetch && *Offset < 0 &&
+ STI.getGeneration() >= AMDGPUSubtarget::GFX11) {
+ // Subtract the absolute value of the offset from the base register and
+ // set the immediate offset to 0.
+ Register SubtractReg =
+ MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_SUB_U64),
+ SubtractReg)
+ .addReg(Base)
+ .addImm(std::abs(*Offset));
+ Base = SubtractReg;
+ *Offset = 0;
+ }
+
return true;
}
@@ -4339,6 +4361,17 @@ AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdPrefetchImm(MachineOperand &Root) const {
+ Register Base;
+ int64_t Offset;
+ if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset, true))
+ return std::nullopt;
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
+}
+
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
uint64_t FlatVariant) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index ef7630f137aca..573bc9260c765 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -220,8 +220,10 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectVINTERPModsHi(MachineOperand &Root) const;
- bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset,
- int64_t *Offset) const;
+ bool selectSmrdOffset(MachineOperand & Root, Register & Base,
+ Register * SOffset, int64_t * Offset,
+ bool IsPrefetch = false) const;
+
InstructionSelector::ComplexRendererFns
selectSmrdImm(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
@@ -230,6 +232,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
selectSmrdSgpr(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectSmrdSgprImm(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSmrdPrefetchImm(MachineOperand &Root) const;
std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root,
uint64_t FlatVariant) const;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index f3096962e2f3e..33127ac281f67 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -859,6 +859,7 @@ def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
+def SMRDPrefetchImm : ComplexPattern<iPTR, 2, "SelectSMRDPrefetchImm">;
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
@@ -1078,7 +1079,7 @@ def i32imm_one : TImmLeaf <i32, [{
multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
def : GCNPat <
- (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
+ (smrd_prefetch (SMRDPrefetchImm i64:$sbase, i32:$offset), timm, timm, cache_type),
(!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index c44477273dad0..b7010e4c65beb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -3,7 +3,7 @@
# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s
---
@@ -44,6 +44,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4)
$sgpr0 = COPY %1
@@ -89,6 +96,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v2s16_from_4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4)
$sgpr0 = COPY %1
@@ -133,6 +147,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v2s32
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -176,6 +197,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v2s32_align4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -219,6 +247,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v4s16_align4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -263,6 +298,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v4s32_align4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1
@@ -307,6 +349,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s64
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -351,6 +400,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s64_align4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -395,6 +451,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v2s64
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1
@@ -439,6 +502,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>)
+ ;
+ ; GFX11-LABEL: name: load_constant_v2p1
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>)
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1
@@ -483,6 +553,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128)
+ ;
+ ; GFX11-LABEL: name: load_constant_s128_align4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128)
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1
@@ -527,6 +604,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_p3_from_4
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4)
$sgpr0 = COPY %1
@@ -571,6 +655,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_p4_from_8
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -615,6 +706,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999)
+ ;
+ ; GFX11-LABEL: name: load_constant_p999_from_8
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999)
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -659,6 +757,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>)
+ ;
+ ; GFX11-LABEL: name: load_constant_v2p3
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>)
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -703,6 +808,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v2s16
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4)
$sgpr0 = COPY %1
@@ -747,6 +859,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v4s16
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4)
$sgpr0_sgpr1 = COPY %1
@@ -791,6 +910,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v8s16
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1
@@ -835,6 +961,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v8s32
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1
@@ -879,6 +1012,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v16s32
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1
@@ -923,6 +1063,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4)
; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_v8s64
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4)
+ ; GFX11-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), align 4, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1
@@ -971,6 +1118,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1020
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1020
%2:sgpr(p4) = G_PTR_ADD %0, %1
@@ -1018,6 +1172,13 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1024
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1024
%2:sgpr(p4) = G_PTR_ADD %0, %1
@@ -1067,6 +1228,14 @@ body: |
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1048575
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
+ ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048575
%2:sgpr(p4) = G_PTR_ADD %0, %1
@@ -1116,6 +1285,14 @@ body: |
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1048576
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
+ ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048576
%2:sgpr(p4) = G_PTR_ADD %0, %1
@@ -1166,6 +1343,14 @@ body: |
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX10-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4_gep_1073741823
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
+ ; GFX11-NEXT: [[S_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR_IMM [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1073741823
%2:sgpr(p4) = G_PTR_ADD %0, %1
@@ -1236,6 +1421,14 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4_gep_negative_1
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_SUB_U64_:%[0-9]+]]:sreg_64 = S_SUB_U64 [[COPY]], 1
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_SUB_U64_]], 0, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1
%2:sgpr(p4) = G_PTR_ADD %0, %1
@@ -1306,6 +1499,14 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load (s32), addrspace 4)
; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
+ ;
+ ; GFX11-LABEL: name: load_constant_s32_from_4_gep_negative_524288
+ ; GFX11: liveins: $sgpr0_sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX11-NEXT: [[S_SUB_U64_:%[0-9]+]]:sreg_64 = S_SUB_U64 [[COPY]], 524288
+ ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_SUB_U64_]], 0, 0 :: (load (s32), addrspace 4)
+ ; GFX11-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -524288
%2:sgpr(p4) = G_PTR_ADD %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
index 54dc5b8b9d3dd..a76216fc8f6a3 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
@@ -307,10 +307,11 @@ define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr,
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[0:1], 0x190
; GFX12-NEXT: .LBB5_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_load_b32 s3, s[0:1], -0x190
+; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
index 6c324ddc65466..2ae9524d2a842 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
@@ -21,7 +21,8 @@ define amdgpu_ps void @test_s_load_i8(ptr addrspace(4) inreg %in, ptr addrspace(
define amdgpu_ps void @test_s_load_i8_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
; GCN-LABEL: test_s_load_i8_imm:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_i8 s0, s[0:1], -0x64
+; GCN-NEXT: s_sub_nc_u64 s[0:1], s[0:1], 0x64
+; GCN-NEXT: s_load_i8 s0, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_mov_b32_e32 v2, s0
; GCN-NEXT: global_store_b32 v[0:1], v2, off
@@ -197,7 +198,8 @@ define amdgpu_ps void @test_s_load_i16(ptr addrspace(4) inreg %in, ptr addrspace
define amdgpu_ps void @test_s_load_i16_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) {
; GCN-LABEL: test_s_load_i16_imm:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_i16 s0, s[0:1], -0xc8
+; GCN-NEXT: s_sub_nc_u64 s[0:1], s[0:1], 0xc8
+; GCN-NEXT: s_load_i16 s0, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_mov_b32_e32 v2, s0
; GCN-NEXT: global_store_b32 v[0:1], v2, off
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
index 6d99485b91fe4..fa51dc374be23 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
@@ -159,7 +159,8 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4096(ptr addrspace(1) inr
;
; GFX12-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1000
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[2:3], 0x1000
+; GFX12-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
@@ -200,7 +201,8 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4097(ptr addrspace(1) inr
;
; GFX12-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1001
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[2:3], 0x1001
+; GFX12-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
@@ -241,7 +243,8 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4098(ptr addrspace(1) inr
;
; GFX12-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1002
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[2:3], 0x1002
+; GFX12-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
@@ -378,7 +381,8 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2048(ptr addrspace(1) inr
;
; GFX12-LABEL: global_load_saddr_i8_offset_neg2048:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x800
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[2:3], 0x800
+; GFX12-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
@@ -415,7 +419,8 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2049(ptr addrspace(1) inr
;
; GFX12-LABEL: global_load_saddr_i8_offset_neg2049:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x801
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[2:3], 0x801
+; GFX12-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
@@ -452,7 +457,8 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2050(ptr addrspace(1) inr
;
; GFX12-LABEL: global_load_saddr_i8_offset_neg2050:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x802
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[2:3], 0x802
+; GFX12-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
@@ -527,7 +533,8 @@ define amdgpu_ps float @global_load_saddr_i8_offset_0xFFFFFF(ptr addrspace(1) in
;
; GFX12-LABEL: global_load_saddr_i8_offset_0xFFFFFF:
; GFX12: ; %bb.0:
-; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x800000
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[2:3], 0x800000
+; GFX12-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
>From 8194ef18f3b4a3898f4ec1791a5b81a4f70f35ec Mon Sep 17 00:00:00 2001
From: Vang Thao <Vang.Thao at amd.com>
Date: Fri, 2 Feb 2024 23:01:27 +0000
Subject: [PATCH 2/2] Fix formatting issues and change if statements in
SelectSMRDBaseOffset() to if-else if.
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 9 ++++-----
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 12 ++++++------
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 5 ++---
3 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 73374c6f9eddd..ffa2f42a9e7a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2104,9 +2104,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
SBase = N0;
Selected = true;
- }
-
- if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
+ } else if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
SBase = N1;
Selected = true;
}
@@ -2140,7 +2138,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
SDValue *SOffset, SDValue *Offset,
bool Imm32Only, bool IsPrefetch) const {
- if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only, IsPrefetch)) {
+ if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only,
+ IsPrefetch)) {
SBase = Expand32BitAddress(SBase);
return true;
}
@@ -2200,7 +2199,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
}
bool AMDGPUDAGToDAGISel::SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase,
- SDValue &Offset) const {
+ SDValue &Offset) const {
return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset, false, true);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 5328ba985474d..24bb9fd905af5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -194,12 +194,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue *Offset, bool Imm32Only = false,
bool IsBuffer = false) const;
SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRDBaseOffset(SDValue Addr, SDValue & SBase, SDValue * SOffset,
- SDValue * Offset, bool Imm32Only = false,
+ bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false,
bool IsBuffer = false, bool IsPrefetch = false,
bool HasSOffset = false) const;
- bool SelectSMRD(SDValue Addr, SDValue & SBase, SDValue * SOffset,
- SDValue * Offset, bool Imm32Only = false,
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false,
bool IsPrefetch = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
@@ -210,8 +210,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
SDValue &Offset) const;
- bool SelectSMRDPrefetchImm(SDValue Addr, SDValue & SBase, SDValue & Offset)
- const;
+ bool SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 573bc9260c765..473068e7f6ac9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -220,9 +220,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectVINTERPModsHi(MachineOperand &Root) const;
- bool selectSmrdOffset(MachineOperand & Root, Register & Base,
- Register * SOffset, int64_t * Offset,
- bool IsPrefetch = false) const;
+ bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset,
+ int64_t *Offset, bool IsPrefetch = false) const;
InstructionSelector::ComplexRendererFns
selectSmrdImm(MachineOperand &Root) const;
More information about the llvm-commits
mailing list