[llvm] 5ec1845 - [AArch64][GlobalISel] Add a new reassociation for G_PTR_ADDs.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 14 23:58:04 PDT 2021
Author: Amara Emerson
Date: 2021-09-14T23:57:41-07:00
New Revision: 5ec1845cad9e2bce4eff78b6c7d7c0fca8dfffba
URL: https://github.com/llvm/llvm-project/commit/5ec1845cad9e2bce4eff78b6c7d7c0fca8dfffba
DIFF: https://github.com/llvm/llvm-project/commit/5ec1845cad9e2bce4eff78b6c7d7c0fca8dfffba.diff
LOG: [AArch64][GlobalISel] Add a new reassociation for G_PTR_ADDs.
(G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
Improves CTMark -Os on AArch64:
Program before after diff
sqlite3 286932 287024 0.0%
kc 432512 432508 -0.0%
SPASS 412788 412764 -0.0%
pairlocalalign 249460 249416 -0.0%
bullet 475740 475512 -0.0%
7zip-benchmark 568864 568356 -0.1%
consumer-typeset 419088 418648 -0.1%
tramp3d-v4 367628 367224 -0.1%
clamscan 383184 382732 -0.1%
lencod 430028 429284 -0.2%
Geomean difference -0.1%
Differential Revision: https://reviews.llvm.org/D109528
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 50cdfb3785cc2..624f00cabcde5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -577,6 +577,14 @@ class CombinerHelper {
/// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo);
+ // Helpers for reassociation:
+ bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS,
+ BuildFnTy &MatchInfo);
+ bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo);
+ bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS,
+ MachineInstr *RHS, BuildFnTy &MatchInfo);
/// Reassociate pointer calculations with G_ADD involved, to allow better
/// addressing mode usage.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0190b7315ceb8..701753b9c11ba 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4090,9 +4090,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
return false;
}
-bool CombinerHelper::matchReassocPtrAdd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ Register Src1Reg = MI.getOperand(1).getReg();
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
+ // if and only if (G_PTR_ADD X, C) has one use.
+ Register LHSBase;
+ Register LHSCstOff;
+ if (!mi_match(MI.getBaseReg(), MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_ICst(LHSCstOff)))))
+ return false;
+
+ auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ // When we change LHSPtrAdd's offset register we might cause it to use a reg
+ // before its def. Sink the instruction so the outer PTR_ADD to ensure this
+ // doesn't happen.
+ LHSPtrAdd->moveBefore(&MI);
+ Register RHSReg = MI.getOffsetReg();
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(LHSCstOff);
+ Observer.changedInstr(MI);
+ Observer.changingInstr(*LHSPtrAdd);
+ LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ Observer.changedInstr(*LHSPtrAdd);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
+ if (!LHSPtrAdd)
+ return false;
+
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register LHSSrc1 = LHSPtrAdd->getBaseReg();
+ Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
+ auto C1 = getConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
// We're trying to match a few pointer computation patterns here for
// re-association opportunities.
// 1) Isolating a constant operand to be on the RHS, e.g.:
@@ -4101,49 +4183,26 @@ bool CombinerHelper::matchReassocPtrAdd(
// 2) Folding two constants in each sub-tree as long as such folding
// doesn't break a legal addressing mode.
// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
- MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
- MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
-
- if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
- // Try to match example 1).
- if (RHS->getOpcode() != TargetOpcode::G_ADD)
- return false;
- auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
- if (!C2)
- return false;
+ //
+ // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
+ // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
+ // iif (G_PTR_ADD X, C) has one use.
+ MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
+ MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
+
+ // Try to match example 2.
+ if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- MatchInfo = [=,&MI](MachineIRBuilder &B) {
- LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+ // Try to match example 3.
+ if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- auto NewBase =
- Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(NewBase.getReg(0));
- MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
- Observer.changedInstr(MI);
- };
- } else {
- // Try to match example 2.
- Register LHSSrc1 = LHS->getOperand(1).getReg();
- Register LHSSrc2 = LHS->getOperand(2).getReg();
- auto C1 = getConstantVRegVal(LHSSrc2, MRI);
- if (!C1)
- return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
- if (!C2)
- return false;
+ // Try to match example 1.
+ if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
+ return true;
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(LHSSrc1);
- MI.getOperand(2).setReg(NewCst.getReg(0));
- Observer.changedInstr(MI);
- };
- }
- return !reassociationCanBreakAddressingModePattern(MI);
+ return false;
}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
index 1936473f15eb3..72cfa14482f46 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir
@@ -184,3 +184,82 @@ body: |
G_STORE %ptr_to_int(s64), %10(p0) :: (store 8)
$w0 = COPY %7(s32)
RET_ReallyLR implicit $w0
+...
+---
+name: reassoc_cst_inner_lhs
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+ - { reg: '$x3' }
+body: |
+ bb.1:
+ liveins: $w0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: reassoc_cst_inner_lhs
+ ; CHECK: liveins: $w0, $x1, $x2, $x3
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64)
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SHL]](s64)
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: RET_ReallyLR
+ %1:_(p0) = COPY $x1
+ %2:_(p0) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %8:_(s64) = G_CONSTANT i64 40
+ %9:_(p0) = G_PTR_ADD %2, %8(s64)
+ %10:_(s64) = G_CONSTANT i64 2
+ %11:_(s64) = G_SHL %3, %10
+ %12:_(p0) = G_PTR_ADD %9, %11(s64)
+ %14:_(s32) = G_LOAD %12(p0) :: (load (s32))
+ $w0 = COPY %14
+ RET_ReallyLR
+
+...
+---
+name: reassoc_cst_inner_lhs_multiuse
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+ - { reg: '$x3' }
+body: |
+ bb.1:
+ liveins: $w0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: reassoc_cst_inner_lhs_multiuse
+ ; CHECK: liveins: $w0, $x1, $x2, $x3
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64)
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[SHL]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
+ ; CHECK: $w0 = COPY [[LOAD]](s32)
+ ; CHECK: $x0 = COPY [[PTR_ADD]](p0)
+ ; CHECK: RET_ReallyLR
+ %1:_(p0) = COPY $x1
+ %2:_(p0) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %8:_(s64) = G_CONSTANT i64 40
+ %9:_(p0) = G_PTR_ADD %2, %8(s64)
+ %10:_(s64) = G_CONSTANT i64 2
+ %11:_(s64) = G_SHL %3, %10
+ %12:_(p0) = G_PTR_ADD %9, %11(s64)
+ %14:_(s32) = G_LOAD %12(p0) :: (load (s32))
+ $w0 = COPY %14
+ $x0 = COPY %9
+ RET_ReallyLR
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
index 11ed050f473ff..8f5e9b732864f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
@@ -336,32 +336,22 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b64 s[4:5], 0x400
-; GFX6-NEXT: v_mov_b32_e32 v2, s4
-; GFX6-NEXT: v_mov_b32_e32 v3, s5
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
-; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s5
-; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b64 s[4:5], 0x400
-; GFX7-NEXT: v_mov_b32_e32 v2, s4
-; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
-; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s5
-; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX7-NEXT: s_endpgm
%gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256
%gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset
@@ -433,25 +423,27 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX6-NEXT: s_add_u32 s0, s2, 0x3ffc
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
-; GFX6-NEXT: s_addc_u32 s1, s3, 0
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX7-NEXT: s_add_u32 s0, s2, 0x3ffc
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
-; GFX7-NEXT: s_addc_u32 s1, s3, 0
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT: s_endpgm
%gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
%gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset
@@ -790,31 +782,21 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspac
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b64 s[4:5], 0x400
-; GFX6-NEXT: v_mov_b32_e32 v2, s4
-; GFX6-NEXT: v_mov_b32_e32 v3, s5
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
-; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: s_mov_b32 s2, s5
-; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b64 s[4:5], 0x400
-; GFX7-NEXT: v_mov_b32_e32 v2, s4
-; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
-; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
+; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: s_mov_b32 s2, s5
-; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
@@ -887,24 +869,26 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspa
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX6-NEXT: s_add_u32 s4, s2, 0x3ffc
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
-; GFX6-NEXT: s_mov_b32 s6, 0
-; GFX6-NEXT: s_addc_u32 s5, s3, 0
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX7-NEXT: s_add_u32 s4, s2, 0x3ffc
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
-; GFX7-NEXT: s_mov_b32 s6, 0
-; GFX7-NEXT: s_addc_u32 s5, s3, 0
-; GFX7-NEXT: s_mov_b32 s7, 0xf000
-; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
More information about the llvm-commits
mailing list