[llvm-branch-commits] [llvm-branch] r236449 - Merging r235641:
Tom Stellard
thomas.stellard at amd.com
Mon May 4 12:50:13 PDT 2015
Author: tstellar
Date: Mon May 4 14:50:13 2015
New Revision: 236449
URL: http://llvm.org/viewvc/llvm-project?rev=236449&view=rev
Log:
Merging r235641:
------------------------------------------------------------------------
r235641 | thomas.stellard | 2015-04-23 16:32:01 -0400 (Thu, 23 Apr 2015) | 9 lines
R600/SI: Fix indirect addressing with a negative constant offset
When the base register index of the vector plus the constant offset
was less than zero, we were passing the wrong base register to the indirect
addressing instruction.
In this case, we need to set the base register to v0 and then add
the computed (negative) index to m0.
------------------------------------------------------------------------
Modified:
llvm/branches/release_36/lib/Target/R600/SILowerControlFlow.cpp
llvm/branches/release_36/test/CodeGen/R600/indirect-addressing-si.ll
Modified: llvm/branches/release_36/lib/Target/R600/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SILowerControlFlow.cpp?rev=236449&r1=236448&r2=236449&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SILowerControlFlow.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/SILowerControlFlow.cpp Mon May 4 14:50:13 2015
@@ -88,7 +88,8 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
+ void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(Machin
MI.eraseFromParent();
}
-void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
@@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(Mach
unsigned Idx = MI.getOperand(3).getReg();
if (AMDGPU::SReg_32RegClass.contains(Idx)) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(Idx);
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(Idx)
+ .addImm(Offset);
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ }
MBB.insert(I, MovRel);
} else {
@@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(Mach
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
.addReg(AMDGPU::VCC);
+ if (Offset) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(AMDGPU::M0)
+ .addImm(Offset);
+ }
// Do the actual move
MBB.insert(I, MovRel);
@@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(Mach
MI.eraseFromParent();
}
+/// \param @VecReg The register which holds element zero of the vector
+/// being addressed into.
+/// \param[out] @Reg The base register to use in the indirect addressing instruction.
+/// \param[in,out] @Offset As an input, this is the constant offset part of the
+// indirect Index. e.g. v0 = v[VecReg + Offset]
+// As an output, this is a constant value that needs
+// to be added to the value stored in M0.
+void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
+ unsigned &Reg,
+ int &Offset) {
+ unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = VecReg;
+
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
+ int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
+
+ if (RegIdx < 0) {
+ Offset = RegIdx;
+ RegIdx = 0;
+ } else {
+ Offset = 0;
+ }
+
+ Reg = RC->getRegister(RegIdx);
+}
+
void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
- unsigned Off = MI.getOperand(4).getImm();
- unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
- if (!SubReg)
- SubReg = Vec;
+ int Off = MI.getOperand(4).getImm();
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Vec, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(SubReg + Off)
+ .addReg(Reg)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
- LoadM0(MI, MovRel);
+ LoadM0(MI, MovRel, Off);
}
void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
@@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
- unsigned Off = MI.getOperand(4).getImm();
+ int Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
- unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
- if (!SubReg)
- SubReg = Dst;
+ unsigned Reg;
+
+ computeIndirectRegAndOffset(Dst, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(SubReg + Off, RegState::Define)
+ .addReg(Reg, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
- LoadM0(MI, MovRel);
+ LoadM0(MI, MovRel, Off);
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Modified: llvm/branches/release_36/test/CodeGen/R600/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/indirect-addressing-si.ll?rev=236449&r1=236448&r2=236449&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/indirect-addressing-si.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/indirect-addressing-si.ll Mon May 4 14:50:13 2015
@@ -25,6 +25,33 @@ entry:
ret void
}
+; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
+define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0
+; CHECK: s_cbranch_execnz
+define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -512
+ %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
; CHECK-LABEL: {{^}}insert_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
@@ -47,3 +74,48 @@ entry:
store float %1, float addrspace(1)* %out
ret void
}
+
+; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
+define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -512
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: v_readfirstlane_b32
+; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
+; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
+; CHECK: s_cbranch_execnz
+define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
+entry:
+ %id = call i32 @llvm.r600.read.tidig.x() #1
+ %index = add i32 %id, -16
+ %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+attributes #1 = { nounwind readnone }
More information about the llvm-branch-commits
mailing list