[llvm] r204660 - R600/SI: Fix 64-bit bit ops that require the VALU.
Matt Arsenault
Matthew.Arsenault at amd.com
Mon Mar 24 13:08:05 PDT 2014
Author: arsenm
Date: Mon Mar 24 15:08:05 2014
New Revision: 204660
URL: http://llvm.org/viewvc/llvm-project?rev=204660&view=rev
Log:
R600/SI: Fix 64-bit bit ops that require the VALU.
Try to match the scalar instruction first, like the other instructions.
Expand 64-bit bit ops (and, or, xor, not) to a pair of 32-bit ops, since the
64-bit forms are not available on the VALU.
Modified:
llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
llvm/trunk/lib/Target/R600/SIInstrInfo.h
llvm/trunk/lib/Target/R600/SIInstructions.td
llvm/trunk/test/CodeGen/R600/or.ll
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=204660&r1=204659&r2=204660&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Mon Mar 24 15:08:05 2014
@@ -879,6 +879,30 @@ void SIInstrInfo::moveToVALU(MachineInst
Inst->eraseFromParent();
continue;
}
+ case AMDGPU::S_AND_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_OR_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_XOR_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NOT_B64:
+ splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BFE_U64:
+ case AMDGPU::S_BFE_I64:
+ case AMDGPU::S_BFM_B64:
+ llvm_unreachable("Moving this op to VALU not implemented");
}
unsigned NewOpcode = getVALUOp(*Inst);
@@ -968,6 +992,58 @@ const TargetRegisterClass *SIInstrInfo::
return &AMDGPU::VReg_32RegClass;
}
+/// \brief Split a 64-bit scalar bit operation into two 32-bit halves.
+///
+/// For each of sub0 and sub1, the matching 32-bit sub-register of every source
+/// operand of \p Inst is extracted and a new \p Opcode instruction writing a
+/// fresh VReg_32 is inserted before \p Inst. The two results are recombined
+/// with a REG_SEQUENCE into a VReg_64 that replaces all uses of the original
+/// destination register. \p Inst is not erased here; the caller does that.
+///
+/// \param Worklist Receives the two new 32-bit instructions so the caller can
+///                 continue processing (and legalizing) them.
+/// \param Inst     The 64-bit instruction to split. Operand 0 is the
+///                 destination and operand 1 the first source; a second source
+///                 in operand 2 is optional (unary ops such as S_NOT_B64 have
+///                 none). Source operands must be registers.
+/// \param Opcode   The 32-bit opcode used for each half.
+void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                                     MachineInstr *Inst,
+                                     unsigned Opcode) const {
+  MachineBasicBlock &MBB = *Inst->getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  // Unary operations (e.g. S_NOT_B64) have no operand 2, so only touch the
+  // second source when the instruction really has one.
+  const bool HasSrc1 = Inst->getNumExplicitOperands() > 2;
+
+  // We shouldn't need to worry about immediate operands here.
+  MachineOperand &Dest = Inst->getOperand(0);
+  MachineOperand &Src0 = Inst->getOperand(1);
+  // Aliased to Src0 for unary ops purely so the reference is valid; it is
+  // only read when HasSrc1 is set.
+  MachineOperand &Src1 = HasSrc1 ? Inst->getOperand(2) : Src0;
+  DebugLoc DL = Inst->getDebugLoc();
+
+  MachineBasicBlock::iterator MII = Inst;
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+
+  // Look up the register class of each source separately; do not assume that
+  // Src1 shares the class of Src0.
+  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
+  const TargetRegisterClass *Src0SubRC =
+      RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
+  const TargetRegisterClass *Src1SubRC =
+      RI.getSubRegClass(Src1RC, AMDGPU::sub0);
+
+  const unsigned SubIdx[2] = { AMDGPU::sub0, AMDGPU::sub1 };
+  unsigned DestSub[2];
+  MachineInstr *Half[2];
+
+  for (unsigned I = 0; I != 2; ++I) {
+    // Extract every source half before building the op so that the copies are
+    // inserted ahead of the instruction that reads them.
+    unsigned Src0Sub = buildExtractSubReg(MII, MRI, Src0, Src0RC,
+                                          SubIdx[I], Src0SubRC);
+    unsigned Src1Sub = 0;
+    if (HasSrc1)
+      Src1Sub = buildExtractSubReg(MII, MRI, Src1, Src1RC,
+                                   SubIdx[I], Src1SubRC);
+
+    DestSub[I] = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+    MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, InstDesc, DestSub[I])
+                                  .addReg(Src0Sub);
+    if (HasSrc1)
+      MIB.addReg(Src1Sub);
+    Half[I] = MIB;
+  }
+
+  // Glue the two 32-bit results back together into one 64-bit register.
+  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+    .addReg(DestSub[0])
+    .addImm(AMDGPU::sub0)
+    .addReg(DestSub[1])
+    .addImm(AMDGPU::sub1);
+
+  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+  // Try to legalize the operands in case we need to swap the order to keep it
+  // valid.
+  Worklist.push_back(Half[0]);
+  Worklist.push_back(Half[1]);
+}
+
+
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.h?rev=204660&r1=204659&r2=204660&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.h Mon Mar 24 15:08:05 2014
@@ -38,6 +38,10 @@ private:
const TargetRegisterClass *RC,
const MachineOperand &Op) const;
+ void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst, unsigned Opcode) const;
+
+
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
@@ -92,6 +96,7 @@ public:
bool isSALUInstr(const MachineInstr &MI) const;
static unsigned getVALUOp(const MachineInstr &MI);
+
bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
/// \brief Return the correct register class for \p OpNo. For target-specific
Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=204660&r1=204659&r2=204660&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Mon Mar 24 15:08:05 2014
@@ -1222,7 +1222,7 @@ def S_OR_B32 : SOP2_32 <0x00000010, "S_O
>;
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
- []
+ [(set i64:$dst, (or i64:$src0, i64:$src1))]
>;
def : Pat <
Modified: llvm/trunk/test/CodeGen/R600/or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/or.ll?rev=204660&r1=204659&r2=204660&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/or.ll (original)
+++ llvm/trunk/test/CodeGen/R600/or.ll Mon Mar 24 15:08:05 2014
@@ -56,15 +56,34 @@ define void @vector_or_i32(i32 addrspace
ret void
}
-; EG-CHECK-LABEL: @or_i64
+; EG-CHECK-LABEL: @scalar_or_i64
; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-CHECK-LABEL: @or_i64
+; SI-CHECK-LABEL: @scalar_or_i64
+; SI-CHECK: S_OR_B64
+define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+ %or = or i64 %a, %b
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-CHECK-LABEL: @vector_or_i64
; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
-define void @or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
- %0 = or i64 %a, %b
- store i64 %0, i64 addrspace(1)* %out
- ret void
+; Both operands are loaded from global memory (%a and %b), so the checks above
+; expect the 64-bit or to be emitted as two 32-bit V_OR_B32 instructions.
+define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 8
+ %loadb = load i64 addrspace(1)* %b, align 8
+ %or = or i64 %loada, %loadb
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-CHECK-LABEL: @scalar_vector_or_i64
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
+ %loada = load i64 addrspace(1)* %a
+ %or = or i64 %loada, %b
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
}
More information about the llvm-commits
mailing list