[llvm] r300040 - [AMDGPU] SDWA: make pass global
Sam Kolton via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 12 02:36:05 PDT 2017
Author: skolton
Date: Wed Apr 12 04:36:05 2017
New Revision: 300040
URL: http://llvm.org/viewvc/llvm-project?rev=300040&view=rev
Log:
[AMDGPU] SDWA: make pass global
Summary: Remove the checks for basic blocks so that SDWA operands can be matched and combined across the whole function, not just within a single basic block.
Reviewers: vpykhtin, rampitec, arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D31935
Modified:
llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
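[Editor's note] Stripped of LLVM specifics, the shape of the change is that the matcher now walks every basic block of the function itself, instead of being driven one block at a time with the match state reset between blocks. A minimal, self-contained C++ sketch of that structure, using toy Instr/Block/Function types rather than the real MachineInstr/MachineBasicBlock/MachineFunction classes (the names and the "lshrrev" substring check are purely illustrative):

#include <cstdio>
#include <string>
#include <vector>

struct Instr    { std::string Text; };
struct Block    { std::vector<Instr> Instrs; };
struct Function { std::vector<Block> Blocks; };

// Before this patch the matcher was handed one Block at a time and the driver
// cleared its state between blocks; now it takes the whole Function, as
// matchSDWAOperands(MachineFunction &MF) does in the diff below.
static void matchOperands(const Function &F,
                          std::vector<const Instr *> &Matches) {
  for (const Block &B : F.Blocks)        // outer loop moved into the matcher
    for (const Instr &I : B.Instrs)
      if (I.Text.find("lshrrev") != std::string::npos) // stand-in for the opcode switch
        Matches.push_back(&I);
}

int main() {
  // Two blocks: the candidate shift lives in one, its use in another.
  Function F{{Block{{Instr{"v_lshrrev_b32_e32 v1, 16, v0"}}},
              Block{{Instr{"v_add_i32_e32 v2, vcc, v1, v3"}}}}};
  std::vector<const Instr *> Matches;
  matchOperands(F, Matches);             // one walk over all blocks
  std::printf("matched %zu candidate pattern(s)\n", Matches.size());
}

Because candidates are collected function-wide, a pattern found in one block can later be combined with a use in another block, which is why the per-block isSameBB checks below become unnecessary.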
Modified: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp?rev=300040&r1=300039&r2=300040&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp Wed Apr 12 04:36:05 2017
@@ -63,7 +63,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override;
- void matchSDWAOperands(MachineBasicBlock &MBB);
+ void matchSDWAOperands(MachineFunction &MF);
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
StringRef getPassName() const override { return "SI Peephole SDWA"; }
@@ -196,11 +196,6 @@ static raw_ostream& operator<<(raw_ostre
#endif
-static bool isSameBB(const MachineInstr *FirstMI, const MachineInstr *SecondMI) {
- assert(FirstMI && SecondMI);
- return FirstMI->getParent() == SecondMI->getParent();
-}
-
static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
assert(To.isReg() && From.isReg());
To.setReg(From.getReg());
@@ -266,10 +261,9 @@ MachineInstr *SDWASrcOperand::potentialT
if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
continue;
- // If there exist use of dst in another basic block or use of superreg of
- // dst then we should not combine this opernad
- if (!isSameBB(PotentialMO.getParent(), getParentInst()) ||
- !isSameReg(PotentialMO, *Replaced))
+ // If there exists a use of a superreg of dst then we should not combine this
+ // operand
+ if (!isSameReg(PotentialMO, *Replaced))
return nullptr;
// Check that PotentialMI is only instruction that uses dst reg
@@ -329,8 +323,7 @@ MachineInstr *SDWADstOperand::potentialT
if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
continue;
- if (!isSameBB(getParentInst(), PotentialMO.getParent()) ||
- !isSameReg(*Replaced, PotentialMO))
+ if (!isSameReg(*Replaced, PotentialMO))
return nullptr;
// Check that ParentMI is the only instruction that uses replaced register
@@ -389,7 +382,7 @@ Optional<int64_t> SIPeepholeSDWA::foldTo
continue;
const MachineInstr *DefInst = Def.getParent();
- if (!TII->isFoldableCopy(*DefInst) || !isSameBB(Op.getParent(), DefInst))
+ if (!TII->isFoldableCopy(*DefInst))
return None;
const MachineOperand &Copied = DefInst->getOperand(1);
@@ -403,179 +396,181 @@ Optional<int64_t> SIPeepholeSDWA::foldTo
return None;
}
-void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
- for (MachineInstr &MI : MBB) {
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- case AMDGPU::V_LSHRREV_B32_e32:
- case AMDGPU::V_ASHRREV_I32_e32:
- case AMDGPU::V_LSHLREV_B32_e32: {
- // from: v_lshrrev_b32_e32 v1, 16/24, v0
- // to SDWA src:v0 src_sel:WORD_1/BYTE_3
-
- // from: v_ashrrev_i32_e32 v1, 16/24, v0
- // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
-
- // from: v_lshlrev_b32_e32 v1, 16/24, v0
- // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
- MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
- auto Imm = foldToImm(*Src0);
- if (!Imm)
+void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::V_LSHRREV_B32_e32:
+ case AMDGPU::V_ASHRREV_I32_e32:
+ case AMDGPU::V_LSHLREV_B32_e32: {
+ // from: v_lshrrev_b32_e32 v1, 16/24, v0
+ // to SDWA src:v0 src_sel:WORD_1/BYTE_3
+
+ // from: v_ashrrev_i32_e32 v1, 16/24, v0
+ // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
+
+ // from: v_lshlrev_b32_e32 v1, 16/24, v0
+ // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm)
+ break;
+
+ if (*Imm != 16 && *Imm != 24)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+ auto SDWADst = make_unique<SDWADstOperand>(
+ Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
+ SDWAOperands[&MI] = std::move(SDWADst);
+ ++NumSDWAPatternsFound;
+ } else {
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
+ Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ }
break;
+ }
- if (*Imm != 16 && *Imm != 24)
+ case AMDGPU::V_LSHRREV_B16_e32:
+ case AMDGPU::V_ASHRREV_I16_e32:
+ case AMDGPU::V_LSHLREV_B16_e32: {
+ // from: v_lshrrev_b16_e32 v1, 8, v0
+ // to SDWA src:v0 src_sel:BYTE_1
+
+ // from: v_ashrrev_i16_e32 v1, 8, v0
+ // to SDWA src:v0 src_sel:BYTE_1 sext:1
+
+ // from: v_lshlrev_b16_e32 v1, 8, v0
+ // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm || *Imm != 8)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+ auto SDWADst =
+ make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
+ SDWAOperands[&MI] = std::move(SDWADst);
+ ++NumSDWAPatternsFound;
+ } else {
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, BYTE_1, false, false,
+ Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ }
break;
+ }
- MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (TRI->isPhysicalRegister(Src1->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
- break;
+ case AMDGPU::V_BFE_I32:
+ case AMDGPU::V_BFE_U32: {
+ // e.g.:
+ // from: v_bfe_u32 v1, v0, 8, 8
+ // to SDWA src:v0 src_sel:BYTE_1
+
+ // offset | width | src_sel
+ // ------------------------
+ // 0 | 8 | BYTE_0
+ // 0 | 16 | WORD_0
+ // 0 | 32 | DWORD ?
+ // 8 | 8 | BYTE_1
+ // 16 | 8 | BYTE_2
+ // 16 | 16 | WORD_1
+ // 24 | 8 | BYTE_3
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ auto Offset = foldToImm(*Src1);
+ if (!Offset)
+ break;
+
+ MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ auto Width = foldToImm(*Src2);
+ if (!Width)
+ break;
+
+ SdwaSel SrcSel = DWORD;
+
+ if (*Offset == 0 && *Width == 8)
+ SrcSel = BYTE_0;
+ else if (*Offset == 0 && *Width == 16)
+ SrcSel = WORD_0;
+ else if (*Offset == 0 && *Width == 32)
+ SrcSel = DWORD;
+ else if (*Offset == 8 && *Width == 8)
+ SrcSel = BYTE_1;
+ else if (*Offset == 16 && *Width == 8)
+ SrcSel = BYTE_2;
+ else if (*Offset == 16 && *Width == 16)
+ SrcSel = WORD_1;
+ else if (*Offset == 24 && *Width == 8)
+ SrcSel = BYTE_3;
+ else
+ break;
+
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src0->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
- if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
- auto SDWADst = make_unique<SDWADstOperand>(
- Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
- DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
- SDWAOperands[&MI] = std::move(SDWADst);
- ++NumSDWAPatternsFound;
- } else {
auto SDWASrc = make_unique<SDWASrcOperand>(
- Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
- Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
+ Src0, Dst, SrcSel, false, false,
+ Opcode == AMDGPU::V_BFE_U32 ? false : true);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
- }
- break;
- }
-
- case AMDGPU::V_LSHRREV_B16_e32:
- case AMDGPU::V_ASHRREV_I16_e32:
- case AMDGPU::V_LSHLREV_B16_e32: {
- // from: v_lshrrev_b16_e32 v1, 8, v0
- // to SDWA src:v0 src_sel:BYTE_1
-
- // from: v_ashrrev_i16_e32 v1, 8, v0
- // to SDWA src:v0 src_sel:BYTE_1 sext:1
-
- // from: v_lshlrev_b16_e32 v1, 8, v0
- // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
- MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
- auto Imm = foldToImm(*Src0);
- if (!Imm || *Imm != 8)
break;
+ }
+ case AMDGPU::V_AND_B32_e32: {
+ // e.g.:
+ // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
+ // to SDWA src:v0 src_sel:WORD_0/BYTE_0
+
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm)
+ break;
- MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
+ break;
- if (TRI->isPhysicalRegister(Src1->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
- break;
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
- if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
- auto SDWADst =
- make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
- DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
- SDWAOperands[&MI] = std::move(SDWADst);
- ++NumSDWAPatternsFound;
- } else {
auto SDWASrc = make_unique<SDWASrcOperand>(
- Src1, Dst, BYTE_1, false, false,
- Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
+ Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
SDWAOperands[&MI] = std::move(SDWASrc);
++NumSDWAPatternsFound;
- }
- break;
- }
-
- case AMDGPU::V_BFE_I32:
- case AMDGPU::V_BFE_U32: {
- // e.g.:
- // from: v_bfe_u32 v1, v0, 8, 8
- // to SDWA src:v0 src_sel:BYTE_1
-
- // offset | width | src_sel
- // ------------------------
- // 0 | 8 | BYTE_0
- // 0 | 16 | WORD_0
- // 0 | 32 | DWORD ?
- // 8 | 8 | BYTE_1
- // 16 | 8 | BYTE_2
- // 16 | 16 | WORD_1
- // 24 | 8 | BYTE_3
-
- MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- auto Offset = foldToImm(*Src1);
- if (!Offset)
- break;
-
- MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- auto Width = foldToImm(*Src2);
- if (!Width)
- break;
-
- SdwaSel SrcSel = DWORD;
-
- if (*Offset == 0 && *Width == 8)
- SrcSel = BYTE_0;
- else if (*Offset == 0 && *Width == 16)
- SrcSel = WORD_0;
- else if (*Offset == 0 && *Width == 32)
- SrcSel = DWORD;
- else if (*Offset == 8 && *Width == 8)
- SrcSel = BYTE_1;
- else if (*Offset == 16 && *Width == 8)
- SrcSel = BYTE_2;
- else if (*Offset == 16 && *Width == 16)
- SrcSel = WORD_1;
- else if (*Offset == 24 && *Width == 8)
- SrcSel = BYTE_3;
- else
- break;
-
- MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
- if (TRI->isPhysicalRegister(Src0->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
- break;
-
- auto SDWASrc = make_unique<SDWASrcOperand>(
- Src0, Dst, SrcSel, false, false,
- Opcode == AMDGPU::V_BFE_U32 ? false : true);
- DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
- SDWAOperands[&MI] = std::move(SDWASrc);
- ++NumSDWAPatternsFound;
- break;
- }
- case AMDGPU::V_AND_B32_e32: {
- // e.g.:
- // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
- // to SDWA src:v0 src_sel:WORD_0/BYTE_0
-
- MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
- auto Imm = foldToImm(*Src0);
- if (!Imm)
- break;
-
- if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
- break;
-
- MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
- if (TRI->isPhysicalRegister(Src1->getReg()) ||
- TRI->isPhysicalRegister(Dst->getReg()))
break;
-
- auto SDWASrc = make_unique<SDWASrcOperand>(
- Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
- DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
- SDWAOperands[&MI] = std::move(SDWASrc);
- ++NumSDWAPatternsFound;
- break;
- }
+ }
+ }
}
}
}
@@ -698,24 +693,21 @@ bool SIPeepholeSDWA::runOnMachineFunctio
std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
- // FIXME: For now we only combine instructions in one basic block
- for (MachineBasicBlock &MBB : MF) {
- SDWAOperands.clear();
- matchSDWAOperands(MBB);
+ matchSDWAOperands(MF);
- PotentialMatches.clear();
- for (auto &OperandPair : SDWAOperands) {
- auto &Operand = OperandPair.second;
- MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
- if (PotentialMI) {
- PotentialMatches[PotentialMI].push_back(std::move(Operand));
- }
+ for (auto &OperandPair : SDWAOperands) {
+ auto &Operand = OperandPair.second;
+ MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
+ if (PotentialMI) {
+ PotentialMatches[PotentialMI].push_back(std::move(Operand));
}
+ }
- for (auto &PotentialPair : PotentialMatches) {
- MachineInstr &PotentialMI = *PotentialPair.first;
- convertToSDWA(PotentialMI, PotentialPair.second);
- }
+ for (auto &PotentialPair : PotentialMatches) {
+ MachineInstr &PotentialMI = *PotentialPair.first;
+ convertToSDWA(PotentialMI, PotentialPair.second);
}
+
+ SDWAOperands.clear();
return false;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll?rev=300040&r1=300039&r2=300040&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll Wed Apr 12 04:36:05 2017
@@ -376,28 +376,20 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}mul_add_shr_i32:
-; NOSDWA-NOT: v_mul_u32_u24_sdwa
+; GCN-LABEL: {{^}}add_bb_v2i16:
; NOSDWA-NOT: v_add_i32_sdwa
-; SDWA-NOT: v_mul_u32_u24_sdwa
-; SDWA-NOT: v_add_i32_sdwa
-define void @mul_add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ina, i32 addrspace(1)* %inb, i1 addrspace(1)* %incond) {
+; SDWA: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+
+define amdgpu_kernel void @add_bb_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
entry:
- %a = load i32, i32 addrspace(1)* %ina, align 4
- %b = load i32, i32 addrspace(1)* %inb, align 4
- %cond = load i1, i1 addrspace(1)* %incond, align 4
- %shra = lshr i32 %a, 16
- %shrb = lshr i32 %b, 16
- br i1 %cond, label %mul_label, label %add_label
-mul_label:
- %mul = mul i32 %shra, %shrb
- br label %store_label
+ %a = load <2 x i16>, <2 x i16> addrspace(1)* %ina, align 4
+ %b = load <2 x i16>, <2 x i16> addrspace(1)* %inb, align 4
+ br label %add_label
add_label:
- %add = add i32 %shra, %shrb
+ %add = add <2 x i16> %a, %b
br label %store_label
store_label:
- %store = phi i32 [%mul, %mul_label], [%add, %add_label]
- store i32 %store, i32 addrspace(1)* %out, align 4
+ store <2 x i16> %add, <2 x i16> addrspace(1)* %out, align 4
ret void
-}
\ No newline at end of file
+}
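[Editor's note] As a rough illustration of what the new add_bb_v2i16 test exercises (this is a hypothetical reduction, not the committed test): the add's operands are defined in the entry block but only consumed after a branch, so a matcher confined to a single basic block could never pair the definition with its SDWA-convertible use.

define amdgpu_kernel void @cross_bb(<2 x i16> addrspace(1)* %out,
                                    <2 x i16> addrspace(1)* %in) {
entry:
  ; value defined in 'entry'...
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
  br label %use

use:
  ; ...but only consumed here, in a different block
  %add = add <2 x i16> %a, %a
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out, align 4
  ret void
}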