[llvm] r291615 - AMDGPU: Constant fold when immediate is materialized
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 10 15:32:05 PST 2017
Author: arsenm
Date: Tue Jan 10 17:32:04 2017
New Revision: 291615
URL: http://llvm.org/viewvc/llvm-project?rev=291615&view=rev
Log:
AMDGPU: Constant fold when immediate is materialized
In future commits, these patterns will appear after moveToVALU changes.
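
As a quick illustration (a sketch based on the s_fold_and_imm_regimm_32 case in
the new MIR test added below; register numbers follow that test), an operation
whose sources are immediates materialized into registers now folds down to a
single move. Input MIR:

    %7 = S_MOV_B32 1234567
    %8 = S_MOV_B32 9999
    %9 = S_AND_B32 killed %7, killed %8, implicit-def dead %scc
    %10 = COPY %9
    BUFFER_STORE_DWORD_OFFSET killed %10, ...

After si-fold-operands and dead-mi-elimination (matching the GCN checks in the
test), the AND of the two materialized constants becomes one move of
1234567 & 9999 = 1543:

    %10 = V_MOV_B32_e32 1543, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %10, ...
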
Added:
llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=291615&r1=291614&r2=291615&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Tue Jan 10 17:32:04 2017
@@ -25,25 +25,6 @@ using namespace llvm;
namespace {
-class SIFoldOperands : public MachineFunctionPass {
-public:
- static char ID;
-
-public:
- SIFoldOperands() : MachineFunctionPass(ID) {
- initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- StringRef getPassName() const override { return "SI Fold Operands"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
struct FoldCandidate {
MachineInstr *UseMI;
union {
@@ -79,6 +60,36 @@ struct FoldCandidate {
}
};
+class SIFoldOperands : public MachineFunctionPass {
+public:
+ static char ID;
+ MachineRegisterInfo *MRI;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+
+ void foldOperand(MachineOperand &OpToFold,
+ MachineInstr *UseMI,
+ unsigned UseOpIdx,
+ SmallVectorImpl<FoldCandidate> &FoldList,
+ SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
+
+ void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+
+public:
+ SIFoldOperands() : MachineFunctionPass(ID) {
+ initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "SI Fold Operands"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
} // End anonymous namespace.
INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
@@ -141,7 +152,7 @@ static bool updateOperand(FoldCandidate
return false;
}
-static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
+static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
const MachineInstr *MI) {
for (auto Candidate : FoldList) {
if (Candidate.UseMI == MI)
@@ -150,7 +161,7 @@ static bool isUseMIInFoldList(const std:
return false;
}
-static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
+static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold,
const SIInstrInfo *TII) {
@@ -227,12 +238,12 @@ static bool isUseSafeToFold(const Machin
//return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
}
-static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
- unsigned UseOpIdx,
- std::vector<FoldCandidate> &FoldList,
- SmallVectorImpl<MachineInstr *> &CopiesToReplace,
- const SIInstrInfo *TII, const SIRegisterInfo &TRI,
- MachineRegisterInfo &MRI) {
+void SIFoldOperands::foldOperand(
+ MachineOperand &OpToFold,
+ MachineInstr *UseMI,
+ unsigned UseOpIdx,
+ SmallVectorImpl<FoldCandidate> &FoldList,
+ SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
if (!isUseSafeToFold(*UseMI, UseOp))
@@ -264,7 +275,7 @@ static void foldOperand(MachineOperand &
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
for (MachineRegisterInfo::use_iterator
- RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
+ RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
RSUse != RSE; ++RSUse) {
MachineInstr *RSUseMI = RSUse->getParent();
@@ -272,7 +283,7 @@ static void foldOperand(MachineOperand &
continue;
foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
- CopiesToReplace, TII, TRI, MRI);
+ CopiesToReplace);
}
return;
@@ -287,8 +298,8 @@ static void foldOperand(MachineOperand &
unsigned DestReg = UseMI->getOperand(0).getReg();
const TargetRegisterClass *DestRC
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI.getRegClass(DestReg) :
- TRI.getPhysRegClass(DestReg);
+ MRI->getRegClass(DestReg) :
+ TRI->getPhysRegClass(DestReg);
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
@@ -318,7 +329,7 @@ static void foldOperand(MachineOperand &
const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
const TargetRegisterClass *FoldRC =
- TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+ TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
OpToFold.getImm());
@@ -328,8 +339,8 @@ static void foldOperand(MachineOperand &
unsigned UseReg = UseOp.getReg();
const TargetRegisterClass *UseRC
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
- MRI.getRegClass(UseReg) :
- TRI.getPhysRegClass(UseReg);
+ MRI->getRegClass(UseReg) :
+ TRI->getPhysRegClass(UseReg);
assert(Imm.getBitWidth() == 64);
@@ -349,20 +360,51 @@ static void foldOperand(MachineOperand &
}
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
- int32_t LHS, int32_t RHS) {
+ uint32_t LHS, uint32_t RHS) {
switch (Opcode) {
case AMDGPU::V_AND_B32_e64:
+ case AMDGPU::V_AND_B32_e32:
case AMDGPU::S_AND_B32:
Result = LHS & RHS;
return true;
case AMDGPU::V_OR_B32_e64:
+ case AMDGPU::V_OR_B32_e32:
case AMDGPU::S_OR_B32:
Result = LHS | RHS;
return true;
case AMDGPU::V_XOR_B32_e64:
+ case AMDGPU::V_XOR_B32_e32:
case AMDGPU::S_XOR_B32:
Result = LHS ^ RHS;
return true;
+ case AMDGPU::V_LSHL_B32_e64:
+ case AMDGPU::V_LSHL_B32_e32:
+ case AMDGPU::S_LSHL_B32:
+ // The instruction ignores the high bits for out of bounds shifts.
+ Result = LHS << (RHS & 31);
+ return true;
+ case AMDGPU::V_LSHLREV_B32_e64:
+ case AMDGPU::V_LSHLREV_B32_e32:
+ Result = RHS << (LHS & 31);
+ return true;
+ case AMDGPU::V_LSHR_B32_e64:
+ case AMDGPU::V_LSHR_B32_e32:
+ case AMDGPU::S_LSHR_B32:
+ Result = LHS >> (RHS & 31);
+ return true;
+ case AMDGPU::V_LSHRREV_B32_e64:
+ case AMDGPU::V_LSHRREV_B32_e32:
+ Result = RHS >> (LHS & 31);
+ return true;
+ case AMDGPU::V_ASHR_I32_e64:
+ case AMDGPU::V_ASHR_I32_e32:
+ case AMDGPU::S_ASHR_I32:
+ Result = static_cast<int32_t>(LHS) >> (RHS & 31);
+ return true;
+ case AMDGPU::V_ASHRREV_I32_e64:
+ case AMDGPU::V_ASHRREV_I32_e32:
+ Result = static_cast<int32_t>(RHS) >> (LHS & 31);
+ return true;
default:
return false;
}
@@ -390,33 +432,47 @@ static void mutateCopyOp(MachineInstr &M
stripExtraCopyOperands(MI);
}
+static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI,
+ MachineOperand &Op) {
+ if (Op.isReg()) {
+ // If this has a subregister, it obviously is a register source.
+ if (Op.getSubReg() != AMDGPU::NoSubRegister)
+ return &Op;
+
+ MachineInstr *Def = MRI.getVRegDef(Op.getReg());
+ if (Def->isMoveImmediate()) {
+ MachineOperand &ImmSrc = Def->getOperand(1);
+ if (ImmSrc.isImm())
+ return &ImmSrc;
+ }
+ }
+
+ return &Op;
+}
+
// Try to simplify operations with a constant that may appear after instruction
// selection.
+// TODO: See if a frame index with a fixed offset can fold.
static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
const SIInstrInfo *TII,
- MachineInstr *MI) {
+ MachineInstr *MI,
+ MachineOperand *ImmOp) {
unsigned Opc = MI->getOpcode();
-
if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
Opc == AMDGPU::S_NOT_B32) {
- MachineOperand &Src0 = MI->getOperand(1);
- if (Src0.isImm()) {
- Src0.setImm(~Src0.getImm());
- mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
- return true;
- }
-
- return false;
+ MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm());
+ mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
+ return true;
}
- if (!MI->isCommutable())
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ if (Src1Idx == -1)
return false;
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx));
+ MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx));
- MachineOperand *Src0 = &MI->getOperand(Src0Idx);
- MachineOperand *Src1 = &MI->getOperand(Src1Idx);
if (!Src0->isImm() && !Src1->isImm())
return false;
@@ -431,19 +487,26 @@ static bool tryConstantFoldOp(MachineReg
const SIRegisterInfo &TRI = TII->getRegisterInfo();
bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
- Src0->setImm(NewImm);
+ // Be careful to change the right operand, src0 may belong to a different
+ // instruction.
+ MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
MI->RemoveOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
return true;
}
+ if (!MI->isCommutable())
+ return false;
+
if (Src0->isImm() && !Src1->isImm()) {
std::swap(Src0, Src1);
std::swap(Src0Idx, Src1Idx);
}
int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
- if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::S_OR_B32) {
+ if (Opc == AMDGPU::V_OR_B32_e64 ||
+ Opc == AMDGPU::V_OR_B32_e32 ||
+ Opc == AMDGPU::S_OR_B32) {
if (Src1Val == 0) {
// y = or x, 0 => y = copy x
MI->RemoveOperand(Src1Idx);
@@ -459,6 +522,7 @@ static bool tryConstantFoldOp(MachineReg
}
if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
+ MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
MI->getOpcode() == AMDGPU::S_AND_B32) {
if (Src1Val == 0) {
// y = and x, 0 => y = v_mov_b32 0
@@ -476,29 +540,136 @@ static bool tryConstantFoldOp(MachineReg
}
if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
+ MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
MI->getOpcode() == AMDGPU::S_XOR_B32) {
if (Src1Val == 0) {
// y = xor x, 0 => y = copy x
MI->RemoveOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+ return true;
}
}
return false;
}
+void SIFoldOperands::foldInstOperand(MachineInstr &MI,
+ MachineOperand &OpToFold) const {
+ // We need to mutate the operands of new mov instructions to add implicit
+ // uses of EXEC, but adding them invalidates the use_iterator, so defer
+ // this.
+ SmallVector<MachineInstr *, 4> CopiesToReplace;
+ SmallVector<FoldCandidate, 4> FoldList;
+ MachineOperand &Dst = MI.getOperand(0);
+
+ bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+ if (FoldingImm) {
+ unsigned NumLiteralUses = 0;
+ MachineOperand *NonInlineUse = nullptr;
+ int NonInlineUseOpNo = -1;
+
+ MachineRegisterInfo::use_iterator NextUse, NextInstUse;
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
+ Use != E; Use = NextUse) {
+ NextUse = std::next(Use);
+ MachineInstr *UseMI = Use->getParent();
+ unsigned OpNo = Use.getOperandNo();
+
+ // Folding the immediate may reveal operations that can be constant
+ // folded or replaced with a copy. This can happen for example after
+ // frame indices are lowered to constants or from splitting 64-bit
+ // constants.
+ //
+ // We may also encounter cases where one or both operands are
+ // immediates materialized into a register, which would ordinarily not
+ // be folded due to multiple uses or operand constraints.
+
+ if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
+ DEBUG(dbgs() << "Constant folded " << *UseMI <<'\n');
+
+ // Some constant folding cases change the same immediate's use to a new
+ // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
+ // again. The same constant folded instruction could also have a second
+ // use operand.
+ NextUse = MRI->use_begin(Dst.getReg());
+ continue;
+ }
+
+ // Try to fold any inline immediate uses, and then only fold other
+ // constants if they have one use.
+ //
+ // The legality of the inline immediate must be checked based on the use
+ // operand, not the defining instruction, because 32-bit instructions
+ // with 32-bit inline immediate sources may be used to materialize
+ // constants used in 16-bit operands.
+ //
+ // e.g. it is unsafe to fold:
+ // s_mov_b32 s0, 1.0 // materializes 0x3f800000
+ // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
+
+ // Folding immediates with more than one use will increase program size.
+ // FIXME: This will also reduce register usage, which may be better
+ // in some cases. A better heuristic is needed.
+ if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
+ foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
+ } else {
+ if (++NumLiteralUses == 1) {
+ NonInlineUse = &*Use;
+ NonInlineUseOpNo = OpNo;
+ }
+ }
+ }
+
+ if (NumLiteralUses == 1) {
+ MachineInstr *UseMI = NonInlineUse->getParent();
+ foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
+ }
+ } else {
+ // Folding register.
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
+ Use != E; ++Use) {
+ MachineInstr *UseMI = Use->getParent();
+
+ foldOperand(OpToFold, UseMI, Use.getOperandNo(),
+ FoldList, CopiesToReplace);
+ }
+ }
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ // Make sure we add EXEC uses to any new v_mov instructions created.
+ for (MachineInstr *Copy : CopiesToReplace)
+ Copy->addImplicitDefUseOperands(*MF);
+
+ for (FoldCandidate &Fold : FoldList) {
+ if (updateOperand(Fold, *TRI)) {
+ // Clear kill flags.
+ if (Fold.isReg()) {
+ assert(Fold.OpToFold && Fold.OpToFold->isReg());
+ // FIXME: Probably shouldn't bother trying to fold if not an
+ // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
+ // copies.
+ MRI->clearKillFlags(Fold.OpToFold->getReg());
+ }
+ DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
+ static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+ }
+ }
+}
+
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
+ BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
MachineBasicBlock::iterator I, Next;
@@ -512,8 +683,7 @@ bool SIFoldOperands::runOnMachineFunctio
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
- // FIXME: We could also be folding things like FrameIndexes and
- // TargetIndexes.
+ // FIXME: We could also be folding things like TargetIndexes.
if (!FoldingImm && !OpToFold.isReg())
continue;
@@ -532,90 +702,7 @@ bool SIFoldOperands::runOnMachineFunctio
!TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
continue;
- // We need mutate the operands of new mov instructions to add implicit
- // uses of EXEC, but adding them invalidates the use_iterator, so defer
- // this.
- SmallVector<MachineInstr *, 4> CopiesToReplace;
-
- std::vector<FoldCandidate> FoldList;
- if (FoldingImm) {
- unsigned NumLiteralUses = 0;
- MachineOperand *NonInlineUse = nullptr;
- int NonInlineUseOpNo = -1;
-
- // Try to fold any inline immediate uses, and then only fold other
- // constants if they have one use.
- //
- // The legality of the inline immediate must be checked based on the use
- // operand, not the defining instruction, because 32-bit instructions
- // with 32-bit inline immediate sources may be used to materialize
- // constants used in 16-bit operands.
- //
- // e.g. it is unsafe to fold:
- // s_mov_b32 s0, 1.0 // materializes 0x3f800000
- // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
-
- // Folding immediates with more than one use will increase program size.
- // FIXME: This will also reduce register usage, which may be better
- // in some cases. A better heuristic is needed.
- for (MachineRegisterInfo::use_iterator
- Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
- Use != E; ++Use) {
- MachineInstr *UseMI = Use->getParent();
- unsigned OpNo = Use.getOperandNo();
-
- if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
- foldOperand(OpToFold, UseMI, OpNo, FoldList,
- CopiesToReplace, TII, TRI, MRI);
- } else {
- if (++NumLiteralUses == 1) {
- NonInlineUse = &*Use;
- NonInlineUseOpNo = OpNo;
- }
- }
- }
-
- if (NumLiteralUses == 1) {
- MachineInstr *UseMI = NonInlineUse->getParent();
- foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList,
- CopiesToReplace, TII, TRI, MRI);
- }
- } else {
- // Folding register.
- for (MachineRegisterInfo::use_iterator
- Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
- Use != E; ++Use) {
- MachineInstr *UseMI = Use->getParent();
-
- foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
- CopiesToReplace, TII, TRI, MRI);
- }
- }
-
- // Make sure we add EXEC uses to any new v_mov instructions created.
- for (MachineInstr *Copy : CopiesToReplace)
- Copy->addImplicitDefUseOperands(MF);
-
- for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, TRI)) {
- // Clear kill flags.
- if (Fold.isReg()) {
- assert(Fold.OpToFold && Fold.OpToFold->isReg());
- // FIXME: Probably shouldn't bother trying to fold if not an
- // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
- // copies.
- MRI.clearKillFlags(Fold.OpToFold->getReg());
- }
- DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
- static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
-
- // Folding the immediate may reveal operations that can be constant
- // folded or replaced with a copy. This can happen for example after
- // frame indices are lowered to constants or from splitting 64-bit
- // constants.
- tryConstantFoldOp(MRI, TII, Fold.UseMI);
- }
- }
+ foldInstOperand(MI, OpToFold);
}
}
return false;
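
To connect the getImmOrMaterializedImm() change above with the test that
follows: a VALU shift whose SGPR source is a materialized constant is now
folded as well. A sketch consistent with the v_fold_shl_imm_regimm_32 checks
below (register numbers follow the test; the dead S_MOV_B32 is then cleaned up
by the dead-mi-elimination run from the RUN line):

    %7 = S_MOV_B32 1
    %13 = V_LSHL_B32_e64 %7, 12, implicit %exec
    FLAT_STORE_DWORD %20, %13, ...

becomes

    %13 = V_MOV_B32_e32 4096, implicit %exec
    FLAT_STORE_DWORD %20, %13, ...
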
Added: llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir?rev=291615&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir Tue Jan 10 17:32:04 2017
@@ -0,0 +1,858 @@
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
+--- |
+ define void @s_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %and = and i32 %a, 1234567
+ store volatile i32 %and, i32 addrspace(1)* %out
+ ret void
+ }
+
+ define void @v_fold_and_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %idxprom = sext i32 %tid to i64
+ %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
+ %a = load i32, i32 addrspace(1)* %gep.a
+ %and = and i32 %a, 1234567
+ store i32 %and, i32 addrspace(1)* %gep.out
+ ret void
+ }
+
+ define void @s_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %shl = shl i32 %a, 12
+ store volatile i32 %shl, i32 addrspace(1)* %out
+ ret void
+ }
+
+ define void @v_fold_shl_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %idxprom = sext i32 %tid to i64
+ %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
+ %a = load i32, i32 addrspace(1)* %gep.a
+ %shl = shl i32 %a, 12
+ store i32 %shl, i32 addrspace(1)* %gep.out
+ ret void
+ }
+
+ define void @s_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %ashr = ashr i32 %a, 12
+ store volatile i32 %ashr, i32 addrspace(1)* %out
+ ret void
+ }
+
+ define void @v_fold_ashr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %idxprom = sext i32 %tid to i64
+ %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
+ %a = load i32, i32 addrspace(1)* %gep.a
+ %ashr = ashr i32 %a, 12
+ store i32 %ashr, i32 addrspace(1)* %gep.out
+ ret void
+ }
+
+ define void @s_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %lshr = lshr i32 %a, 12
+ store volatile i32 %lshr, i32 addrspace(1)* %out
+ ret void
+ }
+
+ define void @v_fold_lshr_imm_regimm_32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %idxprom = sext i32 %tid to i64
+ %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i64 %idxprom
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i64 %idxprom
+ %a = load i32, i32 addrspace(1)* %gep.a
+ %lshr = lshr i32 %a, 12
+ store i32 %lshr, i32 addrspace(1)* %gep.out
+ ret void
+ }
+
+ declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+ attributes #0 = { nounwind }
+ attributes #1 = { nounwind readnone }
+
+...
+---
+
+# GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}}
+# GCN: %10 = V_MOV_B32_e32 1543, implicit %exec
+# GCN: BUFFER_STORE_DWORD_OFFSET killed %10,
+name: s_fold_and_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_64_xexec }
+ - { id: 2, class: sreg_32_xm0 }
+ - { id: 3, class: sreg_32_xm0 }
+ - { id: 4, class: sreg_32_xm0 }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: sreg_128 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_32_xm0 }
+ - { id: 10, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1
+
+ %0 = COPY %sgpr0_sgpr1
+ %1 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %2 = COPY %1.sub1
+ %3 = COPY %1.sub0
+ %4 = S_MOV_B32 61440
+ %5 = S_MOV_B32 -1
+ %6 = REG_SEQUENCE killed %2, 1, killed %3, 2, killed %4, 3, killed %5, 4
+ %7 = S_MOV_B32 1234567
+ %8 = S_MOV_B32 9999
+ %9 = S_AND_B32 killed %7, killed %8, implicit-def dead %scc
+ %10 = COPY %9
+ BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: v_fold_and_imm_regimm_32{{$}}
+
+# GCN: %9 = V_MOV_B32_e32 646, implicit %exec
+# GCN: FLAT_STORE_DWORD %19, %9,
+
+# GCN: %10 = V_MOV_B32_e32 646, implicit %exec
+# GCN: FLAT_STORE_DWORD %19, %10
+
+# GCN: %11 = V_MOV_B32_e32 646, implicit %exec
+# GCN: FLAT_STORE_DWORD %19, %11,
+
+# GCN: %12 = V_MOV_B32_e32 1234567, implicit %exec
+# GCN: FLAT_STORE_DWORD %19, %12,
+
+# GCN: %13 = V_MOV_B32_e32 63, implicit %exec
+# GCN: FLAT_STORE_DWORD %19, %13,
+
+name: v_fold_and_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 20, class: sreg_32_xm0 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vreg_64 }
+ - { id: 26, class: sreg_32_xm0 }
+ - { id: 27, class: vgpr_32 }
+ - { id: 28, class: vgpr_32 }
+ - { id: 29, class: vgpr_32 }
+ - { id: 30, class: vgpr_32 }
+ - { id: 31, class: vgpr_32 }
+ - { id: 32, class: vreg_64 }
+ - { id: 33, class: vreg_64 }
+ - { id: 34, class: vgpr_32 }
+ - { id: 35, class: vgpr_32 }
+ - { id: 36, class: vgpr_32 }
+ - { id: 37, class: vreg_64 }
+ - { id: 44, class: vgpr_32 }
+
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %3 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %31 = V_ASHRREV_I32_e64 31, %3, implicit %exec
+ %32 = REG_SEQUENCE %3, 1, %31, 2
+ %33 = V_LSHLREV_B64 2, killed %32, implicit %exec
+ %20 = COPY %4.sub1
+ %44 = V_ADD_I32_e32 %4.sub0, %33.sub0, implicit-def %vcc, implicit %exec
+ %36 = COPY killed %20
+ %35 = V_ADDC_U32_e32 %33.sub1, %36, implicit-def %vcc, implicit %vcc, implicit %exec
+ %37 = REG_SEQUENCE %44, 1, killed %35, 2
+ %24 = V_MOV_B32_e32 982, implicit %exec
+ %26 = S_MOV_B32 1234567
+ %34 = V_MOV_B32_e32 63, implicit %exec
+
+ %27 = V_AND_B32_e64 %26, %24, implicit %exec
+ FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %28 = V_AND_B32_e64 %24, %26, implicit %exec
+ FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %29 = V_AND_B32_e32 %26, %24, implicit %exec
+ FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %30 = V_AND_B32_e64 %26, %26, implicit %exec
+ FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %31 = V_AND_B32_e64 %34, %34, implicit %exec
+ FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: s_fold_shl_imm_regimm_32{{$}}
+# GCN: %13 = V_MOV_B32_e32 4096, implicit %exec
+# GCN: BUFFER_STORE_DWORD_OFFSET killed %13,
+
+name: s_fold_shl_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: sreg_32_xm0 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_32_xm0 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sreg_32_xm0 }
+ - { id: 13, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1
+
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_MOV_B32 1
+ %6 = COPY %4.sub1
+ %7 = COPY %4.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
+ %12 = S_LSHL_B32 killed %5, 12, implicit-def dead %scc
+ %13 = COPY %12
+ BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+ S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: v_fold_shl_imm_regimm_32{{$}}
+
+# GCN: %11 = V_MOV_B32_e32 40955904, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %11,
+
+# GCN: %12 = V_MOV_B32_e32 24, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %12,
+
+# GCN: %13 = V_MOV_B32_e32 4096, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %13,
+
+# GCN: %14 = V_MOV_B32_e32 24, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %14,
+
+# GCN: %15 = V_MOV_B32_e32 0, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %15,
+
+# GCN: %22 = V_MOV_B32_e32 4096, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %22,
+
+# GCN: %23 = V_MOV_B32_e32 1, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %23,
+
+# GCN: %25 = V_MOV_B32_e32 2, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %25,
+
+# GCN: %26 = V_MOV_B32_e32 7927808, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %26,
+
+# GCN: %28 = V_MOV_B32_e32 -8, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %28,
+
+name: v_fold_shl_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: sreg_64_xexec }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_32_xm0 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vreg_64 }
+ - { id: 17, class: vreg_64 }
+ - { id: 18, class: vgpr_32 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: vgpr_32 }
+ - { id: 22, class: vgpr_32 }
+ - { id: 23, class: vgpr_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vgpr_32 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: sreg_32_xm0 }
+ - { id: 28, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %2 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
+ %16 = REG_SEQUENCE %2, 1, %15, 2
+ %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
+ %9 = COPY %3.sub1
+ %21 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def %vcc, implicit %exec
+ %19 = COPY killed %9
+ %18 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def %vcc, implicit %vcc, implicit %exec
+ %20 = REG_SEQUENCE %21, 1, killed %18, 2
+ %10 = V_MOV_B32_e32 9999, implicit %exec
+ %24 = V_MOV_B32_e32 3871, implicit %exec
+ %6 = V_MOV_B32_e32 1, implicit %exec
+ %7 = S_MOV_B32 1
+ %27 = S_MOV_B32 -4
+
+ %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec
+ FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec
+ FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %13 = V_LSHL_B32_e64 %7, 12, implicit %exec
+ FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %14 = V_LSHL_B32_e64 12, %7, implicit %exec
+ FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %15 = V_LSHL_B32_e64 12, %24, implicit %exec
+ FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %22 = V_LSHL_B32_e64 %6, 12, implicit %exec
+ FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %23 = V_LSHL_B32_e64 %6, 32, implicit %exec
+ FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %25 = V_LSHL_B32_e32 %6, %6, implicit %exec
+ FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec
+ FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %28 = V_LSHL_B32_e32 %27, %6, implicit %exec
+ FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: s_fold_ashr_imm_regimm_32{{$}}
+# GCN: %11 = V_MOV_B32_e32 243, implicit %exec
+# GCN: BUFFER_STORE_DWORD_OFFSET killed %11, killed %8,
+name: s_fold_ashr_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: sreg_32_xm0 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_32_xm0 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sreg_32_xm0 }
+ - { id: 13, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1
+
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_MOV_B32 999123
+ %6 = COPY %4.sub1
+ %7 = COPY %4.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
+ %12 = S_ASHR_I32 killed %5, 12, implicit-def dead %scc
+ %13 = COPY %12
+ BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+ S_ENDPGM
+
+...
+
+# GCN-LABEL: name: v_fold_ashr_imm_regimm_32{{$}}
+# GCN: %11 = V_MOV_B32_e32 3903258, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %11,
+
+# GCN: %12 = V_MOV_B32_e32 62452139, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %12,
+
+# GCN: %13 = V_MOV_B32_e32 1678031, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %13,
+
+# GCN: %14 = V_MOV_B32_e32 3, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %14,
+
+# GCN: %15 = V_MOV_B32_e32 -1, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %15,
+
+# GCN: %22 = V_MOV_B32_e32 62500, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %22,
+
+# GCN: %23 = V_MOV_B32_e32 500000, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %23,
+
+# GCN: %25 = V_MOV_B32_e32 1920, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %25,
+
+# GCN: %26 = V_MOV_B32_e32 487907, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %26,
+
+# GCN: %28 = V_MOV_B32_e32 -1, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %28,
+
+name: v_fold_ashr_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: sreg_64_xexec }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_32_xm0 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vreg_64 }
+ - { id: 17, class: vreg_64 }
+ - { id: 18, class: vgpr_32 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: vgpr_32 }
+ - { id: 22, class: vgpr_32 }
+ - { id: 23, class: vgpr_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vgpr_32 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: sreg_32_xm0 }
+ - { id: 28, class: vgpr_32 }
+ - { id: 32, class: sreg_32_xm0 }
+ - { id: 33, class: sreg_32_xm0 }
+ - { id: 34, class: vgpr_32 }
+ - { id: 35, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %2 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
+ %16 = REG_SEQUENCE %2, 1, %15, 2
+ %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
+ %9 = COPY %3.sub1
+ %21 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def %vcc, implicit %exec
+ %19 = COPY killed %9
+ %18 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def %vcc, implicit %vcc, implicit %exec
+ %20 = REG_SEQUENCE %21, 1, killed %18, 2
+ %10 = V_MOV_B32_e32 999234234, implicit %exec
+ %24 = V_MOV_B32_e32 3871, implicit %exec
+ %6 = V_MOV_B32_e32 1000000, implicit %exec
+ %7 = S_MOV_B32 13424252
+ %8 = S_MOV_B32 4
+ %27 = S_MOV_B32 -4
+ %32 = S_MOV_B32 1
+ %33 = S_MOV_B32 3841
+ %34 = V_MOV_B32_e32 3841, implicit %exec
+ %35 = V_MOV_B32_e32 2, implicit %exec
+
+ %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec
+ FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec
+ FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %13 = V_ASHR_I32_e64 %7, 3, implicit %exec
+ FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %14 = V_ASHR_I32_e64 7, %32, implicit %exec
+ FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %15 = V_ASHR_I32_e64 %27, %24, implicit %exec
+ FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %22 = V_ASHR_I32_e64 %6, 4, implicit %exec
+ FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %23 = V_ASHR_I32_e64 %6, %33, implicit %exec
+ FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %25 = V_ASHR_I32_e32 %34, %34, implicit %exec
+ FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec
+ FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %28 = V_ASHR_I32_e32 %27, %35, implicit %exec
+ FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: s_fold_lshr_imm_regimm_32{{$}}
+# GCN: %11 = V_MOV_B32_e32 1048332, implicit %exec
+# GCN: BUFFER_STORE_DWORD_OFFSET killed %11, killed %8,
+name: s_fold_lshr_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: sreg_32_xm0 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_32_xm0 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: sreg_32_xm0 }
+ - { id: 12, class: sreg_32_xm0 }
+ - { id: 13, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1
+
+ %0 = COPY %sgpr0_sgpr1
+ %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %5 = S_MOV_B32 -999123
+ %6 = COPY %4.sub1
+ %7 = COPY %4.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
+ %12 = S_LSHR_B32 killed %5, 12, implicit-def dead %scc
+ %13 = COPY %12
+ BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: v_fold_lshr_imm_regimm_32{{$}}
+# GCN: %11 = V_MOV_B32_e32 3903258, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %11,
+
+# GCN: %12 = V_MOV_B32_e32 62452139, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %12,
+
+# GCN: %13 = V_MOV_B32_e32 1678031, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %13,
+
+# GCN: %14 = V_MOV_B32_e32 3, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %14,
+
+# GCN: %15 = V_MOV_B32_e32 1, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %15,
+
+# GCN: %22 = V_MOV_B32_e32 62500, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %22,
+
+# GCN: %23 = V_MOV_B32_e32 500000, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %23,
+
+# GCN: %25 = V_MOV_B32_e32 1920, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %25,
+
+# GCN: %26 = V_MOV_B32_e32 487907, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %26,
+
+# GCN: %28 = V_MOV_B32_e32 1073741823, implicit %exec
+# GCN: FLAT_STORE_DWORD %20, %28,
+
+name: v_fold_lshr_imm_regimm_32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: sreg_32_xm0 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: sreg_64_xexec }
+ - { id: 4, class: sreg_64_xexec }
+ - { id: 5, class: sreg_32_xm0 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: sreg_32_xm0 }
+ - { id: 8, class: sreg_32_xm0 }
+ - { id: 9, class: sreg_32_xm0 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vreg_64 }
+ - { id: 17, class: vreg_64 }
+ - { id: 18, class: vgpr_32 }
+ - { id: 19, class: vgpr_32 }
+ - { id: 20, class: vreg_64 }
+ - { id: 21, class: vgpr_32 }
+ - { id: 22, class: vgpr_32 }
+ - { id: 23, class: vgpr_32 }
+ - { id: 24, class: vgpr_32 }
+ - { id: 25, class: vgpr_32 }
+ - { id: 26, class: vgpr_32 }
+ - { id: 27, class: sreg_32_xm0 }
+ - { id: 28, class: vgpr_32 }
+ - { id: 32, class: sreg_32_xm0 }
+ - { id: 33, class: sreg_32_xm0 }
+ - { id: 34, class: vgpr_32 }
+ - { id: 35, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+ - { reg: '%vgpr0', virtual-reg: '%2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %sgpr0_sgpr1, %vgpr0
+
+ %2 = COPY %vgpr0
+ %0 = COPY %sgpr0_sgpr1
+ %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %15 = V_ASHRREV_I32_e64 31, %2, implicit %exec
+ %16 = REG_SEQUENCE %2, 1, %15, 2
+ %17 = V_LSHLREV_B64 2, killed %16, implicit %exec
+ %9 = COPY %3.sub1
+ %21 = V_ADD_I32_e32 %3.sub0, %17.sub0, implicit-def %vcc, implicit %exec
+ %19 = COPY killed %9
+ %18 = V_ADDC_U32_e32 %17.sub1, %19, implicit-def %vcc, implicit %vcc, implicit %exec
+ %20 = REG_SEQUENCE %21, 1, killed %18, 2
+ %10 = V_MOV_B32_e32 999234234, implicit %exec
+ %24 = V_MOV_B32_e32 3871, implicit %exec
+ %6 = V_MOV_B32_e32 1000000, implicit %exec
+ %7 = S_MOV_B32 13424252
+ %8 = S_MOV_B32 4
+ %27 = S_MOV_B32 -4
+ %32 = S_MOV_B32 1
+ %33 = S_MOV_B32 3841
+ %34 = V_MOV_B32_e32 3841, implicit %exec
+ %35 = V_MOV_B32_e32 2, implicit %exec
+
+ %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec
+ FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec
+ FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %13 = V_LSHR_B32_e64 %7, 3, implicit %exec
+ FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %14 = V_LSHR_B32_e64 7, %32, implicit %exec
+ FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %15 = V_LSHR_B32_e64 %27, %24, implicit %exec
+ FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %22 = V_LSHR_B32_e64 %6, 4, implicit %exec
+ FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %23 = V_LSHR_B32_e64 %6, %33, implicit %exec
+ FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %25 = V_LSHR_B32_e32 %34, %34, implicit %exec
+ FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec
+ FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ %28 = V_LSHR_B32_e32 %27, %35, implicit %exec
+ FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+
+ S_ENDPGM
+
+...