[PATCH] MachineScheduler: Allow tracking of subregister lane masks.
Tom Stellard
tom at stellard.net
Thu Apr 23 17:32:56 PDT 2015
On Fri, Apr 17, 2015 at 08:44:10PM -0400, Tom Stellard wrote:
> On Fri, Apr 17, 2015 at 02:20:51AM +0000, Matthias Braun wrote:
> > Hi qcolombet, atrick, tstellarAMD,
> >
> > Before this patch the machine scheduler was unable to reorder sequences like the following:
> > %vreg42:sub0 = ...
> > %vreg42:sub1 = ...
> > This patch series enables this. It is however disabled by default. Targets that benefit from this (probably R600) can enable it by change the "shouldTrackLaneMasks" flag in the SchedulingPolicy/Strategy.
> >
> > This patch requires http://reviews.llvm.org/D9067 and http://reviews.llvm.org/D9068 to be applied first.
> >
>
> Hi,
>
> I've found a crashing test case on R600 with these three patches applied
> and shouldTrackLaneMasks sett to true. The test case is attached, you
> can reproduce it with this command:
>
> ./llc -march=amdgcn sched-lane-crash.ll -o -
>
Hi Matthias,
I have just tested your updated patches and found another crashing case.
See the attached test case, which you can run using:
./llc -march=amdgcn sched-lane-crash.ll -o -
-Tom
> -Tom
>
> > About the patch:
> >
> > Tracking lanemasks allows the scheduler to differentiate between
> > different subregister defs and schedule them independently even when
> > they go into the same vreg.
> > For this to work we have to take care that the subregister defs count as
> > register pressure neutral except for the first one. We also have to attach the
> > reads-undef marker to the first subregister def and remove it from all
> > others.
> >
> > REPOSITORY
> > rL LLVM
> >
> > http://reviews.llvm.org/D9069
> >
> > Files:
> > include/llvm/CodeGen/MachineInstr.h
> > include/llvm/CodeGen/MachineScheduler.h
> > include/llvm/CodeGen/RegisterPressure.h
> > include/llvm/CodeGen/ScheduleDAGInstrs.h
> > lib/CodeGen/LiveIntervalAnalysis.cpp
> > lib/CodeGen/MachineInstr.cpp
> > lib/CodeGen/MachineScheduler.cpp
> > lib/CodeGen/RegisterPressure.cpp
> > lib/CodeGen/ScheduleDAGInstrs.cpp
> >
> > EMAIL PREFERENCES
> > http://reviews.llvm.org/settings/panel/emailpreferences/
>
> > Index: include/llvm/CodeGen/MachineInstr.h
> > ===================================================================
> > --- include/llvm/CodeGen/MachineInstr.h
> > +++ include/llvm/CodeGen/MachineInstr.h
> > @@ -1050,7 +1050,7 @@
> > /// Mark all subregister defs of register @p Reg with the undef flag.
> > /// This function is used when we determined to have a subregister def in an
> > /// otherwise undefined super register.
> > - void addRegisterDefReadUndef(unsigned Reg);
> > + void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true);
> >
> > /// addRegisterDefined - We have determined MI defines a register. Make sure
> > /// there is an operand defining Reg.
> > Index: include/llvm/CodeGen/MachineScheduler.h
> > ===================================================================
> > --- include/llvm/CodeGen/MachineScheduler.h
> > +++ include/llvm/CodeGen/MachineScheduler.h
> > @@ -150,14 +150,17 @@
> > struct MachineSchedPolicy {
> > // Allow the scheduler to disable register pressure tracking.
> > bool ShouldTrackPressure;
> > + // Allow the schedule to track subregister defs to different lanes
> > + // independently.
> > + bool ShouldTrackLaneMasks;
> >
> > // Allow the scheduler to force top-down or bottom-up scheduling. If neither
> > // is true, the scheduler runs in both directions and converges.
> > bool OnlyTopDown;
> > bool OnlyBottomUp;
> >
> > - MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false),
> > - OnlyBottomUp(false) {}
> > + MachineSchedPolicy(): ShouldTrackPressure(false), ShouldTrackLaneMasks(false),
> > + OnlyTopDown(false), OnlyBottomUp(false) {}
> > };
> >
> > /// MachineSchedStrategy - Interface to the scheduling algorithm used by
> > @@ -179,6 +182,10 @@
> > /// initializing this strategy. Called after initPolicy.
> > virtual bool shouldTrackPressure() const { return true; }
> >
> > + /// Check if lane masks should be tracked to schedule subregister defs of the
> > + /// same vreg independently. Called after initPolicy().
> > + virtual bool shouldTrackLaneMasks() const { return false; }
> > +
> > /// Initialize the strategy after building the DAG for a new region.
> > virtual void initialize(ScheduleDAGMI *DAG) = 0;
> >
> > @@ -458,6 +465,8 @@
> >
> > void updatePressureDiffs(ArrayRef<unsigned> LiveUses);
> >
> > + void updateFirstDef(const VRegInfo &Info, const MachineInstr *ToBeScheduled);
> > +
> > void updateScheduledPressure(const SUnit *SU,
> > const std::vector<unsigned> &NewMaxPressure);
> > };
> > @@ -866,6 +875,10 @@
> > return RegionPolicy.ShouldTrackPressure;
> > }
> >
> > + bool shouldTrackLaneMasks() const override {
> > + return RegionPolicy.ShouldTrackLaneMasks;
> > + }
> > +
> > void initialize(ScheduleDAGMI *dag) override;
> >
> > SUnit *pickNode(bool &IsTopNode) override;
> > Index: include/llvm/CodeGen/RegisterPressure.h
> > ===================================================================
> > --- include/llvm/CodeGen/RegisterPressure.h
> > +++ include/llvm/CodeGen/RegisterPressure.h
> > @@ -304,10 +304,11 @@
> >
> > /// Recede across the previous instruction.
> > bool recede(SmallVectorImpl<unsigned> *LiveUses = nullptr,
> > + SmallVectorImpl<unsigned> *Defs = nullptr,
> > PressureDiff *PDiff = nullptr);
> >
> > /// Advance across the current instruction.
> > - bool advance();
> > + bool advance(SmallVectorImpl<unsigned> *Defs = nullptr);
> >
> > /// Finalize the region boundaries and recored live ins and live outs.
> > void closeRegion();
> > Index: include/llvm/CodeGen/ScheduleDAGInstrs.h
> > ===================================================================
> > --- include/llvm/CodeGen/ScheduleDAGInstrs.h
> > +++ include/llvm/CodeGen/ScheduleDAGInstrs.h
> > @@ -43,6 +43,19 @@
> > }
> > };
> >
> > + struct VRegInfo {
> > + unsigned VirtReg;
> > + unsigned NumUnscheduledDefSUs : 31;
> > + bool LiveIn : 1;
> > +
> > + VRegInfo(unsigned VReg)
> > + : VirtReg(VReg), NumUnscheduledDefSUs(0), LiveIn(false) {}
> > +
> > + unsigned getSparseSetIndex() const {
> > + return TargetRegisterInfo::virtReg2Index(VirtReg);
> > + }
> > + };
> > +
> > /// Record a physical register access.
> > /// For non-data-dependent uses, OpIdx == -1.
> > struct PhysRegSUOper {
> > @@ -72,6 +85,8 @@
> > /// vreg use list.
> > typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMultiMap;
> >
> > + typedef SparseSet<VRegInfo, VirtReg2IndexFunctor> VRegInfoMap;
> > +
> > /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
> > /// MachineInstrs.
> > class ScheduleDAGInstrs : public ScheduleDAG {
> > @@ -122,6 +137,12 @@
> > /// is mapped to a set of SUnits. These include all local vreg uses, not
> > /// just the uses for a singly defined vreg.
> > VReg2SUnitMultiMap VRegUses;
> > + /// After calling BuildSchedGraph, each vreg def in the scheduling region
> > + /// is mapped to a set of SUnits.
> > + VReg2SUnitMultiMap VRegDefs;
> > +
> > + /// Information (mostly the number of unscheduled defs left) about a vreg.
> > + VRegInfoMap VRegInfos;
> >
> > /// State internal to DAG building.
> > /// -------------------------------
> > Index: lib/CodeGen/LiveIntervalAnalysis.cpp
> > ===================================================================
> > --- lib/CodeGen/LiveIntervalAnalysis.cpp
> > +++ lib/CodeGen/LiveIntervalAnalysis.cpp
> > @@ -469,7 +469,7 @@
> > if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
> > if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
> > MachineInstr *MI = getInstructionFromIndex(Def);
> > - MI->addRegisterDefReadUndef(LI.reg);
> > + MI->setRegisterDefReadUndef(LI.reg);
> > }
> > }
> >
> > Index: lib/CodeGen/MachineInstr.cpp
> > ===================================================================
> > --- lib/CodeGen/MachineInstr.cpp
> > +++ lib/CodeGen/MachineInstr.cpp
> > @@ -1863,11 +1863,11 @@
> > }
> > }
> >
> > -void MachineInstr::addRegisterDefReadUndef(unsigned Reg) {
> > +void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
> > for (MachineOperand &MO : operands()) {
> > if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
> > continue;
> > - MO.setIsUndef();
> > + MO.setIsUndef(IsUndef);
> > }
> > }
> >
> > Index: lib/CodeGen/MachineScheduler.cpp
> > ===================================================================
> > --- lib/CodeGen/MachineScheduler.cpp
> > +++ lib/CodeGen/MachineScheduler.cpp
> > @@ -859,6 +859,7 @@
> > SUPressureDiffs.clear();
> >
> > ShouldTrackPressure = SchedImpl->shouldTrackPressure();
> > + TrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
> > }
> >
> > // Setup the register pressure trackers for the top scheduled top and bottom
> > @@ -921,6 +922,12 @@
> > dbgs() << TRI->getRegPressureSetName(
> > RegionCriticalPSets[i].getPSet()) << " ";
> > dbgs() << "\n");
> > +
> > + if (TrackLaneMasks) {
> > + // Set read-undef flag for VRegs with exactly one (subregister) def.
> > + for (const VRegInfo &Info : VRegInfos)
> > + updateFirstDef(Info, nullptr);
> > + }
> > }
> >
> > void ScheduleDAGMILive::
> > @@ -992,6 +999,48 @@
> > }
> > }
> >
> > +/// Register pressure is tracked for full vregs. This means a subregister def
> > +/// does not change the register pressure as it appears to be reading+writing
> > +/// the vreg, except for the first which just writes the vreg. This means we
> > +/// have to change the pressure diffs as soon as we only have 1 def left to
> > +/// schedule and know it will become the first.
> > +/// If @p ToBeScheduled is given this node is considered as already scheduled.
> > +void ScheduleDAGMILive::updateFirstDef(const VRegInfo &Info,
> > + const MachineInstr *ToBeScheduled) {
> > + if (Info.LiveIn || Info.NumUnscheduledDefSUs != 1)
> > + return;
> > +
> > + unsigned VReg = Info.VirtReg;
> > +#ifndef NDEBUG
> > + unsigned Unscheduled = 0;
> > + for (const VReg2SUnit &V2SU
> > + : make_range(VRegDefs.find(VReg), VRegDefs.end())) {
> > + if (!V2SU.SU->isScheduled && V2SU.SU->getInstr() != ToBeScheduled)
> > + ++Unscheduled;
> > + }
> > + assert(Unscheduled == 1 &&
> > + "There should be exactly one other unscheduled instruction left");
> > +#endif
> > + // There should be exactly one other unscheduled MI left which needs its
> > + // pressure changed.
> > + for (const VReg2SUnit &V2SU
> > + : make_range(VRegDefs.find(VReg), VRegDefs.end())) {
> > + SUnit *SU = V2SU.SU;
> > + if (SU->isScheduled)
> > + continue;
> > + MachineInstr &MI = *SU->getInstr();
> > + if (&MI == ToBeScheduled)
> > + continue;
> > + // The remaining def will be the first in line. This means it does not read
> > + // earlier values: Change the pressure diff and add the read-undef flag.
> > + getPressureDiff(SU).addPressureChange(VReg, false, &MRI);
> > + MI.setRegisterDefReadUndef(VReg, true);
> > + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") will be first def of "
> > + << PrintVRegOrUnit(VReg, TRI) << ": " << MI);
> > + break;
> > + }
> > +}
> > +
> > /// schedule - Called back from MachineScheduler::runOnMachineFunction
> > /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
> > /// only includes instructions that have DAG nodes, not scheduling boundaries.
> > @@ -1200,9 +1249,25 @@
> >
> > if (ShouldTrackPressure) {
> > // Update top scheduled pressure.
> > - TopRPTracker.advance();
> > + SmallVector<unsigned, 2> Defs;
> > + TopRPTracker.advance(&Defs);
> > assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
> > updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
> > +
> > + if (TrackLaneMasks) {
> > + // If we schedule the first of a series of subregister defs, then we
> > + // have to mark it with the read-undef flag.
> > + for (unsigned Reg : Defs) {
> > + if (!TRI->isVirtualRegister(Reg))
> > + continue;
> > +
> > + VRegInfo &Info = *VRegInfos.find(Reg);
> > + if (Info.LiveIn)
> > + continue;
> > + MI->setRegisterDefReadUndef(Reg, true);
> > + Info.LiveIn = true;
> > + }
> > + }
> > }
> > }
> > else {
> > @@ -1222,10 +1287,23 @@
> > if (ShouldTrackPressure) {
> > // Update bottom scheduled pressure.
> > SmallVector<unsigned, 8> LiveUses;
> > - BotRPTracker.recede(&LiveUses);
> > + SmallVector<unsigned, 2> Defs;
> > + BotRPTracker.recede(&LiveUses, &Defs);
> > assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
> > updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
> > updatePressureDiffs(LiveUses);
> > +
> > + if (TrackLaneMasks) {
> > + for (unsigned Reg : Defs) {
> > + if (!TRI->isVirtualRegister(Reg))
> > + continue;
> > +
> > + VRegInfo &Info = *VRegInfos.find(Reg);
> > + assert(Info.NumUnscheduledDefSUs > 0);
> > + --Info.NumUnscheduledDefSUs;
> > + updateFirstDef(Info, MI);
> > + }
> > + }
> > }
> > }
> > }
> > Index: lib/CodeGen/RegisterPressure.cpp
> > ===================================================================
> > --- lib/CodeGen/RegisterPressure.cpp
> > +++ lib/CodeGen/RegisterPressure.cpp
> > @@ -456,6 +456,7 @@
> > /// difference pointer is provided record the changes is pressure caused by this
> > /// instruction independent of liveness.
> > bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
> > + SmallVectorImpl<unsigned> *Defs,
> > PressureDiff *PDiff) {
> > // Check for the top of the analyzable region.
> > if (CurrPos == MBB->begin()) {
> > @@ -500,6 +501,9 @@
> > // TODO: consider earlyclobbers?
> > for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
> > unsigned Reg = RegOpers.Defs[i];
> > + if (Defs != nullptr && !containsReg(*Defs, Reg))
> > + Defs->push_back(Reg);
> > +
> > bool DeadDef = false;
> > if (RequireIntervals) {
> > const LiveRange *LR = getLiveRange(Reg);
> > @@ -552,7 +556,7 @@
> > }
> >
> > /// Advance across the current instruction.
> > -bool RegPressureTracker::advance() {
> > +bool RegPressureTracker::advance(SmallVectorImpl<unsigned> *Defs) {
> > assert(!TrackUntiedDefs && "unsupported mode");
> >
> > // Check for the bottom of the analyzable region.
> > @@ -605,6 +609,8 @@
> > // Generate liveness for defs.
> > for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
> > unsigned Reg = RegOpers.Defs[i];
> > + if (Defs != nullptr && !containsReg(*Defs, Reg))
> > + Defs->push_back(Reg);
> > if (LiveRegs.insert(Reg))
> > increaseRegPressure(Reg);
> > }
> > Index: lib/CodeGen/ScheduleDAGInstrs.cpp
> > ===================================================================
> > --- lib/CodeGen/ScheduleDAGInstrs.cpp
> > +++ lib/CodeGen/ScheduleDAGInstrs.cpp
> > @@ -388,18 +388,42 @@
> > /// TODO: Hoist loop induction variable increments. This has to be
> > /// reevaluated. Generally, IV scheduling should be done before coalescing.
> > void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
> > - const MachineInstr *MI = SU->getInstr();
> > - const MachineOperand &MO = MI->getOperand(OperIdx);
> > + MachineInstr *MI = SU->getInstr();
> > + MachineOperand &MO = MI->getOperand(OperIdx);
> > unsigned Reg = MO.getReg();
> > unsigned DefLaneMask = getLaneMaskForMO(MO);
> > + // Remember the isUndef flag, as we may reset it in a bit.
> > + bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
> > +
> > + VRegInfo *RegInfo = nullptr;
> > + if (TrackLaneMasks) {
> > + auto IteratorBoolPair = VRegInfos.insert(VRegInfo(Reg));
> > + RegInfo = &*(IteratorBoolPair.first);
> > + RegInfo->LiveIn = !IsKill;
> > + // Clear undef flag, we'll re-add it later once we know which Def is first.
> > + MO.setIsUndef(false);
> > + }
> >
> > + // Record this local VReg Def.
> > if (MO.isDead()) {
> > assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
> > "Dead defs should have no uses");
> > } else {
> > + if (TrackLaneMasks) {
> > + VReg2SUnitMultiMap::iterator UI = VRegDefs.find(Reg);
> > + for (; UI != VRegDefs.end(); ++UI) {
> > + if (UI->SU == SU)
> > + break;
> > + }
> > + if (UI == VRegDefs.end()) {
> > + VRegDefs.insert(VReg2SUnit(Reg, 0, SU));
> > + ++RegInfo->NumUnscheduledDefSUs;
> > + }
> > + }
> > +
> > // If we have a <read-undef> flag, none of the lane values comes from an
> > // earlier instruction.
> > - unsigned KillLaneMask = MO.isUndef() ? ~0u : DefLaneMask;
> > + unsigned KillLaneMask = IsKill ? ~0u : DefLaneMask;
> > // Add data dependence to all uses we found so far.
> > const TargetSubtargetInfo &ST = MF.getSubtarget();
> > for (VReg2SUnitMultiMap::iterator I = CurrentVRegUses.find(Reg),
> > @@ -853,6 +877,12 @@
> >
> > VRegUses.clear();
> > VRegUses.setUniverse(NumVirtRegs);
> > + VRegDefs.clear();
> > + VRegInfos.clear();
> > + if (TrackLaneMasks) {
> > + VRegDefs.setUniverse(NumVirtRegs);
> > + VRegInfos.setUniverse(NumVirtRegs);
> > + }
> >
> > // Model data dependencies between instructions being scheduled and the
> > // ExitSU.
> > @@ -875,13 +905,6 @@
> > SUnit *SU = MISUnitMap[MI];
> > assert(SU && "No SUnit mapped to this MI");
> >
> > - if (RPTracker) {
> > - PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
> > - RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
> > - assert(RPTracker->getPos() == std::prev(MII) &&
> > - "RPTracker can't find MI");
> > - }
> > -
> > assert(
> > (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
> > "Cannot schedule terminators or labels!");
> > @@ -1124,6 +1147,13 @@
> > BarrierChain->addPred(SDep(SU, SDep::Barrier));
> > }
> > }
> > +
> > + if (RPTracker) {
> > + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
> > + RPTracker->recede(/*LiveUses=*/nullptr, /*Defs=*/nullptr, PDiff);
> > + assert(RPTracker->getPos() == std::prev(MII) &&
> > + "RPTracker can't find MI");
> > + }
> > }
> > if (DbgMI)
> > FirstDbgValue = DbgMI;
>
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
> ; ModuleID = 'bugpoint-reduced-simplified.bc'
>
> define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
> main_body:
> %22 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> zeroinitializer)
> %23 = extractelement <4 x float> %22, i32 2
> %24 = call float @fabs(float %23)
> %25 = fdiv float 1.000000e+00, %24
> %26 = fmul float undef, %25
> %27 = fadd float %26, 1.500000e+00
> %28 = bitcast float %27 to i32
> %29 = insertelement <4 x i32> undef, i32 %28, i32 1
> %30 = insertelement <4 x i32> %29, i32 undef, i32 2
> %31 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %30, <32 x i8> undef, <16 x i8> undef, i32 4)
> %32 = extractelement <4 x float> %31, i32 0
> %33 = fcmp olt float undef, %32
> %34 = select i1 %33, float 1.000000e+00, float 0.000000e+00
> %35 = fmul float undef, %34
> %36 = fmul float %35, 0.000000e+00
> %37 = fmul float %36, undef
> %38 = fadd float %37, undef
> %39 = fmul float undef, undef
> %40 = fmul float %39, 0.000000e+00
> %41 = fmul float %40, undef
> %42 = fadd float %41, %38
> %43 = fadd float undef, %42
> call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %43, float undef, float undef, float 0.000000e+00)
> ret void
> }
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.SI.load.const(<16 x i8>, i32) #1
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.maxnum.f32(float, float) #1
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
>
> ; Function Attrs: nounwind readnone
> declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.sqrt.f32(float) #1
>
> ; Function Attrs: readnone
> declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
>
> ; Function Attrs: readnone
> declare float @fabs(float) #2
>
> ; Function Attrs: nounwind readnone
> declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
>
> ; Function Attrs: readnone
> declare float @llvm.AMDIL.clamp.(float, float, float) #2
>
> ; Function Attrs: nounwind readnone
> declare float @llvm.pow.f32(float, float) #1
>
> declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>
> attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
> attributes #1 = { nounwind readnone }
> attributes #2 = { readnone }
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
-------------- next part --------------
; ModuleID = 'bugpoint-reduced-simplified.bc'
target triple = "x86_64-unknown-linux-gnu"
define void @main() #0 {
main_body:
%0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
%1 = fmul float undef, undef
%2 = fadd float %1, undef
%3 = fadd float undef, undef
%4 = fadd float %2, undef
%5 = fsub float %3, 0.000000e+00
%6 = fsub float %4, %0
%7 = fadd float %5, 7.812500e-03
%8 = fadd float %6, -7.812500e-03
%9 = insertelement <4 x float> undef, float %7, i32 1
%10 = insertelement <4 x float> %9, float %8, i32 2
%11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3
%12 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %11)
%13 = extractelement <4 x float> %12, i32 3
%14 = bitcast float %13 to i32
%15 = insertelement <4 x i32> undef, i32 %14, i32 2
%16 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %15, <32 x i8> undef, <16 x i8> zeroinitializer, i32 4)
%17 = extractelement <4 x float> %16, i32 0
%18 = fcmp olt float %17, 0.000000e+00
%19 = select i1 %18, float 0.000000e+00, float 2.500000e-01
%20 = fadd float undef, %19
%21 = fadd float %20, 0.000000e+00
%22 = fmul float undef, %21
%23 = fmul float undef, %22
%24 = fmul float undef, %23
%25 = fmul float %24, undef
%26 = fadd float %25, 0.000000e+00
%27 = fsub float -0.000000e+00, %26
%28 = call float @llvm.AMDIL.exp.(float %27)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %28, float undef, float undef)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
More information about the llvm-commits
mailing list