[llvm] r183279 - R600: Add a pass that merge Vector Register
Vincent Lejeune
vljn at ovi.com
Wed Jun 5 13:43:52 PDT 2013
Sorry, I forgot to cherry-pick a commit from my tree when committing this.
It needs the patch pushed at revision r183336, which slightly modifies scheduling.
Can I reapply r183279? It shouldn't break the test anymore.
Vincent
----- Mail original -----
> De : Rafael Espíndola <rafael.espindola at gmail.com>
> À : Vincent Lejeune <vljn at ovi.com>
> Cc : llvm-commits <llvm-commits at cs.uiuc.edu>
> Envoyé le : Mercredi 5 juin 2013 7h12
> Objet : Re: [llvm] r183279 - R600: Add a pass that merge Vector Register
>
> I reverted this since the test was failing.
>
> On 4 June 2013 19:17, Vincent Lejeune <vljn at ovi.com> wrote:
>> Author: vljn
>> Date: Tue Jun 4 18:17:26 2013
>> New Revision: 183279
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=183279&view=rev
>> Log:
>> R600: Add a pass that merge Vector Register
>>
>> Added:
>> llvm/trunk/lib/Target/R600/R600OptimizeVectorRegisters.cpp
>> llvm/trunk/test/CodeGen/R600/texture-input-merge.ll
>> Modified:
>> llvm/trunk/lib/Target/R600/AMDGPU.h
>> llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
>> llvm/trunk/lib/Target/R600/CMakeLists.txt
>>
>> Modified: llvm/trunk/lib/Target/R600/AMDGPU.h
>> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.h?rev=183279&r1=183278&r2=183279&view=diff
>>
> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/AMDGPU.h (original)
>> +++ llvm/trunk/lib/Target/R600/AMDGPU.h Tue Jun 4 18:17:26 2013
>> @@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
>> // R600 Passes
>> FunctionPass* createR600TextureIntrinsicsReplacer();
>> FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
>> +FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
>> FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
>> FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
>> FunctionPass *createR600Packetizer(TargetMachine &tm);
>>
>> Modified: llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
>> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp?rev=183279&r1=183278&r2=183279&view=diff
>>
> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp (original)
>> +++ llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp Tue Jun 4 18:17:26
> 2013
>> @@ -130,6 +130,11 @@ bool AMDGPUPassConfig::addInstSelector()
>>
>> bool AMDGPUPassConfig::addPreRegAlloc() {
>> addPass(createAMDGPUConvertToISAPass(*TM));
>> + const AMDGPUSubtarget &ST =
> TM->getSubtarget<AMDGPUSubtarget>();
>> +
>> + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
>> + addPass(createR600VectorRegMerger(*TM));
>> + }
>> return false;
>> }
>>
>>
>> Modified: llvm/trunk/lib/Target/R600/CMakeLists.txt
>> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/CMakeLists.txt?rev=183279&r1=183278&r2=183279&view=diff
>>
> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/CMakeLists.txt (original)
>> +++ llvm/trunk/lib/Target/R600/CMakeLists.txt Tue Jun 4 18:17:26 2013
>> @@ -41,6 +41,7 @@ add_llvm_target(R600CodeGen
>> R600ISelLowering.cpp
>> R600MachineFunctionInfo.cpp
>> R600MachineScheduler.cpp
>> + R600OptimizeVectorRegisters.cpp
>> R600Packetizer.cpp
>> R600RegisterInfo.cpp
>> R600TextureIntrinsicsReplacer.cpp
>>
>> Added: llvm/trunk/lib/Target/R600/R600OptimizeVectorRegisters.cpp
>> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600OptimizeVectorRegisters.cpp?rev=183279&view=auto
>>
> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600OptimizeVectorRegisters.cpp (added)
>> +++ llvm/trunk/lib/Target/R600/R600OptimizeVectorRegisters.cpp Tue Jun 4
> 18:17:26 2013
>> @@ -0,0 +1,363 @@
>> +//===--------------------- R600MergeVectorRegisters.cpp
> -------------------===//
>> +//
>> +// The LLVM Compiler Infrastructure
>> +//
>> +// This file is distributed under the University of Illinois Open Source
>> +// License. See LICENSE.TXT for details.
>> +//
>>
> +//===----------------------------------------------------------------------===//
>> +//
>> +/// \file
>> +/// This pass merges inputs of swizzeable instructions into vector sharing
>> +/// common data and/or have enough undef subreg using swizzle abilities.
>> +///
>> +/// For instance let's consider the following pseudo code :
>> +/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2,
> undef, sub3
>> +/// ...
>> +/// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2,
> vreg4, sub3
>> +/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
>> +///
>> +/// is turned into :
>> +/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2,
> undef, sub3
>> +/// ...
>> +/// vreg7<def> = INSERT_SUBREG vreg4, sub3
>> +/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
>> +///
>> +/// This allow regalloc to reduce register pressure for vector registers
> and
>> +/// to reduce MOV count.
>>
> +//===----------------------------------------------------------------------===//
>> +
>> +#define DEBUG_TYPE "vec-merger"
>> +#include "llvm/Support/Debug.h"
>> +#include "AMDGPU.h"
>> +#include "R600InstrInfo.h"
>> +#include "llvm/CodeGen/DFAPacketizer.h"
>> +#include "llvm/CodeGen/MachineDominators.h"
>> +#include "llvm/CodeGen/MachineFunctionPass.h"
>> +#include "llvm/CodeGen/MachineLoopInfo.h"
>> +#include "llvm/CodeGen/Passes.h"
>> +#include "llvm/CodeGen/MachineInstrBuilder.h"
>> +#include "llvm/Support/raw_ostream.h"
>> +#include "llvm/CodeGen/MachineRegisterInfo.h"
>> +
>> +using namespace llvm;
>> +
>> +namespace {
>> +
>> +static bool
>> +isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
>> + for (MachineRegisterInfo::def_iterator It = MRI.def_begin(Reg),
>> + E = MRI.def_end(); It != E; ++It) {
>> + return (*It).isImplicitDef();
>> + }
>> + llvm_unreachable("Reg without a def");
>> + return false;
>> +}
>> +
>> +class RegSeqInfo {
>> +public:
>> + MachineInstr *Instr;
>> + DenseMap<unsigned, unsigned> RegToChan;
>> + std::vector<unsigned> UndefReg;
>> + RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
>> + assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE);
>> + for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
>> + MachineOperand &MO = Instr->getOperand(i);
>> + unsigned Chan = Instr->getOperand(i + 1).getImm();
>> + if (isImplicitlyDef(MRI, MO.getReg()))
>> + UndefReg.push_back(Chan);
>> + else
>> + RegToChan[MO.getReg()] = Chan;
>> + }
>> + }
>> + RegSeqInfo() {}
>> +
>> + bool operator==(const RegSeqInfo &RSI) const {
>> + return RSI.Instr == Instr;
>> + }
>> +};
>> +
>> +class R600VectorRegMerger : public MachineFunctionPass {
>> +private:
>> + MachineRegisterInfo *MRI;
>> + const R600InstrInfo *TII;
>> + bool canSwizzle(const MachineInstr &) const;
>> + bool areAllUsesSwizzeable(unsigned Reg) const;
>> + void SwizzleInput(MachineInstr &,
>> + const std::vector<std::pair<unsigned, unsigned> > &)
> const;
>> + bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *,
>> + std::vector<std::pair<unsigned, unsigned> > &Remap)
> const;
>> + bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo
> &CompatibleRSI,
>> + std::vector<std::pair<unsigned, unsigned> >
> &RemapChan);
>> + bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo
> &CompatibleRSI,
>> + std::vector<std::pair<unsigned, unsigned> >
> &RemapChan);
>> + MachineInstr *RebuildVector(RegSeqInfo *MI,
>> + const RegSeqInfo *BaseVec,
>> + const std::vector<std::pair<unsigned, unsigned> >
> &RemapChan) const;
>> + void RemoveMI(MachineInstr *);
>> + void trackRSI(const RegSeqInfo &RSI);
>> +
>> + typedef DenseMap<unsigned, std::vector<MachineInstr *> >
> InstructionSetMap;
>> + DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
>> + InstructionSetMap PreviousRegSeqByReg;
>> + InstructionSetMap PreviousRegSeqByUndefCount;
>> +public:
>> + static char ID;
>> + R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID),
>> + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
>> +
>> + void getAnalysisUsage(AnalysisUsage &AU) const {
>> + AU.setPreservesCFG();
>> + AU.addRequired<MachineDominatorTree>();
>> + AU.addPreserved<MachineDominatorTree>();
>> + AU.addRequired<MachineLoopInfo>();
>> + AU.addPreserved<MachineLoopInfo>();
>> + MachineFunctionPass::getAnalysisUsage(AU);
>> + }
>> +
>> + const char *getPassName() const {
>> + return "R600 Vector Registers Merge Pass";
>> + }
>> +
>> + bool runOnMachineFunction(MachineFunction &Fn);
>> +};
>> +
>> +char R600VectorRegMerger::ID = 0;
>> +
>> +bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
>> + const {
>> + if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
>> + return true;
>> + switch (MI.getOpcode()) {
>> + case AMDGPU::R600_ExportSwz:
>> + case AMDGPU::EG_ExportSwz:
>> + return true;
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> +bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
>> + RegSeqInfo *ToMerge, std::vector< std::pair<unsigned,
> unsigned> > &Remap)
>> + const {
>> + unsigned CurrentUndexIdx = 0;
>> + for (DenseMap<unsigned, unsigned>::iterator It =
> ToMerge->RegToChan.begin(),
>> + E = ToMerge->RegToChan.end(); It != E; ++It) {
>> + DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
>> + Untouched->RegToChan.find((*It).first);
>> + if (PosInUntouched != Untouched->RegToChan.end()) {
>> + Remap.push_back(std::pair<unsigned, unsigned>
>> + ((*It).second, (*PosInUntouched).second));
>> + continue;
>> + }
>> + if (CurrentUndexIdx >= Untouched->UndefReg.size())
>> + return false;
>> + Remap.push_back(std::pair<unsigned, unsigned>
>> + ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
>> + }
>> +
>> + return true;
>> +}
>> +
>> +MachineInstr *R600VectorRegMerger::RebuildVector(
>> + RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
>> + const std::vector<std::pair<unsigned, unsigned> >
> &RemapChan) const {
>> + unsigned Reg = RSI->Instr->getOperand(0).getReg();
>> + MachineBasicBlock::iterator Pos = RSI->Instr;
>> + MachineBasicBlock &MBB = *Pos->getParent();
>> + DebugLoc DL = Pos->getDebugLoc();
>> +
>> + unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
>> + DenseMap<unsigned, unsigned> UpdatedRegToChan =
> BaseRSI->RegToChan;
>> + std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
>> + for (DenseMap<unsigned, unsigned>::iterator It =
> RSI->RegToChan.begin(),
>> + E = RSI->RegToChan.end(); It != E; ++It) {
>> + if (BaseRSI->RegToChan.find((*It).first) !=
> BaseRSI->RegToChan.end()) {
>> + UpdatedRegToChan[(*It).first] = (*It).second;
>> + continue;
>> + }
>> + unsigned DstReg =
> MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
>> + unsigned SubReg = (*It).first;
>> + unsigned Swizzle = (*It).second;
>> + unsigned Chan;
>> + for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
>> + if (RemapChan[j].first == Swizzle) {
>> + Chan = RemapChan[j].second;
>> + break;
>> + }
>> + }
>> + MachineInstr *Tmp = BuildMI(MBB, Pos, DL,
> TII->get(AMDGPU::INSERT_SUBREG),
>> + DstReg)
>> + .addReg(SrcVec)
>> + .addReg(SubReg)
>> + .addImm(Chan);
>> + UpdatedRegToChan[SubReg] = Chan;
>> + for (std::vector<unsigned>::iterator RemoveIt =
> UpdatedUndef.begin(),
>> + RemoveE = UpdatedUndef.end(); RemoveIt != RemoveE; ++ RemoveIt) {
>> + if (*RemoveIt == Chan)
>> + UpdatedUndef.erase(RemoveIt);
>> + }
>> + DEBUG(dbgs() << " ->"; Tmp->dump(););
>> + SrcVec = DstReg;
>> + }
>> + Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg)
>> + .addReg(SrcVec);
>> + DEBUG(dbgs() << " ->"; Pos->dump(););
>> +
>> + DEBUG(dbgs() << " Updating Swizzle:\n");
>> + for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg),
>> + E = MRI->use_end(); It != E; ++It) {
>> + DEBUG(dbgs() << " ";(*It).dump(); dbgs() <<
> " ->");
>> + SwizzleInput(*It, RemapChan);
>> + DEBUG((*It).dump());
>> + }
>> + RSI->Instr->eraseFromParent();
>> +
>> + // Update RSI
>> + RSI->Instr = Pos;
>> + RSI->RegToChan = UpdatedRegToChan;
>> + RSI->UndefReg = UpdatedUndef;
>> +
>> + return Pos;
>> +}
>> +
>> +void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
>> + for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
>> + E = PreviousRegSeqByReg.end(); It != E; ++It) {
>> + std::vector<MachineInstr *> &MIs = (*It).second;
>> + MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
>> + }
>> + for (InstructionSetMap::iterator It =
> PreviousRegSeqByUndefCount.begin(),
>> + E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
>> + std::vector<MachineInstr *> &MIs = (*It).second;
>> + MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
>> + }
>> +}
>> +
>> +void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
>> + const std::vector<std::pair<unsigned, unsigned> >
> &RemapChan) const {
>> + unsigned Offset;
>> + if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
>> + Offset = 2;
>> + else
>> + Offset = 3;
>> + for (unsigned i = 0; i < 4; i++) {
>> + unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
>> + for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
>> + if (RemapChan[j].first == Swizzle) {
>> + MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
>> + break;
>> + }
>> + }
>> + }
>> +}
>> +
>> +bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
>> + for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg),
>> + E = MRI->use_end(); It != E; ++It) {
>> + if (!canSwizzle(*It))
>> + return false;
>> + }
>> + return true;
>> +}
>> +
>> +bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
>> + RegSeqInfo &CompatibleRSI,
>> + std::vector<std::pair<unsigned, unsigned> >
> &RemapChan) {
>> + for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
>> + MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
>> + if (!MOp->isReg())
>> + continue;
>> + if (PreviousRegSeqByReg[MOp->getReg()].empty())
>> + continue;
>> + std::vector<MachineInstr *> MIs =
> PreviousRegSeqByReg[MOp->getReg()];
>> + for (unsigned i = 0, e = MIs.size(); i < e; i++) {
>> + CompatibleRSI = PreviousRegSeq[MIs[i]];
>> + if (RSI == CompatibleRSI)
>> + continue;
>> + if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
>> + return true;
>> + }
>> + }
>> + return false;
>> +}
>> +
>> +bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
>> + RegSeqInfo &CompatibleRSI,
>> + std::vector<std::pair<unsigned, unsigned> >
> &RemapChan) {
>> + unsigned NeededUndefs = 4 - RSI.UndefReg.size();
>> + if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
>> + return false;
>> + std::vector<MachineInstr *> &MIs =
>> + PreviousRegSeqByUndefCount[NeededUndefs];
>> + CompatibleRSI = PreviousRegSeq[MIs.back()];
>> + tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
>> + return true;
>> +}
>> +
>> +void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
>> + for (DenseMap<unsigned, unsigned>::const_iterator
>> + It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
>> + PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
>> + }
>> + PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
>> + PreviousRegSeq[RSI.Instr] = RSI;
>> +}
>> +
>> +bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
>> + MRI = &(Fn.getRegInfo());
>> + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
>> + MBB != MBBe; ++MBB) {
>> + MachineBasicBlock *MB = MBB;
>> + PreviousRegSeq.clear();
>> + PreviousRegSeqByReg.clear();
>> + PreviousRegSeqByUndefCount.clear();
>> +
>> + for (MachineBasicBlock::iterator MII = MB->begin(), MIIE =
> MB->end();
>> + MII != MIIE; ++MII) {
>> + MachineInstr *MI = MII;
>> + if (MI->getOpcode() != AMDGPU::REG_SEQUENCE)
>> + continue;
>> +
>> + RegSeqInfo RSI(*MRI, MI);
>> +
>> + // All uses of MI are swizzeable ?
>> + unsigned Reg = MI->getOperand(0).getReg();
>> + if (!areAllUsesSwizzeable(Reg))
>> + continue;
>> +
>> + DEBUG (dbgs() << "Trying to optimize ";
>> + MI->dump();
>> + );
>> +
>> + RegSeqInfo CandidateRSI;
>> + std::vector<std::pair<unsigned, unsigned> > RemapChan;
>> + DEBUG(dbgs() << "Using common slots...\n";);
>> + if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
>> + // Remove CandidateRSI mapping
>> + RemoveMI(CandidateRSI.Instr);
>> + MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
>> + trackRSI(RSI);
>> + continue;
>> + }
>> + DEBUG(dbgs() << "Using free slots...\n";);
>> + RemapChan.clear();
>> + if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
>> + RemoveMI(CandidateRSI.Instr);
>> + MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
>> + trackRSI(RSI);
>> + continue;
>> + }
>> + //Failed to merge
>> + trackRSI(RSI);
>> + }
>> + }
>> + return false;
>> +}
>> +
>> +}
>> +
>> +llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm)
> {
>> + return new R600VectorRegMerger(tm);
>> +}
>> +
>>
>> Added: llvm/trunk/test/CodeGen/R600/texture-input-merge.ll
>> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/texture-input-merge.ll?rev=183279&view=auto
>>
> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/texture-input-merge.ll (added)
>> +++ llvm/trunk/test/CodeGen/R600/texture-input-merge.ll Tue Jun 4 18:17:26
> 2013
>> @@ -0,0 +1,30 @@
>> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> +
>> +;CHECK-NOT: MOV
>> +
>> +define void @test() {
>> + %1 = call float @llvm.R600.load.input(i32 0)
>> + %2 = call float @llvm.R600.load.input(i32 1)
>> + %3 = call float @llvm.R600.load.input(i32 2)
>> + %4 = call float @llvm.R600.load.input(i32 3)
>> + %5 = fmul float %1, 3.0
>> + %6 = fmul float %2, 3.0
>> + %7 = fmul float %3, 3.0
>> + %8 = fmul float %4, 3.0
>> + %9 = insertelement <4 x float> undef, float %5, i32 0
>> + %10 = insertelement <4 x float> %9, float %6, i32 1
>> + %11 = insertelement <4 x float> undef, float %7, i32 0
>> + %12 = insertelement <4 x float> %11, float %5, i32 1
>> + %13 = insertelement <4 x float> undef, float %8, i32 0
>> + %14 = call <4 x float> @llvm.R600.tex(<4 x float> %10, i32
> 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
>> + %15 = call <4 x float> @llvm.R600.tex(<4 x float> %12, i32
> 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
>> + %16 = call <4 x float> @llvm.R600.tex(<4 x float> %13, i32
> 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
>> + %17 = fadd <4 x float> %14, %15
>> + %18 = fadd <4 x float> %17, %16
>> + call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 0)
>> + ret void
>> +}
>> +
>> +declare float @llvm.R600.load.input(i32) readnone
>> +declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32,
> i32, i32, i32, i32, i32, i32) readnone
>> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list