R600: Various fixes

Fri Sep 27 19:28:08 PDT 2013

On Sun, Sep 22, 2013 at 02:12:47PM -0700, Vincent Lejeune wrote:
> 
> 
> Hi,
> 
> another serie with patches for various part of R600 backend.
> The first 2 patches should fix some crash where ifcvt pass generate too big (> 128 inst long) alu clause.
> The third one allows us to enable --verify-machineinstrs in some complex test. We couldn't do it before
> because we did set wrong FrameSlot inst and did not set correct register class for some non allocatable reg
> (KCache reg actually)
> The 4th one set the "masked" flag to texture/export. It does not change anything currently but should be usefull
> for a later patch that recompute instruction dependencies.
> 
> Vincent

> From a885f1fd13b0a144027df40c685e10d785e4df27 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Fri, 20 Sep 2013 00:59:48 +0200
> Subject: [PATCH 1/4] R600: Put PRED_X instruction in its own clause
> 
> ---
>  lib/Target/R600/R600EmitClauseMarkers.cpp | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
> index a055639..ed2da67 100644
> --- a/lib/Target/R600/R600EmitClauseMarkers.cpp
> +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
> @@ -174,6 +174,8 @@ private:
>        if (AluInstCount > TII->getMaxAlusPerClause())
>          break;
>        if (I->getOpcode() == AMDGPU::PRED_X) {

Can you add a comment here explaining why we need to start a new clause
when there is a PRED_X instructions.  Also, it seems like this is
something we could add a test case for with lit.

> +        if (AluInstCount > 0)
> +          break;
>          if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
>            PushBeforeModifier = true;
>          AluInstCount ++;
> -- 
> 1.8.3.1
> 

> From 0c4f75f89c56312269b85488d2f378b6a0be13db Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Thu, 19 Sep 2013 01:16:05 +0200
> Subject: [PATCH 2/4] R600: add a pass that merges clauses.
> 
> ---
>  lib/Target/R600/AMDGPU.h                |   1 +
>  lib/Target/R600/AMDGPUTargetMachine.cpp |   5 +-
>  lib/Target/R600/CMakeLists.txt          |   1 +
>  lib/Target/R600/R600ClauseMergePass.cpp | 178 ++++++++++++++++++++++++++++++++
>  4 files changed, 183 insertions(+), 2 deletions(-)
>  create mode 100644 lib/Target/R600/R600ClauseMergePass.cpp
> 
> diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> index e2d1caf..feec1c5 100644
> --- a/lib/Target/R600/AMDGPU.h
> +++ b/lib/Target/R600/AMDGPU.h
> @@ -29,6 +29,7 @@ FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
>  FunctionPass *createR600TextureIntrinsicsReplacer();
>  FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
>  FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
> +FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
>  FunctionPass *createR600Packetizer(TargetMachine &tm);
>  FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
>  FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 28d5b6e..15824c1 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -168,10 +168,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
>  bool AMDGPUPassConfig::addPreSched2() {
>    const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
>  
> -  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> +  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
>      addPass(createR600EmitClauseMarkers(*TM));
> -  }
>    addPass(&IfConverterID);
> +  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
> +    addPass(createR600ClauseMergePass(*TM));
>    return false;
>  }
>  
> diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
> index 658eeea..7bdfa7e 100644
> --- a/lib/Target/R600/CMakeLists.txt
> +++ b/lib/Target/R600/CMakeLists.txt
> @@ -28,6 +28,7 @@ add_llvm_target(R600CodeGen
>    AMDGPUConvertToISA.cpp
>    AMDGPUInstrInfo.cpp
>    AMDGPURegisterInfo.cpp
> +  R600ClauseMergePass.cpp
>    R600ControlFlowFinalizer.cpp
>    R600EmitClauseMarkers.cpp
>    R600ExpandSpecialInstrs.cpp
> diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
> new file mode 100644
> index 0000000..eadd2ae
> --- /dev/null
> +++ b/lib/Target/R600/R600ClauseMergePass.cpp
> @@ -0,0 +1,178 @@
> +//===-- R600EClauseMergePass - Merge consecutive CF_ALU -------------------===//
               ^
This looks like a typo.

> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer.
> +/// This pass is merging consecutive CFAlus where applicable.
> +/// It needs to be called after IfCvt for best results.
> +//===----------------------------------------------------------------------===//
> +
> +#define DEBUG_TYPE "r600mergeclause"
> +#include "AMDGPU.h"
> +#include "R600Defines.h"
> +#include "R600InstrInfo.h"
> +#include "R600MachineFunctionInfo.h"
> +#include "R600RegisterInfo.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/raw_ostream.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +static bool isCFAlu(const MachineInstr *MI) {
> +  switch (MI->getOpcode()) {
> +  case AMDGPU::CF_ALU:
> +  case AMDGPU::CF_ALU_PUSH_BEFORE:
> +    return true;
> +  default:
> +    return false;
> +  }
> +}
> +
> +class R600ClauseMergePass : public MachineFunctionPass {
> +
> +private:
> +  static char ID;
> +  const R600InstrInfo *TII;
> +

I don't really like stuffing all the function implementations into the
class implementation, it makes the indentation look strange.  Could
you move the implementations outside the class like in SIInsertWaits.cpp
and other passes.

> +  bool isALU(const MachineInstr *MI) const {
> +    if (TII->isALUInstr(MI->getOpcode()))
> +      return true;
> +    if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
> +      return true;
> +    switch (MI->getOpcode()) {
> +    case AMDGPU::PRED_X:
> +    case AMDGPU::INTERP_PAIR_XY:
> +    case AMDGPU::INTERP_PAIR_ZW:
> +    case AMDGPU::INTERP_VEC_LOAD:
> +    case AMDGPU::COPY:
> +    case AMDGPU::DOT_4:
> +      return true;
> +    default:
> +      return false;
> +    }
> +  }
> +

This function looks like it has been copied from
R600EmitClauseMarkers.cpp can we just have one implementation, maybe in
R600InstrInfo.

> +  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
> +  /// removed and their content affected to the previous alu clause.
> +  /// This function parse instructions after CFAlu untill it find a disabled
> +  /// CFAlu and merge the content, or an enabled CFAlu.
> +  void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const {
> +    MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
> +    I++;
> +    do {
> +      while (I!= E && !isCFAlu(I))
> +        I++;
> +      if (I == E)
> +        return;
> +      MachineInstr *MI = I++;
> +      if (MI->getOperand(8).getImm())
> +        continue;

There are a lot of calls in this pass to MI->getOperands() with a
constant argument.  It is a little unclear what is going on in some
cases.  Could you set UseNamedOperandTable = 1 in the TableGen
definition of these CF instruction and then access the
operands using R600InstrInfo::getNamedOperandIdx().  This will make the
code much easier to read.

> +      CFAlu->getOperand(7).setImm(
> +          CFAlu->getOperand(7).getImm() + MI->getOperand(7).getImm());
> +      MI->eraseFromParent();
> +    } while (I != E);
> +  }
> +
> +  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
> +  /// it is the case.
> +  bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
> +      const {
> +    assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
> +    unsigned RootInstCount = RootCFAlu->getOperand(7).getImm(),
> +        LaterInstCount = LatrCFAlu->getOperand(7).getImm();
> +    unsigned CumuledInsts = RootInstCount + LaterInstCount;
> +    if (CumuledInsts >= TII->getMaxAlusPerClause()) {
> +      DEBUG(dbgs() << "Excess inst counts\n");
> +      return false;
> +    }
> +    if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
> +      return false;
> +    // Is KCache Bank 0 compatible ?
> +    if (LatrCFAlu->getOperand(3).getImm() &&
> +        RootCFAlu->getOperand(3).getImm() &&
> +        (LatrCFAlu->getOperand(1).getImm() !=
> +         RootCFAlu->getOperand(1).getImm() ||
> +        LatrCFAlu->getOperand(5).getImm() !=
> +        RootCFAlu->getOperand(5).getImm())) {
> +      DEBUG(dbgs() << "Wrong KC0\n");
> +      return false;
> +    }
> +    // Is KCache Bank 1 compatible ?
> +    if (LatrCFAlu->getOperand(4).getImm() &&
> +        RootCFAlu->getOperand(4).getImm() &&
> +        (LatrCFAlu->getOperand(2).getImm() !=
> +        RootCFAlu->getOperand(2).getImm() ||
> +        LatrCFAlu->getOperand(6).getImm() !=
> +        RootCFAlu->getOperand(6).getImm())) {
> +      DEBUG(dbgs() << "Wrong KC0\n");
> +      return false;
> +    }
> +    if (LatrCFAlu->getOperand(3).getImm()) {
> +      RootCFAlu->getOperand(3).setImm(LatrCFAlu->getOperand(3).getImm());
> +      RootCFAlu->getOperand(1).setImm(LatrCFAlu->getOperand(1).getImm());
> +      RootCFAlu->getOperand(5).setImm(LatrCFAlu->getOperand(5).getImm());
> +    }
> +    if (LatrCFAlu->getOperand(4).getImm()) {
> +      RootCFAlu->getOperand(4).setImm(LatrCFAlu->getOperand(4).getImm());
> +      RootCFAlu->getOperand(2).setImm(LatrCFAlu->getOperand(2).getImm());
> +      RootCFAlu->getOperand(6).setImm(LatrCFAlu->getOperand(6).getImm());
> +    }
> +    RootCFAlu->getOperand(7).setImm(CumuledInsts);
> +    RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
> +    return true;
> +  }
> +
> +public:
> +  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID),
> +    TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }

We aren't supposed to cache InstrInfo or other Target* objects
like this, since they can change from function to function.  TII should
be initialized in runOnMachineFunction()

> +
> +  virtual bool runOnMachineFunction(MachineFunction &MF) {
> +    for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> +                                                    BB != BB_E; ++BB) {
> +      MachineBasicBlock &MBB = *BB;
> +      MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
> +      MachineBasicBlock::iterator LatestCFAlu = E;
> +      for (;I != E;) {

Couldn't this be a while loop?

> +        MachineInstr *MI = I++;
> +        if ((!isALU(MI) && !isCFAlu(MI)) ||
> +            TII->mustBeLastInClause(MI->getOpcode()))
> +          LatestCFAlu = E;
> +        if (!isCFAlu(MI))
> +          continue;
> +        cleanPotentialDisabledCFAlu(MI);
> +
> +        if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
> +          MI->eraseFromParent();
> +        } else {
> +          assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
> +          LatestCFAlu = MI;
> +        }
> +      }
> +    }
> +    return false;
> +  }
> +
> +  const char *getPassName() const {
> +    return "R600 Merge Clause Markers Pass";
> +  }
> +};
> +
> +char R600ClauseMergePass::ID = 0;
> +
> +} // end anonymous namespace
> +
> +
> +llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
> +  return new R600ClauseMergePass(TM);
> +}
> -- 
> 1.8.3.1
> 

> From 672548ab55d744884927932395e47d0b00d6817d Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sat, 21 Sep 2013 03:21:25 +0200
> Subject: [PATCH 3/4] R600: Enable -verify-machineinstrs in some tests.
> 

This looks great thanks for doing this.

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

> ---
>  lib/Target/R600/AMDGPUInstrInfo.cpp             |  2 +-
>  lib/Target/R600/AMDILInstrInfo.td               | 10 +++++-----
>  lib/Target/R600/R600InstrInfo.cpp               |  9 +++++++++
>  lib/Target/R600/R600Instructions.td             |  2 +-
>  lib/Target/R600/R600RegisterInfo.td             |  5 +++--
>  test/CodeGen/R600/schedule-fs-loop-nested-if.ll |  2 +-
>  test/CodeGen/R600/schedule-fs-loop-nested.ll    |  2 +-
>  test/CodeGen/R600/schedule-fs-loop.ll           |  2 +-
>  test/CodeGen/R600/schedule-if-2.ll              |  2 +-
>  test/CodeGen/R600/schedule-if.ll                |  2 +-
>  10 files changed, 24 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
> index 61437e9..8d3f740 100644
> --- a/lib/Target/R600/AMDGPUInstrInfo.cpp
> +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
> @@ -28,7 +28,7 @@
>  using namespace llvm;
>  
>  AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
> -  : AMDGPUGenInstrInfo(0,0), RI(tm), TM(tm) { }
> +  : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { }
>  
>  const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
>    return RI;
> diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
> index f7d0bd5..0f0c88d 100644
> --- a/lib/Target/R600/AMDILInstrInfo.td
> +++ b/lib/Target/R600/AMDILInstrInfo.td
> @@ -118,15 +118,15 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
>  // Multiclass Instruction formats
>  //===--------------------------------------------------------------------===//
>  // Multiclass that handles branch instructions
> -multiclass BranchConditional<SDNode Op> {
> +multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
>      def _i32 : ILFormat<(outs),
> -  (ins brtarget:$target, GPRI32:$src0),
> +  (ins brtarget:$target, rci:$src0),
>          "; i32 Pseudo branch instruction",
> -  [(Op bb:$target, GPRI32:$src0)]>;
> +  [(Op bb:$target, (i32 rci:$src0))]>;
>      def _f32 : ILFormat<(outs),
> -  (ins brtarget:$target, GPRF32:$src0),
> +  (ins brtarget:$target, rcf:$src0),
>          "; f32 Pseudo branch instruction",
> -  [(Op bb:$target, GPRF32:$src0)]>;
> +  [(Op bb:$target, (f32 rcf:$src0))]>;
>  }
>  
>  // Only scalar types should generate flow control
> diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
> index 93931e4..e3bf546 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -651,6 +651,11 @@ bool isJump(unsigned Opcode) {
>    return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
>  }
>  
> +static bool isBranch(unsigned Opcode) {
> +  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
> +      Opcode == AMDGPU::BRANCH_COND_f32;
> +}
> +
>  bool
>  R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
>                               MachineBasicBlock *&TBB,
> @@ -669,6 +674,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
>        return false;
>      --I;
>    }
> +  // AMDGPU::BRANCH* instructions are only available after isel and are not
> +  // handled
> +  if (isBranch(I->getOpcode()))
> +    return true;
>    if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
>      return false;
>    }
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 24bc6b0..88a5745 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -2238,7 +2238,7 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
>    def BRANCH : ILFormat<(outs), (ins brtarget:$target),
>        "; Pseudo unconditional branch instruction",
>        [(br bb:$target)]>;
> -  defm BRANCH_COND : BranchConditional<IL_brcond>;
> +  defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
>  }
>  
>  //===---------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
> index 514427e..6fec43c 100644
> --- a/lib/Target/R600/R600RegisterInfo.td
> +++ b/lib/Target/R600/R600RegisterInfo.td
> @@ -138,8 +138,6 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X",
>  def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
>    (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
>  
> -} // End isAllocatable = 0
> -
>  def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
>                                (add (sequence "KC0_%u_X", 128, 159))>;
>  
> @@ -172,6 +170,8 @@ def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
>                                     (interleave R600_KC1_X, R600_KC1_Y,
>                                                 R600_KC1_Z, R600_KC1_W)>;
>  
> +} // End isAllocatable = 0
> +
>  def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
>                                     (add (sequence "T%u_X", 0, 127), AR_X)>;
>  
> @@ -192,6 +192,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
>      R600_TReg32,
>      R600_ArrayBase,
>      R600_Addr,
> +    R600_KC0, R600_KC1,
>      ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
>      ALU_CONST, ALU_PARAM, OQAP
>      )>;
> diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
> index ba9620c..2a66094 100644
> --- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
> +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
> @@ -1,4 +1,4 @@
> -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
> +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
>  ;REQUIRES: asserts
>  
>  define void @main() {
> diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll
> index 5e875c4..b917ec6 100644
> --- a/test/CodeGen/R600/schedule-fs-loop-nested.ll
> +++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll
> @@ -1,4 +1,4 @@
> -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
> +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
>  ;REQUIRES: asserts
>  
>  define void @main() {
> diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll
> index d142cac..d6c194b 100644
> --- a/test/CodeGen/R600/schedule-fs-loop.ll
> +++ b/test/CodeGen/R600/schedule-fs-loop.ll
> @@ -1,4 +1,4 @@
> -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
> +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
>  ;REQUIRES: asserts
>  
>  define void @main() {
> diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll
> index 6afd677..38aad18 100644
> --- a/test/CodeGen/R600/schedule-if-2.ll
> +++ b/test/CodeGen/R600/schedule-if-2.ll
> @@ -1,4 +1,4 @@
> -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
> +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
>  ;REQUIRES: asserts
>  
>  define void @main() {
> diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll
> index 347d92f..f960c93 100644
> --- a/test/CodeGen/R600/schedule-if.ll
> +++ b/test/CodeGen/R600/schedule-if.ll
> @@ -1,4 +1,4 @@
> -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
> +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
>  ;REQUIRES: asserts
>  
>  define void @main() {
> -- 
> 1.8.3.1
> 

> From 9b35fc5a260ab22389359dead262c55356d85f3c Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Fri, 6 Sep 2013 00:25:48 +0200
> Subject: [PATCH 4/4] R600: Use masked read sel for texture instructions
> 
> ---
>  lib/Target/R600/R600ISelLowering.cpp | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index c5e814f..126db73 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -1334,6 +1334,8 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
>    };
>  
>    for (unsigned i = 0; i < 4; i++) {
> +    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
> +      RemapSwizzle[i] = 7;

Can you add a comment here and explain what the number 7 means and why
we are doing this.  Also, is it possible to test this with a .ll test?

>      if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
>        if (C->isZero()) {
>          RemapSwizzle[i] = 4; // SEL_0
> -- 
> 1.8.3.1
> 

-Tom

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits