R600/SI: Prettier display of input modifiers and fold of fabs

Tue May 6 14:51:27 PDT 2014

On Mon, May 05, 2014 at 01:52:37PM -0700, Vincent Lejeune wrote:
> I updated the patch set against newer llvm revision, no regression from piglit.
> 

I think these patches are OK, go ahead and commit.

What is your opinion on storing all the modifiers (including dst
modifiers) in a single operand?

-Tom

> 
> 
> 
> > Le Mardi 22 avril 2014 3h44, "Daenzer, Michel" <Michel.Daenzer at amd.com> a écrit :
> > 
> >>  +; SI-CHECK-LABEL: @fneg_fabs_fold
> >>  +
> >>  +define void @fneg_fabs_fold(float addrspace(1)* %out, float %in, float 
> > %in2) {
> > 
> > This should also check that the modifiers are used as expected.
> > 
> 
> Actually this was a leftover test. I initially expected that folding abs and neg operands would happen automatically, because the function that does the folding can be called several times, but it turned out not to be so easy.
> As fneg(fabs) is not a common pattern, I thought it's OK if it's not folded at the moment.
> 
> Vincent.

> From 9751f7d0fdd2ddfd3633fba032d78a334dacc242 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Thu, 13 Mar 2014 22:29:37 +0100
> Subject: [PATCH 1/3] R600/SI: Use pseudo instruction for fabs/clamp/fneg
> 
> ---
>  lib/Target/R600/SIISelLowering.cpp | 44 ++++++++++++++++++++++++++++++++++++++
>  lib/Target/R600/SIInstructions.td  | 34 +++++++++++++++++++++++++----
>  2 files changed, 74 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index e6880485..3216929 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -539,6 +539,50 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
>        MIB.addOperand(MI->getOperand(i));
>  
>      MI->eraseFromParent();
> +    break;
> +  }
> +  case AMDGPU::FABS_SI: {
> +    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> +    const SIInstrInfo *TII =
> +      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
> +    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> +    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
> +            Reg)
> +            .addImm(0x7fffffff);
> +    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32),
> +            MI->getOperand(0).getReg())
> +            .addReg(MI->getOperand(1).getReg())
> +            .addReg(Reg);
> +    MI->eraseFromParent();
> +    break;
> +  }
> +  case AMDGPU::FNEG_SI: {
> +    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> +    const SIInstrInfo *TII =
> +      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
> +    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> +    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
> +            Reg)
> +            .addImm(0x80000000);
> +    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32),
> +            MI->getOperand(0).getReg())
> +            .addReg(MI->getOperand(1).getReg())
> +            .addReg(Reg);
> +    MI->eraseFromParent();
> +    break;
> +  }
> +  case AMDGPU::FCLAMP_SI: {
> +    const SIInstrInfo *TII =
> +      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
> +    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
> +            MI->getOperand(0).getReg())
> +            .addOperand(MI->getOperand(1))
> +            .addImm(0) // SRC1
> +            .addImm(0) // ABS
> +            .addImm(1) // CLAMP
> +            .addImm(0) // OMOD
> +            .addImm(0); // NEG
> +    MI->eraseFromParent();
>    }
>    }
>    return BB;
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 27e7abe..eac8bc9 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1930,10 +1930,18 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
>  /********** Src & Dst modifiers **********/
>  /********** =================== **********/
>  
> +def FCLAMP_SI : AMDGPUShaderInst <
> +  (outs VReg_32:$dst),
> +  (ins VSrc_32:$src0),
> +  "FCLAMP_SI $dst, $src0",
> +  []
> +> {
> +  let usesCustomInserter = 1;
> +}
> +
>  def : Pat <
>    (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
> -  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
> -   0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
> +  (FCLAMP_SI f32:$src)
>  >;
>  
>  /********** ================================ **********/
> @@ -1952,14 +1960,32 @@ def : Pat <
>    (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
>  >;
>  
> +def FABS_SI : AMDGPUShaderInst <
> +  (outs VReg_32:$dst),
> +  (ins VSrc_32:$src0),
> +  "FABS_SI $dst, $src0",
> +  []
> +> {
> +  let usesCustomInserter = 1;
> +}
> +
>  def : Pat <
>    (fabs f32:$src),
> -  (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
> +  (FABS_SI f32:$src)
>  >;
>  
> +def FNEG_SI : AMDGPUShaderInst <
> +  (outs VReg_32:$dst),
> +  (ins VSrc_32:$src0),
> +  "FNEG_SI $dst, $src0",
> +  []
> +> {
> +  let usesCustomInserter = 1;
> +}
> +
>  def : Pat <
>    (fneg f32:$src),
> -  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
> +  (FNEG_SI f32:$src)
>  >;
>  
>  /********** ================== **********/
> -- 
> 1.9.0
> 

> From 91c268fdf0c26617390935297033d606e3d6a0fc Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sun, 2 Feb 2014 23:26:49 +0100
> Subject: [PATCH 2/3] R600/SI: Prettier display of input modifiers
> 
> ---
>  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 12 ++++++
>  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h   |  1 +
>  lib/Target/R600/SIISelLowering.cpp                | 16 +++++---
>  lib/Target/R600/SIInstrFormats.td                 | 21 ++++++++---
>  lib/Target/R600/SIInstrInfo.cpp                   | 11 ++++--
>  lib/Target/R600/SIInstrInfo.td                    | 46 ++++++++++++-----------
>  lib/Target/R600/SIInstructions.td                 |  8 +++-
>  test/CodeGen/R600/fneg.ll                         |  2 +-
>  test/CodeGen/R600/seto.ll                         |  3 +-
>  test/CodeGen/R600/setuo.ll                        |  3 +-
>  10 files changed, 82 insertions(+), 41 deletions(-)
> 
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index e275371..11ae091 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -158,6 +158,18 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
>    }
>  }
>  
> +void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo,
> +                                            raw_ostream &O) {
> +  unsigned InputModifiers = MI->getOperand(OpNo).getImm();
> +  if (InputModifiers & 0x1)
> +    O << "-";
> +  if (InputModifiers & 0x2)
> +    O << "|";
> +  printOperand(MI, OpNo + 1, O);
> +  if (InputModifiers & 0x2)
> +    O << "|";
> +}
> +
>  void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
>                                          raw_ostream &O) {
>    unsigned Imm = MI->getOperand(OpNum).getImm();
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> index f30fc49..6ca7170 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -38,6 +38,7 @@ private:
>    void printRegOperand(unsigned RegNo, raw_ostream &O);
>    void printImmediate(uint32_t Imm, raw_ostream &O);
>    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> +  void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
>    void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 3216929..3884069 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -576,12 +576,12 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
>        static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
>      BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
>              MI->getOperand(0).getReg())
> +            .addImm(0) // SRC0 modifiers
>              .addOperand(MI->getOperand(1))
> +            .addImm(0) // SRC1 modifiers
>              .addImm(0) // SRC1
> -            .addImm(0) // ABS
>              .addImm(1) // CLAMP
> -            .addImm(0) // OMOD
> -            .addImm(0); // NEG
> +            .addImm(0); // OMOD
>      MI->eraseFromParent();
>    }
>    }
> @@ -1331,7 +1331,6 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
>    const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
>  
>    assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
> -  assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
>  
>    int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
>    bool HaveVSrc = false, HaveSSrc = false;
> @@ -1428,8 +1427,15 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
>    }
>  
>    if (Promote2e64) {
> +    std::vector<SDValue> OldOps(Ops);
> +    Ops.clear();
> +    for (unsigned i = 0; i < OldOps.size(); ++i) {
> +      // src_modifier
> +      Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
> +      Ops.push_back(OldOps[i]);
> +    }
>      // Add the modifier flags while promoting
> -    for (unsigned i = 0; i < 4; ++i)
> +    for (unsigned i = 0; i < 2; ++i)
>        Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
>    }
>  
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index aa2c22c..4b2194b 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -210,16 +210,19 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
>      Enc64 <outs, ins, asm, pattern> {
>  
>    bits<8> dst;
> +  bits<2> src0_modifiers;
>    bits<9> src0;
> +  bits<2> src1_modifiers;
>    bits<9> src1;
> +  bits<2> src2_modifiers;
>    bits<9> src2;
> -  bits<3> abs;
>    bits<1> clamp;
>    bits<2> omod;
> -  bits<3> neg;
>  
>    let Inst{7-0} = dst;
> -  let Inst{10-8} = abs;
> +  let Inst{8} = src0_modifiers{1};
> +  let Inst{9} = src1_modifiers{1};
> +  let Inst{10} = src2_modifiers{1};
>    let Inst{11} = clamp;
>    let Inst{25-17} = op;
>    let Inst{31-26} = 0x34; //encoding
> @@ -227,7 +230,9 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
>    let Inst{49-41} = src1;
>    let Inst{58-50} = src2;
>    let Inst{60-59} = omod;
> -  let Inst{63-61} = neg;
> +  let Inst{61} = src0_modifiers{0};
> +  let Inst{62} = src1_modifiers{0};
> +  let Inst{63} = src2_modifiers{0};
>    
>    let mayLoad = 0;
>    let mayStore = 0;
> @@ -240,12 +245,14 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
>      Enc64 <outs, ins, asm, pattern> {
>  
>    bits<8> dst;
> +  bits<2> src0_modifiers;
>    bits<9> src0;
> +  bits<2> src1_modifiers;
>    bits<9> src1;
> +  bits<2> src2_modifiers;
>    bits<9> src2;
>    bits<7> sdst;
>    bits<2> omod;
> -  bits<3> neg;
>  
>    let Inst{7-0} = dst;
>    let Inst{14-8} = sdst;
> @@ -255,7 +262,9 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
>    let Inst{49-41} = src1;
>    let Inst{58-50} = src2;
>    let Inst{60-59} = omod;
> -  let Inst{63-61} = neg;
> +  let Inst{61} = src0_modifiers{0};
> +  let Inst{62} = src1_modifiers{0};
> +  let Inst{63} = src2_modifiers{0};
>  
>    let mayLoad = 0;
>    let mayStore = 0;
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 5d08b91..4802312 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -1044,6 +1044,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>        // We are converting these to a BFE, so we need to add the missing
>        // operands for the size and offset.
>        unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
> +      Inst->addOperand(Inst->getOperand(1));
> +      Inst->getOperand(1).ChangeToImmediate(0);
> +      Inst->addOperand(MachineOperand::CreateImm(0));
> +      Inst->addOperand(MachineOperand::CreateImm(0));
>        Inst->addOperand(MachineOperand::CreateImm(0));
>        Inst->addOperand(MachineOperand::CreateImm(Size));
>  
> @@ -1051,8 +1055,6 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>        // 3 to not hit an assertion later in MCInstLower.
>        Inst->addOperand(MachineOperand::CreateImm(0));
>        Inst->addOperand(MachineOperand::CreateImm(0));
> -      Inst->addOperand(MachineOperand::CreateImm(0));
> -      Inst->addOperand(MachineOperand::CreateImm(0));
>      }
>  
>      addDescImplicitUseDef(NewDesc, Inst);
> @@ -1069,10 +1071,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>        uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
>  
>        Inst->RemoveOperand(2); // Remove old immediate.
> +      Inst->addOperand(Inst->getOperand(1));
> +      Inst->getOperand(1).ChangeToImmediate(0);
>        Inst->addOperand(MachineOperand::CreateImm(Offset));
> -      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
> -
>        Inst->addOperand(MachineOperand::CreateImm(0));
> +      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
>        Inst->addOperand(MachineOperand::CreateImm(0));
>        Inst->addOperand(MachineOperand::CreateImm(0));
>        Inst->addOperand(MachineOperand::CreateImm(0));
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 7e8512d..2a1207f 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -246,6 +246,11 @@ class VOP2_REV <string revOp, bit isOrig> {
>    bit IsOrig = isOrig;
>  }
>  
> +// This must always be right before the operand being input modified.
> +def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
> +  let PrintMethod = "printOperandAndMods";
> +}
> +
>  multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
>                          string opName, list<dag> pattern> {
>  
> @@ -257,10 +262,8 @@ multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
>    def _e64 : VOP3 <
>      {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
>      (outs drc:$dst),
> -    (ins src:$src0,
> -         i32imm:$abs, i32imm:$clamp,
> -         i32imm:$omod, i32imm:$neg),
> -    opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", []
> +    (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
> +    opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", []
>    >, VOP <opName> {
>      let src1 = SIOperand.ZERO;
>      let src2 = SIOperand.ZERO;
> @@ -289,10 +292,10 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
>    def _e64 : VOP3 <
>      {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
>      (outs vrc:$dst),
> -    (ins arc:$src0, arc:$src1,
> -         i32imm:$abs, i32imm:$clamp,
> -         i32imm:$omod, i32imm:$neg),
> -    opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
> +    (ins InputMods:$src0_modifiers, arc:$src0,
> +         InputMods:$src1_modifiers, arc:$src1,
> +         i32imm:$clamp, i32imm:$omod),
> +    opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
>    >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
>      let src2 = SIOperand.ZERO;
>    }
> @@ -317,10 +320,10 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
>    def _e64 : VOP3b <
>      {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
>      (outs VReg_32:$dst),
> -    (ins VSrc_32:$src0, VSrc_32:$src1,
> -         i32imm:$abs, i32imm:$clamp,
> -         i32imm:$omod, i32imm:$neg),
> -    opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
> +    (ins InputMods: $src0_modifiers, VSrc_32:$src0,
> +         InputMods:$src1_modifiers, VSrc_32:$src1,
> +         i32imm:$clamp, i32imm:$omod),
> +    opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
>    >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
>      let src2 = SIOperand.ZERO;
>      /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
> @@ -341,15 +344,16 @@ multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
>    def _e64 : VOP3 <
>      {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
>      (outs SReg_64:$dst),
> -    (ins arc:$src0, arc:$src1,
> -         InstFlag:$abs, InstFlag:$clamp,
> -         InstFlag:$omod, InstFlag:$neg),
> -    opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg",
> +    (ins InputMods:$src0_modifiers, arc:$src0,
> +         InputMods:$src1_modifiers, arc:$src1,
> +         InstFlag:$clamp, InstFlag:$omod),
> +    opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod",
>      !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
>        [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
>      )
>    >, VOP <opName> {
>      let src2 = SIOperand.ZERO;
> +    let src2_modifiers = 0;
>    }
>  }
>  
> @@ -363,9 +367,10 @@ multiclass VOPC_64 <bits<8> op, string opName,
>  
>  class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
>    op, (outs VReg_32:$dst),
> -  (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
> -   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
> -  opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
> +  (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
> +   VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2,
> +   InstFlag:$clamp, InstFlag:$omod),
> +  opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern
>  >, VOP <opName>;
>  
>  class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
> @@ -375,10 +380,9 @@ class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
>  >, VOP <opName> {
>  
>    let src2 = SIOperand.ZERO;
> -  let abs = 0;
> +  let src0_modifiers = 0;
>    let clamp = 0;
>    let omod = 0;
> -  let neg = 0;
>  }
>  
>  class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index eac8bc9..814cdd6 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1115,7 +1115,11 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
>     InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
>    "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
>    [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
> ->;
> +> {
> +  let src0_modifiers = 0;
> +  let src1_modifiers = 0;
> +  let src2_modifiers = 0;
> +}
>  
>  //f32 pattern for V_CNDMASK_B32_e64
>  def : Pat <
> @@ -2117,7 +2121,7 @@ def : Pat <
>  def : Pat <
>    (int_SI_tid),
>    (V_MBCNT_HI_U32_B32_e32 0xffffffff,
> -                          (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0, 0, 0))
> +                          (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0))
>  >;
>  
>  /********** ================== **********/
> diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
> index f4e6be6..7ad760c 100644
> --- a/test/CodeGen/R600/fneg.ll
> +++ b/test/CodeGen/R600/fneg.ll
> @@ -51,7 +51,7 @@ entry:
>  ; R600-CHECK: -KC0[2].Z
>  ; SI-CHECK-LABEL: @fneg_free
>  ; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
> -; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0, 0, 0
> +; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0
>  define void @fneg_free(float addrspace(1)* %out, i32 %in) {
>  entry:
>    %0 = bitcast i32 %in to float
> diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
> index 8633a4b..e90e788 100644
> --- a/test/CodeGen/R600/seto.ll
> +++ b/test/CodeGen/R600/seto.ll
> @@ -1,6 +1,7 @@
>  ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
>  
> -;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
> +;CHECK-LABEL: @main
> +;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
>  
>  define void @main(float %p) {
>  main_body:
> diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
> index c77a37e..3b1db8b 100644
> --- a/test/CodeGen/R600/setuo.ll
> +++ b/test/CodeGen/R600/setuo.ll
> @@ -1,6 +1,7 @@
>  ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
>  
> -;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
> +;CHECK-LABEL: @main
> +;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
>  
>  define void @main(float %p) {
>  main_body:
> -- 
> 1.9.0
> 

> From 0673da2ae02d150611964129f87babca3bad096c Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 5 Feb 2014 17:14:50 +0100
> Subject: [PATCH 3/3] R600/SI: Fold fabs/fneg into src input modifier
> 
> ---
>  lib/Target/R600/SIISelLowering.cpp | 33 +++++++++++++++++++++++++++++++--
>  test/CodeGen/R600/fabs.ll          | 11 +++++++++++
>  test/CodeGen/R600/fneg.ll          | 11 +++++++++++
>  3 files changed, 53 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 3884069..6fe63cf 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -1329,6 +1329,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
>    // e64 version if available, -1 otherwise
>    int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
>    const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
> +  int InputModifiers[3] = {0};
>  
>    assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
>  
> @@ -1405,7 +1406,10 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
>        }
>      }
>  
> -    if (DescE64 && !Immediate) {
> +    if (Immediate)
> +      continue;
> +
> +    if (DescE64) {
>  
>        // Test if it makes sense to switch to e64 encoding
>        unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
> @@ -1424,6 +1428,31 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
>          DescE64 = nullptr;
>        }
>      }
> +
> +    if (!DescE64 && !Promote2e64)
> +      continue;
> +    if (!Operand.isMachineOpcode())
> +      continue;
> +    if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
> +      Ops.pop_back();
> +      Ops.push_back(Operand.getOperand(0));
> +      InputModifiers[i] = 1;
> +      Promote2e64 = true;
> +      if (!DescE64)
> +        continue;
> +      Desc = DescE64;
> +      DescE64 = 0;
> +    }
> +    else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
> +      Ops.pop_back();
> +      Ops.push_back(Operand.getOperand(0));
> +      InputModifiers[i] = 2;
> +      Promote2e64 = true;
> +      if (!DescE64)
> +        continue;
> +      Desc = DescE64;
> +      DescE64 = 0;
> +    }
>    }
>  
>    if (Promote2e64) {
> @@ -1431,7 +1460,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
>      Ops.clear();
>      for (unsigned i = 0; i < OldOps.size(); ++i) {
>        // src_modifier
> -      Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
> +      Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
>        Ops.push_back(OldOps[i]);
>      }
>      // Add the modifier flags while promoting
> diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
> index 2cd3a4f..b87ce22 100644
> --- a/test/CodeGen/R600/fabs.ll
> +++ b/test/CodeGen/R600/fabs.ll
> @@ -49,6 +49,17 @@ entry:
>    ret void
>  }
>  
> +; SI-CHECK-LABEL: @fabs_fold
> +; SI-CHECK-NOT: V_AND_B32_e32
> +; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
> +define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
> +entry:
> +  %0 = call float @fabs(float %in0)
> +  %1 = fmul float %0, %in1
> +  store float %1, float addrspace(1)* %out
> +  ret void
> +}
> +
>  declare float @fabs(float ) readnone
>  declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
>  declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
> diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
> index 7ad760c..4cddc73 100644
> --- a/test/CodeGen/R600/fneg.ll
> +++ b/test/CodeGen/R600/fneg.ll
> @@ -59,3 +59,14 @@ entry:
>    store float %1, float addrspace(1)* %out
>    ret void
>  }
> +
> +; SI-CHECK-LABEL: @fneg_fold
> +; SI-CHECK-NOT: V_XOR_B32
> +; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
> +define void @fneg_fold(float addrspace(1)* %out, float %in) {
> +entry:
> +  %0 = fsub float -0.0, %in
> +  %1 = fmul float %0, %in
> +  store float %1, float addrspace(1)* %out
> +  ret void
> +}
> -- 
> 1.9.0
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits