[PATCH] R600/SI: Fix printing of clamp and omod

Tue Sep 30 07:11:15 PDT 2014

On Tue, Sep 30, 2014 at 02:50:51AM +0000, Matt Arsenault wrote:
> No tests for omod since nothing uses it yet, but
> this should get rid of the remaining annoying trailing
> zeros after some instructions.
> 

LGTM.

> http://reviews.llvm.org/D5535
> 
> Files:
>   lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
>   lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
>   lib/Target/R600/SIDefines.h
>   lib/Target/R600/SIInstrInfo.td
>   test/CodeGen/R600/fneg.f64.ll
>   test/CodeGen/R600/fneg.ll
>   test/CodeGen/R600/frem.ll
>   test/CodeGen/R600/imm.ll
>   test/CodeGen/R600/llvm.AMDGPU.clamp.ll

> Index: lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> ===================================================================
> --- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -294,6 +294,23 @@
>    printIfSet(MI, OpNo, O, "_SAT");
>  }
>  
> +void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
> +                                     raw_ostream &O) {
> +  if (MI->getOperand(OpNo).getImm())
> +    O << " clamp";
> +}
> +
> +void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
> +                                     raw_ostream &O) {
> +  int Imm = MI->getOperand(OpNo).getImm();
> +  if (Imm == SIOutMods::MUL2)
> +    O << " mul:2";
> +  else if (Imm == SIOutMods::MUL4)
> +    O << " mul:4";
> +  else if (Imm == SIOutMods::DIV2)
> +    O << " div:2";
> +}
> +
>  void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
>                                       raw_ostream &O) {
>    int32_t Imm = MI->getOperand(OpNo).getImm();
> Index: lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> ===================================================================
> --- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -52,6 +52,8 @@
>                           StringRef Asm, StringRef Default = "");
>    static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    static void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> +  static void printClampSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> +  static void printOModSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    static void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    static void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    static void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> Index: lib/Target/R600/SIDefines.h
> ===================================================================
> --- lib/Target/R600/SIDefines.h
> +++ lib/Target/R600/SIDefines.h
> @@ -43,6 +43,15 @@
>    };
>  }
>  
> +namespace SIOutMods {
> +  enum {
> +    NONE = 0,
> +    MUL2 = 1,
> +    MUL4 = 2,
> +    DIV2 = 3
> +  };
> +}
> +
>  #define R_00B028_SPI_SHADER_PGM_RSRC1_PS                                0x00B028
>  #define R_00B02C_SPI_SHADER_PGM_RSRC2_PS                                0x00B02C
>  #define   S_00B02C_EXTRA_LDS_SIZE(x)                                  (((x) & 0xFF) << 8)
> Index: lib/Target/R600/SIInstrInfo.td
> ===================================================================
> --- lib/Target/R600/SIInstrInfo.td
> +++ lib/Target/R600/SIInstrInfo.td
> @@ -185,6 +185,14 @@
>    let PrintMethod = "printTFE";
>  }
>  
> +def omod : Operand <i32> {
> +  let PrintMethod = "printOModSI";
> +}
> +
> +def ClampMod : Operand <i1> {
> +  let PrintMethod = "printClampSI";
> +}
> +
>  } // End OperandType = "OPERAND_IMMEDIATE"
>  
>  //===----------------------------------------------------------------------===//
> @@ -399,7 +407,7 @@
>        !if (!eq(HasModifiers, 1),
>          // VOP1 with modifiers
>          (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
> -             i32imm:$clamp, i32imm:$omod)
> +             ClampMod:$clamp, omod:$omod)
>        /* else */,
>          // VOP1 without modifiers
>          (ins Src0RC:$src0)
> @@ -409,7 +417,7 @@
>          // VOP 2 with modifiers
>          (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
>               InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
> -             i32imm:$clamp, i32imm:$omod)
> +             ClampMod:$clamp, omod:$omod)
>        /* else */,
>          // VOP2 without modifiers
>          (ins Src0RC:$src0, Src1RC:$src1)
> @@ -420,7 +428,7 @@
>          (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
>               InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
>               InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
> -             i32imm:$clamp, i32imm:$omod)
> +             ClampMod:$clamp, omod:$omod)
>        /* else */,
>          // VOP3 without modifiers
>          (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
> @@ -442,12 +450,14 @@
>  // instruction.
>  class getAsm64 <int NumSrcArgs, bit HasModifiers> {
>    string src0 = "$src0_modifiers,";
> -  string src1 = !if(!eq(NumSrcArgs, 1), "", " $src1_modifiers,");
> -  string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers,", "");
> +  string src1 = !if(!eq(NumSrcArgs, 1), "",
> +                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
> +                                           " $src1_modifiers,"));
> +  string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
>    string ret =
>    !if(!eq(HasModifiers, 0),
>        getAsm32<NumSrcArgs>.ret,
> -      " $dst, "#src0#src1#src2#" $clamp, $omod");
> +      " $dst, "#src0#src1#src2#"$clamp"#"$omod");
>  }
>  
>  
> @@ -632,7 +642,7 @@
>    P.Ins64, P.Asm64,
>    !if(P.HasModifiers,
>        [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
> -                                i32:$src0_modifiers, i32:$clamp, i32:$omod))))],
> +                                i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
>        [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
>    P.HasModifiers
>  >;
> @@ -664,7 +674,7 @@
>    !if(P.HasModifiers,
>        [(set P.DstVT:$dst,
>             (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> -                                      i32:$clamp, i32:$omod)),
> +                                      i1:$clamp, i32:$omod)),
>                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
>        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
>    revOp, P.HasModifiers
> @@ -692,7 +702,7 @@
>    !if(P.HasModifiers,
>        [(set P.DstVT:$dst,
>             (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> -                                      i32:$clamp, i32:$omod)),
> +                                      i1:$clamp, i32:$omod)),
>                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
>        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
>    revOp, P.HasModifiers
> @@ -719,7 +729,7 @@
>    !if(P.HasModifiers,
>        [(set i1:$dst,
>            (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> -                                      i32:$clamp, i32:$omod)),
> +                                      i1:$clamp, i32:$omod)),
>                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
>                   cond))],
>        [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
> @@ -767,7 +777,7 @@
>      !if(P.HasModifiers,
>          [(set P.DstVT:$dst,
>              (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> -                                       i32:$clamp, i32:$omod)),
> +                                       i1:$clamp, i32:$omod)),
>                    (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
>                    (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
>          [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
> @@ -776,14 +786,14 @@
>      !if(P.HasModifiers,
>          [(set P.DstVT:$dst,
>              (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> -                                       i32:$clamp, i32:$omod)),
> +                                       i1:$clamp, i32:$omod)),
>                    (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
>          [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
>    /* P.NumSrcArgs == 1 */,
>      !if(P.HasModifiers,
>          [(set P.DstVT:$dst,
>              (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> -                                       i32:$clamp, i32:$omod))))],
> +                                       i1:$clamp, i32:$omod))))],
>          [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
>    P.NumSrcArgs, P.HasModifiers
>  >;
> @@ -795,8 +805,8 @@
>        (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
>             InputModsNoDefault:$src1_modifiers, arc:$src1,
>             InputModsNoDefault:$src2_modifiers, arc:$src2,
> -           i32imm:$clamp, i32imm:$omod),
> -  opName#" $dst0, $dst1, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern,
> +           ClampMod:$clamp, i32imm:$omod),
> +  opName#" $dst0, $dst1, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
>    opName, opName, 1, 1
>  >;
>  
> @@ -808,13 +818,13 @@
>  
>  
>  class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
> -  (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$clamp, i32:$omod)),
> +  (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
>          (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
>          (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
>    (Inst i32:$src0_modifiers, P.Src0VT:$src0,
>          i32:$src1_modifiers, P.Src1VT:$src1,
>          i32:$src2_modifiers, P.Src2VT:$src2,
> -        i32:$clamp,
> +        i1:$clamp,
>          i32:$omod)>;
>  
>  //===----------------------------------------------------------------------===//
> Index: test/CodeGen/R600/fneg.f64.ll
> ===================================================================
> --- test/CodeGen/R600/fneg.f64.ll
> +++ test/CodeGen/R600/fneg.f64.ll
> @@ -39,7 +39,7 @@
>  
>  ; FUNC-LABEL: @fneg_free_f64
>  ; FIXME: Unnecessary copy to VGPRs
> -; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, 0, 0
> +; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
>  define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
>    %bc = bitcast i64 %in to double
>    %fsub = fsub double 0.0, %bc
> Index: test/CodeGen/R600/fneg.ll
> ===================================================================
> --- test/CodeGen/R600/fneg.ll
> +++ test/CodeGen/R600/fneg.ll
> @@ -48,7 +48,7 @@
>  ; R600: -KC0[2].Z
>  
>  ; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
> -; SI: V_SUB_F32_e64 v{{[0-9]}}, 0.0, s{{[0-9]}}, 0, 0
> +; SI: V_SUB_F32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
>  define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
>    %bc = bitcast i32 %in to float
>    %fsub = fsub float 0.0, %bc
> Index: test/CodeGen/R600/frem.ll
> ===================================================================
> --- test/CodeGen/R600/frem.ll
> +++ test/CodeGen/R600/frem.ll
> @@ -27,7 +27,7 @@
>  ; SI: V_RCP_F32_e32 [[INVY:v[0-9]+]], [[Y]]
>  ; SI: V_MUL_F32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
>  ; SI: V_TRUNC_F32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
> -; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]],
> +; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
>  ; SI: BUFFER_STORE_DWORD [[RESULT]]
>  ; SI: S_ENDPGM
>  define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
> Index: test/CodeGen/R600/imm.ll
> ===================================================================
> --- test/CodeGen/R600/imm.ll
> +++ test/CodeGen/R600/imm.ll
> @@ -104,7 +104,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_0.0_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.0,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.0{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, 0.0
> @@ -114,7 +114,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_0.5_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, 0.5
> @@ -124,7 +124,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_neg_0.5_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, -0.5
> @@ -134,7 +134,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_1.0_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, 1.0
> @@ -144,7 +144,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_neg_1.0_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, -1.0
> @@ -154,7 +154,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_2.0_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, 2.0
> @@ -164,7 +164,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_neg_2.0_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, -2.0
> @@ -174,7 +174,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_4.0_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, 4.0
> @@ -184,7 +184,7 @@
>  
>  ; CHECK-LABEL: @add_inline_imm_neg_4.0_f32
>  ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
> -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0,
> +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
>  ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
>  define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
>    %y = fadd float %x, -4.0
> Index: test/CodeGen/R600/llvm.AMDGPU.clamp.ll
> ===================================================================
> --- test/CodeGen/R600/llvm.AMDGPU.clamp.ll
> +++ test/CodeGen/R600/llvm.AMDGPU.clamp.ll
> @@ -1,4 +1,4 @@
> -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s
>  ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
>  
>  declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone
> @@ -6,7 +6,7 @@
>  
>  ; FUNC-LABEL: @clamp_0_1_f32
>  ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
> -; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]], 1, 0
> +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
>  ; SI: BUFFER_STORE_DWORD [[RESULT]]
>  ; SI: S_ENDPGM
>  
> @@ -19,7 +19,7 @@
>  
>  ; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32
>  ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
> -; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]], 1, 0
> +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
>  ; SI: BUFFER_STORE_DWORD [[RESULT]]
>  define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
>    %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits