[PATCH] R600/SI: Also enable WQM for image opcodes which calculate LOD

Tom Stellard tom at stellard.net
Thu Feb 5 08:28:53 PST 2015


On Tue, Feb 03, 2015 at 05:48:46PM +0900, Michel Dänzer wrote:
> On 02.02.2015 17:09, Michel Dänzer wrote:
> > On 30.01.2015 23:52, Tom Stellard wrote:
> >> On Fri, Jan 30, 2015 at 10:55:38AM +0900, Michel Dänzer wrote:
> >>> On 22.01.2015 00:30, Tom Stellard wrote:
> >>>> On Wed, Jan 21, 2015 at 01:07:25PM +0900, Michel Dänzer wrote:
> >>>>> From: Michel Dänzer <michel.daenzer at amd.com>
> >>>>>
> >>>>> If whole quad mode isn't enabled for these, the level of detail is
> >>>>> calculated incorrectly for pixels along diagonal triangle edges, causing
> >>>>> artifacts.
> >>>>>
> >>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88642
> >>>>> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> > 
> > [...]
> > 
> >>>> Would it be possible to avoid this switch statement by adding a
> >>>> new target flag to these tablegen definitions?
> >>>
> >>> How about the attached v2 patch?
> >>>
> >>> I'm also attaching another patch which drops enabling WQM for V_INTERP_*
> >>> instructions.
> >>
> >> Hi Michel,
> >>
> >> These both look good to me.
> > 
> > Thanks, but it occurred to me in the meantime that I should add some
> > test coverage for the changes. I'll add that and resend.
> 
> Here are the updated patches with test coverage. I added a third patch
> to ensure WQM is enabled for LDS in pixel shaders.
> 

Hi Michel,

These patches LGTM.

-Tom
> 
> -- 
> Earthling Michel Dänzer               |               http://www.amd.com
> Libre software enthusiast             |             Mesa and X developer

> From 7e831107dad07aba4e90b9706f7ea0167072a852 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Wed, 21 Jan 2015 12:59:05 +0900
> Subject: [PATCH 1/3] R600/SI: Also enable WQM for image opcodes which
>  calculate LOD v3
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> If whole quad mode isn't enabled for these, the level of detail is
> calculated incorrectly for pixels along diagonal triangle edges, causing
> artifacts.
> 
> v2: Use a TSFlag instead of lots of switch cases
> v3: Add test coverage
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88642
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>  lib/Target/R600/SIDefines.h                 |  3 +-
>  lib/Target/R600/SIInstrFormats.td           |  2 +
>  lib/Target/R600/SIInstrInfo.h               |  4 ++
>  lib/Target/R600/SIInstrInfo.td              | 60 +++++++++++++++++----------
>  lib/Target/R600/SIInstructions.td           | 64 ++++++++++++++---------------
>  lib/Target/R600/SILowerControlFlow.cpp      |  2 +-
>  test/CodeGen/R600/llvm.SI.image.sample.ll   | 20 +++++++++
>  test/CodeGen/R600/llvm.SI.image.sample.o.ll | 20 +++++++++
>  8 files changed, 119 insertions(+), 56 deletions(-)
> 
> diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
> index 7601794..b540140 100644
> --- a/lib/Target/R600/SIDefines.h
> +++ b/lib/Target/R600/SIDefines.h
> @@ -35,7 +35,8 @@ enum {
>    SMRD = 1 << 16,
>    DS = 1 << 17,
>    MIMG = 1 << 18,
> -  FLAT = 1 << 19
> +  FLAT = 1 << 19,
> +  WQM = 1 << 20
>  };
>  }
>  
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 913a769..16a35ff 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -38,6 +38,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
>    field bits<1> DS = 0;
>    field bits<1> MIMG = 0;
>    field bits<1> FLAT = 0;
> +  field bits<1> WQM = 0;
>  
>    // These need to be kept in sync with the enum in SIInstrFlags.
>    let TSFlags{0} = VM_CNT;
> @@ -64,6 +65,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
>    let TSFlags{17} = DS;
>    let TSFlags{18} = MIMG;
>    let TSFlags{19} = FLAT;
> +  let TSFlags{20} = WQM;
>  
>    // Most instructions require adjustments after selection to satisfy
>    // operand requirements.
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 28cd27d..b25e35e 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -204,6 +204,10 @@ public:
>      return get(Opcode).TSFlags & SIInstrFlags::FLAT;
>    }
>  
> +  bool isWQM(uint16_t Opcode) const {
> +    return get(Opcode).TSFlags & SIInstrFlags::WQM;
> +  }
> +
>    bool isInlineConstant(const APInt &Imm) const;
>    bool isInlineConstant(const MachineOperand &MO) const;
>    bool isLiteralConstant(const MachineOperand &MO) const;
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index e2e5bde..d10567a 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -1919,7 +1919,7 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> {
>  
>  class MIMG_Sampler_Helper <bits<7> op, string asm,
>                             RegisterClass dst_rc,
> -                           RegisterClass src_rc> : MIMG <
> +                           RegisterClass src_rc, int wqm> : MIMG <
>    op,
>    (outs dst_rc:$vdata),
>    (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
> @@ -1931,33 +1931,41 @@ class MIMG_Sampler_Helper <bits<7> op, string asm,
>    let mayLoad = 1;
>    let mayStore = 0;
>    let hasPostISelHook = 1;
> +  let WQM = wqm;
>  }
>  
>  multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
>                                      RegisterClass dst_rc,
> -                                    int channels> {
> -  def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32>,
> +                                    int channels, int wqm> {
> +  def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm>,
>              MIMG_Mask<asm#"_V1", channels>;
> -  def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>,
> +  def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
>              MIMG_Mask<asm#"_V2", channels>;
> -  def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128>,
> +  def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
>              MIMG_Mask<asm#"_V4", channels>;
> -  def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256>,
> +  def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
>              MIMG_Mask<asm#"_V8", channels>;
> -  def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512>,
> +  def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
>              MIMG_Mask<asm#"_V16", channels>;
>  }
>  
>  multiclass MIMG_Sampler <bits<7> op, string asm> {
> -  defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1>;
> -  defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>;
> -  defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>;
> -  defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
> +  defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 0>;
> +  defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 0>;
> +  defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 0>;
> +  defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 0>;
> +}
> +
> +multiclass MIMG_Sampler_WQM <bits<7> op, string asm> {
> +  defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 1>;
> +  defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 1>;
> +  defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 1>;
> +  defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 1>;
>  }
>  
>  class MIMG_Gather_Helper <bits<7> op, string asm,
>                            RegisterClass dst_rc,
> -                          RegisterClass src_rc> : MIMG <
> +                          RegisterClass src_rc, int wqm> : MIMG <
>    op,
>    (outs dst_rc:$vdata),
>    (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
> @@ -1978,28 +1986,36 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
>    // Therefore, disable all code which updates DMASK by setting these two:
>    let MIMG = 0;
>    let hasPostISelHook = 0;
> +  let WQM = wqm;
>  }
>  
>  multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
>                                      RegisterClass dst_rc,
> -                                    int channels> {
> -  def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32>,
> +                                    int channels, int wqm> {
> +  def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
>              MIMG_Mask<asm#"_V1", channels>;
> -  def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
> +  def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
>              MIMG_Mask<asm#"_V2", channels>;
> -  def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
> +  def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
>              MIMG_Mask<asm#"_V4", channels>;
> -  def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
> +  def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
>              MIMG_Mask<asm#"_V8", channels>;
> -  def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
> +  def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
>              MIMG_Mask<asm#"_V16", channels>;
>  }
>  
>  multiclass MIMG_Gather <bits<7> op, string asm> {
> -  defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1>;
> -  defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
> -  defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
> -  defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
> +  defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 0>;
> +  defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 0>;
> +  defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 0>;
> +  defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 0>;
> +}
> +
> +multiclass MIMG_Gather_WQM <bits<7> op, string asm> {
> +  defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 1>;
> +  defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 1>;
> +  defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 1>;
> +  defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 1>;
>  }
>  
>  //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 7b203d6..a70701b 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1034,63 +1034,63 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
>  //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
>  //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
>  //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
> -defm IMAGE_SAMPLE           : MIMG_Sampler <0x00000020, "image_sample">;
> -defm IMAGE_SAMPLE_CL        : MIMG_Sampler <0x00000021, "image_sample_cl">;
> +defm IMAGE_SAMPLE           : MIMG_Sampler_WQM <0x00000020, "image_sample">;
> +defm IMAGE_SAMPLE_CL        : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
>  defm IMAGE_SAMPLE_D         : MIMG_Sampler <0x00000022, "image_sample_d">;
>  defm IMAGE_SAMPLE_D_CL      : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
>  defm IMAGE_SAMPLE_L         : MIMG_Sampler <0x00000024, "image_sample_l">;
> -defm IMAGE_SAMPLE_B         : MIMG_Sampler <0x00000025, "image_sample_b">;
> -defm IMAGE_SAMPLE_B_CL      : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
> +defm IMAGE_SAMPLE_B         : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
> +defm IMAGE_SAMPLE_B_CL      : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
>  defm IMAGE_SAMPLE_LZ        : MIMG_Sampler <0x00000027, "image_sample_lz">;
> -defm IMAGE_SAMPLE_C         : MIMG_Sampler <0x00000028, "image_sample_c">;
> -defm IMAGE_SAMPLE_C_CL      : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
> +defm IMAGE_SAMPLE_C         : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
> +defm IMAGE_SAMPLE_C_CL      : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
>  defm IMAGE_SAMPLE_C_D       : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
>  defm IMAGE_SAMPLE_C_D_CL    : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
>  defm IMAGE_SAMPLE_C_L       : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
> -defm IMAGE_SAMPLE_C_B       : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
> -defm IMAGE_SAMPLE_C_B_CL    : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
> +defm IMAGE_SAMPLE_C_B       : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
> +defm IMAGE_SAMPLE_C_B_CL    : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
>  defm IMAGE_SAMPLE_C_LZ      : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
> -defm IMAGE_SAMPLE_O         : MIMG_Sampler <0x00000030, "image_sample_o">;
> -defm IMAGE_SAMPLE_CL_O      : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
> +defm IMAGE_SAMPLE_O         : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
> +defm IMAGE_SAMPLE_CL_O      : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
>  defm IMAGE_SAMPLE_D_O       : MIMG_Sampler <0x00000032, "image_sample_d_o">;
>  defm IMAGE_SAMPLE_D_CL_O    : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
>  defm IMAGE_SAMPLE_L_O       : MIMG_Sampler <0x00000034, "image_sample_l_o">;
> -defm IMAGE_SAMPLE_B_O       : MIMG_Sampler <0x00000035, "image_sample_b_o">;
> -defm IMAGE_SAMPLE_B_CL_O    : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
> +defm IMAGE_SAMPLE_B_O       : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
> +defm IMAGE_SAMPLE_B_CL_O    : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
>  defm IMAGE_SAMPLE_LZ_O      : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
> -defm IMAGE_SAMPLE_C_O       : MIMG_Sampler <0x00000038, "image_sample_c_o">;
> -defm IMAGE_SAMPLE_C_CL_O    : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
> +defm IMAGE_SAMPLE_C_O       : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
> +defm IMAGE_SAMPLE_C_CL_O    : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
>  defm IMAGE_SAMPLE_C_D_O     : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
>  defm IMAGE_SAMPLE_C_D_CL_O  : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
>  defm IMAGE_SAMPLE_C_L_O     : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
> -defm IMAGE_SAMPLE_C_B_O     : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
> -defm IMAGE_SAMPLE_C_B_CL_O  : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
> +defm IMAGE_SAMPLE_C_B_O     : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
> +defm IMAGE_SAMPLE_C_B_CL_O  : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
>  defm IMAGE_SAMPLE_C_LZ_O    : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
> -defm IMAGE_GATHER4          : MIMG_Gather <0x00000040, "image_gather4">;
> -defm IMAGE_GATHER4_CL       : MIMG_Gather <0x00000041, "image_gather4_cl">;
> +defm IMAGE_GATHER4          : MIMG_Gather_WQM <0x00000040, "image_gather4">;
> +defm IMAGE_GATHER4_CL       : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
>  defm IMAGE_GATHER4_L        : MIMG_Gather <0x00000044, "image_gather4_l">;
> -defm IMAGE_GATHER4_B        : MIMG_Gather <0x00000045, "image_gather4_b">;
> -defm IMAGE_GATHER4_B_CL     : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
> +defm IMAGE_GATHER4_B        : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
> +defm IMAGE_GATHER4_B_CL     : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
>  defm IMAGE_GATHER4_LZ       : MIMG_Gather <0x00000047, "image_gather4_lz">;
> -defm IMAGE_GATHER4_C        : MIMG_Gather <0x00000048, "image_gather4_c">;
> -defm IMAGE_GATHER4_C_CL     : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
> +defm IMAGE_GATHER4_C        : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
> +defm IMAGE_GATHER4_C_CL     : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
>  defm IMAGE_GATHER4_C_L      : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
> -defm IMAGE_GATHER4_C_B      : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
> -defm IMAGE_GATHER4_C_B_CL   : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
> +defm IMAGE_GATHER4_C_B      : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
> +defm IMAGE_GATHER4_C_B_CL   : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
>  defm IMAGE_GATHER4_C_LZ     : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
> -defm IMAGE_GATHER4_O        : MIMG_Gather <0x00000050, "image_gather4_o">;
> -defm IMAGE_GATHER4_CL_O     : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
> +defm IMAGE_GATHER4_O        : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
> +defm IMAGE_GATHER4_CL_O     : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
>  defm IMAGE_GATHER4_L_O      : MIMG_Gather <0x00000054, "image_gather4_l_o">;
> -defm IMAGE_GATHER4_B_O      : MIMG_Gather <0x00000055, "image_gather4_b_o">;
> +defm IMAGE_GATHER4_B_O      : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
>  defm IMAGE_GATHER4_B_CL_O   : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
>  defm IMAGE_GATHER4_LZ_O     : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
> -defm IMAGE_GATHER4_C_O      : MIMG_Gather <0x00000058, "image_gather4_c_o">;
> -defm IMAGE_GATHER4_C_CL_O   : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
> +defm IMAGE_GATHER4_C_O      : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
> +defm IMAGE_GATHER4_C_CL_O   : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
>  defm IMAGE_GATHER4_C_L_O    : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
> -defm IMAGE_GATHER4_C_B_O    : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
> -defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
> +defm IMAGE_GATHER4_C_B_O    : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
> +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
>  defm IMAGE_GATHER4_C_LZ_O   : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
> -defm IMAGE_GET_LOD          : MIMG_Sampler <0x00000060, "image_get_lod">;
> +defm IMAGE_GET_LOD          : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
>  defm IMAGE_SAMPLE_CD        : MIMG_Sampler <0x00000068, "image_sample_cd">;
>  defm IMAGE_SAMPLE_CD_CL     : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
>  defm IMAGE_SAMPLE_C_CD      : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
> diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
> index 068b22f..f014f2e 100644
> --- a/lib/Target/R600/SILowerControlFlow.cpp
> +++ b/lib/Target/R600/SILowerControlFlow.cpp
> @@ -447,7 +447,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>        Next = std::next(I);
>  
>        MachineInstr &MI = *I;
> -      if (TII->isDS(MI.getOpcode()))
> +      if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
>          NeedWQM = true;
>  
>        // Flat uses m0 in case it needs to access LDS.
> diff --git a/test/CodeGen/R600/llvm.SI.image.sample.ll b/test/CodeGen/R600/llvm.SI.image.sample.ll
> index 61e2b6d..4bc638a 100644
> --- a/test/CodeGen/R600/llvm.SI.image.sample.ll
> +++ b/test/CodeGen/R600/llvm.SI.image.sample.ll
> @@ -2,6 +2,7 @@
>  ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
>  
>  ;CHECK-LABEL: {{^}}sample:
> +;CHECK: s_wqm
>  ;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample() #0 {
>  main_body:
> @@ -15,6 +16,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_cl() #0 {
>  main_body:
> @@ -28,6 +30,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_d:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_d() #0 {
>  main_body:
> @@ -41,6 +44,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_d_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_d_cl() #0 {
>  main_body:
> @@ -54,6 +58,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_l:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_l() #0 {
>  main_body:
> @@ -67,6 +72,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_b:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_b() #0 {
>  main_body:
> @@ -80,6 +86,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_b_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_b_cl() #0 {
>  main_body:
> @@ -93,6 +100,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_lz:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_lz() #0 {
>  main_body:
> @@ -106,6 +114,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_cd:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_cd() #0 {
>  main_body:
> @@ -119,6 +128,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_cd_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_cd_cl() #0 {
>  main_body:
> @@ -132,6 +142,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c() #0 {
>  main_body:
> @@ -145,6 +156,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_cl() #0 {
>  main_body:
> @@ -158,6 +170,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_d:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_d() #0 {
>  main_body:
> @@ -171,6 +184,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_d_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_d_cl() #0 {
>  main_body:
> @@ -184,6 +198,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_l:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_l() #0 {
>  main_body:
> @@ -197,6 +212,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_b:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_b() #0 {
>  main_body:
> @@ -210,6 +226,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_b_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_b_cl() #0 {
>  main_body:
> @@ -223,6 +240,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_lz:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_lz() #0 {
>  main_body:
> @@ -236,6 +254,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_cd:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_cd() #0 {
>  main_body:
> @@ -249,6 +268,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_cd_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_cd_cl() #0 {
>  main_body:
> diff --git a/test/CodeGen/R600/llvm.SI.image.sample.o.ll b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
> index 6d9aa5e..9d89354 100644
> --- a/test/CodeGen/R600/llvm.SI.image.sample.o.ll
> +++ b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
> @@ -2,6 +2,7 @@
>  ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
>  
>  ;CHECK-LABEL: {{^}}sample:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample() #0 {
>  main_body:
> @@ -15,6 +16,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_cl() #0 {
>  main_body:
> @@ -28,6 +30,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_d:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_d() #0 {
>  main_body:
> @@ -41,6 +44,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_d_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_d_cl() #0 {
>  main_body:
> @@ -54,6 +58,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_l:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_l() #0 {
>  main_body:
> @@ -67,6 +72,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_b:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_b() #0 {
>  main_body:
> @@ -80,6 +86,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_b_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_b_cl() #0 {
>  main_body:
> @@ -93,6 +100,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_lz:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_lz() #0 {
>  main_body:
> @@ -106,6 +114,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_cd:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_cd() #0 {
>  main_body:
> @@ -119,6 +128,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_cd_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_cd_cl() #0 {
>  main_body:
> @@ -132,6 +142,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c() #0 {
>  main_body:
> @@ -145,6 +156,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_cl() #0 {
>  main_body:
> @@ -158,6 +170,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_d:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_d() #0 {
>  main_body:
> @@ -171,6 +184,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_d_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_d_cl() #0 {
>  main_body:
> @@ -184,6 +198,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_l:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_l() #0 {
>  main_body:
> @@ -197,6 +212,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_b:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_b() #0 {
>  main_body:
> @@ -210,6 +226,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_b_cl:
> +;CHECK: s_wqm
>  ;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_b_cl() #0 {
>  main_body:
> @@ -223,6 +240,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_lz:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_lz() #0 {
>  main_body:
> @@ -236,6 +254,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_cd:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_cd() #0 {
>  main_body:
> @@ -249,6 +268,7 @@ main_body:
>  }
>  
>  ;CHECK-LABEL: {{^}}sample_c_cd_cl:
> +;CHECK-NOT: s_wqm
>  ;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
>  define void @sample_c_cd_cl() #0 {
>  main_body:
> -- 
> 2.1.4
> 

> From 5f5611d3dad439765bb5ff22f66c0ba277655578 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Thu, 29 Jan 2015 19:18:34 +0900
> Subject: [PATCH 2/3] R600/SI: Don't enable WQM for V_INTERP_* instructions v2
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Doesn't seem necessary anymore. I think this was mostly compensating for
> not enabling WQM for texture sampling instructions.
> 
> v2: Add test coverage
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>  lib/Target/R600/SILowerControlFlow.cpp          |  6 -----
>  test/CodeGen/R600/llvm.SI.fs.interp.constant.ll | 22 ------------------
>  test/CodeGen/R600/llvm.SI.fs.interp.ll          | 30 +++++++++++++++++++++++++
>  3 files changed, 30 insertions(+), 28 deletions(-)
>  delete mode 100644 test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
>  create mode 100644 test/CodeGen/R600/llvm.SI.fs.interp.ll
> 
> diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
> index f014f2e..2e08c9f 100644
> --- a/lib/Target/R600/SILowerControlFlow.cpp
> +++ b/lib/Target/R600/SILowerControlFlow.cpp
> @@ -513,12 +513,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
>          case AMDGPU::SI_INDIRECT_DST_V16:
>            IndirectDst(MI);
>            break;
> -
> -        case AMDGPU::V_INTERP_P1_F32:
> -        case AMDGPU::V_INTERP_P2_F32:
> -        case AMDGPU::V_INTERP_MOV_F32:
> -          NeedWQM = true;
> -          break;
>        }
>      }
>    }
> diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
> deleted file mode 100644
> index 409d46d..0000000
> --- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
> -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
> -
> -;CHECK: s_mov_b32
> -;CHECK-NEXT: v_interp_mov_f32
> -
> -define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
> -main_body:
> -  %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
> -  %5 = call i32 @llvm.SI.packf16(float %4, float %4)
> -  %6 = bitcast i32 %5 to float
> -  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
> -  ret void
> -}
> -
> -declare void @llvm.AMDGPU.shader.type(i32)
> -
> -declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
> -
> -declare i32 @llvm.SI.packf16(float, float) readnone
> -
> -declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.ll b/test/CodeGen/R600/llvm.SI.fs.interp.ll
> new file mode 100644
> index 0000000..6b36140
> --- /dev/null
> +++ b/test/CodeGen/R600/llvm.SI.fs.interp.ll
> @@ -0,0 +1,30 @@
> +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
> +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
> +
> +;CHECK-NOT: s_wqm
> +;CHECK: s_mov_b32
> +;CHECK-NEXT: v_interp_mov_f32
> +;CHECK: v_interp_p1_f32
> +;CHECK: v_interp_p2_f32
> +
> +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
> +main_body:
> +  %5 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
> +  %6 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %4)
> +  %7 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %4)
> +  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %6, float %7, float %7)
> +  ret void
> +}
> +
> +declare void @llvm.AMDGPU.shader.type(i32)
> +
> +; Function Attrs: nounwind readnone
> +declare float @llvm.SI.fs.constant(i32, i32, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> +
> +attributes #0 = { "ShaderType"="0" }
> +attributes #1 = { nounwind readnone }
> -- 
> 2.1.4
> 

> From a16635ee1186c3727f69e8c2e1b6b942c4473e5b Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Tue, 3 Feb 2015 17:32:45 +0900
> Subject: [PATCH 3/3] R600/SI: Amend a test to ensure WQM is enabled for LDS in
>  pixel shaders
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>  test/CodeGen/R600/si-sgpr-spill.ll | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
> index ef3e779..18fda20 100644
> --- a/test/CodeGen/R600/si-sgpr-spill.ll
> +++ b/test/CodeGen/R600/si-sgpr-spill.ll
> @@ -5,6 +5,7 @@
>  ; SGPRs.
>  
>  ; CHECK-LABEL: {{^}}main:
> +; CHECK: s_wqm
>  ; Writing to M0 from an SMRD instruction will hang the GPU.
>  ; CHECK-NOT: s_buffer_load_dword m0
>  ; CHECK: s_endpgm
> -- 
> 2.1.4
> 





More information about the llvm-commits mailing list