[PATCH] R600/SI: Also enable WQM for image opcodes which calculate LOD
Tom Stellard
tom at stellard.net
Thu Feb 5 08:28:53 PST 2015
On Tue, Feb 03, 2015 at 05:48:46PM +0900, Michel Dänzer wrote:
> On 02.02.2015 17:09, Michel Dänzer wrote:
> > On 30.01.2015 23:52, Tom Stellard wrote:
> >> On Fri, Jan 30, 2015 at 10:55:38AM +0900, Michel Dänzer wrote:
> >>> On 22.01.2015 00:30, Tom Stellard wrote:
> >>>> On Wed, Jan 21, 2015 at 01:07:25PM +0900, Michel Dänzer wrote:
> >>>>> From: Michel Dänzer <michel.daenzer at amd.com>
> >>>>>
> >>>>> If whole quad mode isn't enabled for these, the level of detail is
> >>>>> calculated incorrectly for pixels along diagonal triangle edges, causing
> >>>>> artifacts.
> >>>>>
> >>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88642
> >>>>> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> >
> > [...]
> >
> >>>> Would it be possible to avoid this switch statement by adding a
> >>>> new target flag to these tablegen definitions?
> >>>
> >>> How about the attached v2 patch?
> >>>
> >>> I'm also attaching another patch which drops enabling WQM for V_INTERP_*
> >>> instructions.
> >>
> >> Hi Michel,
> >>
> >> These both look good to me.
> >
> > Thanks, but it occurred to me in the meantime that I should add some
> > test coverage for the changes. I'll add that and resend.
>
> Here are the updated patches with test coverage. I added a third patch
> to ensure WQM is enabled for LDS in pixel shaders.
>
Hi Michel,
These patches LGTM.
-Tom
>
> --
> Earthling Michel Dänzer | http://www.amd.com
> Libre software enthusiast | Mesa and X developer
> From 7e831107dad07aba4e90b9706f7ea0167072a852 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Wed, 21 Jan 2015 12:59:05 +0900
> Subject: [PATCH 1/3] R600/SI: Also enable WQM for image opcodes which
> calculate LOD v3
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> If whole quad mode isn't enabled for these, the level of detail is
> calculated incorrectly for pixels along diagonal triangle edges, causing
> artifacts.
>
> v2: Use a TSFlag instead of lots of switch cases
> v3: Add test coverage
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88642
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/SIDefines.h | 3 +-
> lib/Target/R600/SIInstrFormats.td | 2 +
> lib/Target/R600/SIInstrInfo.h | 4 ++
> lib/Target/R600/SIInstrInfo.td | 60 +++++++++++++++++----------
> lib/Target/R600/SIInstructions.td | 64 ++++++++++++++---------------
> lib/Target/R600/SILowerControlFlow.cpp | 2 +-
> test/CodeGen/R600/llvm.SI.image.sample.ll | 20 +++++++++
> test/CodeGen/R600/llvm.SI.image.sample.o.ll | 20 +++++++++
> 8 files changed, 119 insertions(+), 56 deletions(-)
>
> diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
> index 7601794..b540140 100644
> --- a/lib/Target/R600/SIDefines.h
> +++ b/lib/Target/R600/SIDefines.h
> @@ -35,7 +35,8 @@ enum {
> SMRD = 1 << 16,
> DS = 1 << 17,
> MIMG = 1 << 18,
> - FLAT = 1 << 19
> + FLAT = 1 << 19,
> + WQM = 1 << 20
> };
> }
>
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 913a769..16a35ff 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -38,6 +38,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
> field bits<1> DS = 0;
> field bits<1> MIMG = 0;
> field bits<1> FLAT = 0;
> + field bits<1> WQM = 0;
>
> // These need to be kept in sync with the enum in SIInstrFlags.
> let TSFlags{0} = VM_CNT;
> @@ -64,6 +65,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
> let TSFlags{17} = DS;
> let TSFlags{18} = MIMG;
> let TSFlags{19} = FLAT;
> + let TSFlags{20} = WQM;
>
> // Most instructions require adjustments after selection to satisfy
> // operand requirements.
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 28cd27d..b25e35e 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -204,6 +204,10 @@ public:
> return get(Opcode).TSFlags & SIInstrFlags::FLAT;
> }
>
> + bool isWQM(uint16_t Opcode) const {
> + return get(Opcode).TSFlags & SIInstrFlags::WQM;
> + }
> +
> bool isInlineConstant(const APInt &Imm) const;
> bool isInlineConstant(const MachineOperand &MO) const;
> bool isLiteralConstant(const MachineOperand &MO) const;
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index e2e5bde..d10567a 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -1919,7 +1919,7 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> {
>
> class MIMG_Sampler_Helper <bits<7> op, string asm,
> RegisterClass dst_rc,
> - RegisterClass src_rc> : MIMG <
> + RegisterClass src_rc, int wqm> : MIMG <
> op,
> (outs dst_rc:$vdata),
> (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
> @@ -1931,33 +1931,41 @@ class MIMG_Sampler_Helper <bits<7> op, string asm,
> let mayLoad = 1;
> let mayStore = 0;
> let hasPostISelHook = 1;
> + let WQM = wqm;
> }
>
> multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
> RegisterClass dst_rc,
> - int channels> {
> - def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32>,
> + int channels, int wqm> {
> + def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm>,
> MIMG_Mask<asm#"_V1", channels>;
> - def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>,
> + def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
> MIMG_Mask<asm#"_V2", channels>;
> - def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128>,
> + def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
> MIMG_Mask<asm#"_V4", channels>;
> - def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256>,
> + def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
> MIMG_Mask<asm#"_V8", channels>;
> - def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512>,
> + def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
> MIMG_Mask<asm#"_V16", channels>;
> }
>
> multiclass MIMG_Sampler <bits<7> op, string asm> {
> - defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1>;
> - defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>;
> - defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>;
> - defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
> + defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 0>;
> + defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 0>;
> + defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 0>;
> + defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 0>;
> +}
> +
> +multiclass MIMG_Sampler_WQM <bits<7> op, string asm> {
> + defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, 1>;
> + defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, 1>;
> + defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, 1>;
> + defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, 1>;
> }
>
> class MIMG_Gather_Helper <bits<7> op, string asm,
> RegisterClass dst_rc,
> - RegisterClass src_rc> : MIMG <
> + RegisterClass src_rc, int wqm> : MIMG <
> op,
> (outs dst_rc:$vdata),
> (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
> @@ -1978,28 +1986,36 @@ class MIMG_Gather_Helper <bits<7> op, string asm,
> // Therefore, disable all code which updates DMASK by setting these two:
> let MIMG = 0;
> let hasPostISelHook = 0;
> + let WQM = wqm;
> }
>
> multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
> RegisterClass dst_rc,
> - int channels> {
> - def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32>,
> + int channels, int wqm> {
> + def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
> MIMG_Mask<asm#"_V1", channels>;
> - def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
> + def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
> MIMG_Mask<asm#"_V2", channels>;
> - def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
> + def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
> MIMG_Mask<asm#"_V4", channels>;
> - def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
> + def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
> MIMG_Mask<asm#"_V8", channels>;
> - def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
> + def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
> MIMG_Mask<asm#"_V16", channels>;
> }
>
> multiclass MIMG_Gather <bits<7> op, string asm> {
> - defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1>;
> - defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
> - defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
> - defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
> + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 0>;
> + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 0>;
> + defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 0>;
> + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 0>;
> +}
> +
> +multiclass MIMG_Gather_WQM <bits<7> op, string asm> {
> + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, 1>;
> + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, 1>;
> + defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, 1>;
> + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, 1>;
> }
>
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 7b203d6..a70701b 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1034,63 +1034,63 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
> //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
> //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
> //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
> -defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">;
> -defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">;
> +defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
> +defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
> defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
> defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
> defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
> -defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">;
> -defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
> +defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
> +defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
> defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
> -defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">;
> -defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
> +defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
> +defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
> defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
> defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
> defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
> -defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
> -defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
> +defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
> +defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
> defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
> -defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "image_sample_o">;
> -defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
> +defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
> +defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
> defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
> defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
> defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
> -defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">;
> -defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
> +defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
> +defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
> defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
> -defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">;
> -defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
> +defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
> +defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
> defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
> defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
> defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
> -defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
> -defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
> +defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
> +defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
> defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
> -defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">;
> -defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">;
> +defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
> +defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
> defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
> -defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">;
> -defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
> +defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
> +defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
> defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
> -defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">;
> -defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
> +defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
> +defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
> defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
> -defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
> -defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
> +defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
> +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
> defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
> -defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">;
> -defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
> +defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
> +defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
> defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
> -defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">;
> +defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
> defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
> defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
> -defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "image_gather4_c_o">;
> -defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
> +defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
> +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
> defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
> -defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
> -defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
> +defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
> +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
> defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
> -defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">;
> +defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
> defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
> defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
> defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
> diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
> index 068b22f..f014f2e 100644
> --- a/lib/Target/R600/SILowerControlFlow.cpp
> +++ b/lib/Target/R600/SILowerControlFlow.cpp
> @@ -447,7 +447,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> Next = std::next(I);
>
> MachineInstr &MI = *I;
> - if (TII->isDS(MI.getOpcode()))
> + if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
> NeedWQM = true;
>
> // Flat uses m0 in case it needs to access LDS.
> diff --git a/test/CodeGen/R600/llvm.SI.image.sample.ll b/test/CodeGen/R600/llvm.SI.image.sample.ll
> index 61e2b6d..4bc638a 100644
> --- a/test/CodeGen/R600/llvm.SI.image.sample.ll
> +++ b/test/CodeGen/R600/llvm.SI.image.sample.ll
> @@ -2,6 +2,7 @@
> ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
>
> ;CHECK-LABEL: {{^}}sample:
> +;CHECK: s_wqm
> ;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample() #0 {
> main_body:
> @@ -15,6 +16,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_cl() #0 {
> main_body:
> @@ -28,6 +30,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_d:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_d() #0 {
> main_body:
> @@ -41,6 +44,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_d_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_d_cl() #0 {
> main_body:
> @@ -54,6 +58,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_l:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_l() #0 {
> main_body:
> @@ -67,6 +72,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_b:
> +;CHECK: s_wqm
> ;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_b() #0 {
> main_body:
> @@ -80,6 +86,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_b_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_b_cl() #0 {
> main_body:
> @@ -93,6 +100,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_lz:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_lz() #0 {
> main_body:
> @@ -106,6 +114,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_cd:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_cd() #0 {
> main_body:
> @@ -119,6 +128,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_cd_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_cd_cl() #0 {
> main_body:
> @@ -132,6 +142,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c() #0 {
> main_body:
> @@ -145,6 +156,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_cl() #0 {
> main_body:
> @@ -158,6 +170,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_d:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_d() #0 {
> main_body:
> @@ -171,6 +184,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_d_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_d_cl() #0 {
> main_body:
> @@ -184,6 +198,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_l:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_l() #0 {
> main_body:
> @@ -197,6 +212,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_b:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_b() #0 {
> main_body:
> @@ -210,6 +226,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_b_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_b_cl() #0 {
> main_body:
> @@ -223,6 +240,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_lz:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_lz() #0 {
> main_body:
> @@ -236,6 +254,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_cd:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_cd() #0 {
> main_body:
> @@ -249,6 +268,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_cd_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_cd_cl() #0 {
> main_body:
> diff --git a/test/CodeGen/R600/llvm.SI.image.sample.o.ll b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
> index 6d9aa5e..9d89354 100644
> --- a/test/CodeGen/R600/llvm.SI.image.sample.o.ll
> +++ b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
> @@ -2,6 +2,7 @@
> ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
>
> ;CHECK-LABEL: {{^}}sample:
> +;CHECK: s_wqm
> ;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample() #0 {
> main_body:
> @@ -15,6 +16,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_cl() #0 {
> main_body:
> @@ -28,6 +30,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_d:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_d() #0 {
> main_body:
> @@ -41,6 +44,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_d_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_d_cl() #0 {
> main_body:
> @@ -54,6 +58,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_l:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_l() #0 {
> main_body:
> @@ -67,6 +72,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_b:
> +;CHECK: s_wqm
> ;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_b() #0 {
> main_body:
> @@ -80,6 +86,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_b_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_b_cl() #0 {
> main_body:
> @@ -93,6 +100,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_lz:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_lz() #0 {
> main_body:
> @@ -106,6 +114,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_cd:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_cd() #0 {
> main_body:
> @@ -119,6 +128,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_cd_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_cd_cl() #0 {
> main_body:
> @@ -132,6 +142,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c() #0 {
> main_body:
> @@ -145,6 +156,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_cl() #0 {
> main_body:
> @@ -158,6 +170,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_d:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_d() #0 {
> main_body:
> @@ -171,6 +184,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_d_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_d_cl() #0 {
> main_body:
> @@ -184,6 +198,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_l:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_l() #0 {
> main_body:
> @@ -197,6 +212,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_b:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_b() #0 {
> main_body:
> @@ -210,6 +226,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_b_cl:
> +;CHECK: s_wqm
> ;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_b_cl() #0 {
> main_body:
> @@ -223,6 +240,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_lz:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_lz() #0 {
> main_body:
> @@ -236,6 +254,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_cd:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_cd() #0 {
> main_body:
> @@ -249,6 +268,7 @@ main_body:
> }
>
> ;CHECK-LABEL: {{^}}sample_c_cd_cl:
> +;CHECK-NOT: s_wqm
> ;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
> define void @sample_c_cd_cl() #0 {
> main_body:
> --
> 2.1.4
>
> From 5f5611d3dad439765bb5ff22f66c0ba277655578 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Thu, 29 Jan 2015 19:18:34 +0900
> Subject: [PATCH 2/3] R600/SI: Don't enable WQM for V_INTERP_* instructions v2
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Doesn't seem necessary anymore. I think this was mostly compensating for
> not enabling WQM for texture sampling instructions.
>
> v2: Add test coverage
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/SILowerControlFlow.cpp | 6 -----
> test/CodeGen/R600/llvm.SI.fs.interp.constant.ll | 22 ------------------
> test/CodeGen/R600/llvm.SI.fs.interp.ll | 30 +++++++++++++++++++++++++
> 3 files changed, 30 insertions(+), 28 deletions(-)
> delete mode 100644 test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
> create mode 100644 test/CodeGen/R600/llvm.SI.fs.interp.ll
>
> diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
> index f014f2e..2e08c9f 100644
> --- a/lib/Target/R600/SILowerControlFlow.cpp
> +++ b/lib/Target/R600/SILowerControlFlow.cpp
> @@ -513,12 +513,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> case AMDGPU::SI_INDIRECT_DST_V16:
> IndirectDst(MI);
> break;
> -
> - case AMDGPU::V_INTERP_P1_F32:
> - case AMDGPU::V_INTERP_P2_F32:
> - case AMDGPU::V_INTERP_MOV_F32:
> - NeedWQM = true;
> - break;
> }
> }
> }
> diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
> deleted file mode 100644
> index 409d46d..0000000
> --- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
> -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
> -
> -;CHECK: s_mov_b32
> -;CHECK-NEXT: v_interp_mov_f32
> -
> -define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
> -main_body:
> - %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
> - %5 = call i32 @llvm.SI.packf16(float %4, float %4)
> - %6 = bitcast i32 %5 to float
> - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
> - ret void
> -}
> -
> -declare void @llvm.AMDGPU.shader.type(i32)
> -
> -declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
> -
> -declare i32 @llvm.SI.packf16(float, float) readnone
> -
> -declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.ll b/test/CodeGen/R600/llvm.SI.fs.interp.ll
> new file mode 100644
> index 0000000..6b36140
> --- /dev/null
> +++ b/test/CodeGen/R600/llvm.SI.fs.interp.ll
> @@ -0,0 +1,30 @@
> +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
> +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
> +
> +;CHECK-NOT: s_wqm
> +;CHECK: s_mov_b32
> +;CHECK-NEXT: v_interp_mov_f32
> +;CHECK: v_interp_p1_f32
> +;CHECK: v_interp_p2_f32
> +
> +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
> +main_body:
> + %5 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
> + %6 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %4)
> + %7 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %4)
> + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %6, float %7, float %7)
> + ret void
> +}
> +
> +declare void @llvm.AMDGPU.shader.type(i32)
> +
> +; Function Attrs: nounwind readnone
> +declare float @llvm.SI.fs.constant(i32, i32, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> +
> +attributes #0 = { "ShaderType"="0" }
> +attributes #1 = { nounwind readnone }
> --
> 2.1.4
>
> From a16635ee1186c3727f69e8c2e1b6b942c4473e5b Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Tue, 3 Feb 2015 17:32:45 +0900
> Subject: [PATCH 3/3] R600/SI: Amend a test to ensure WQM is enabled for LDS in
> pixel shaders
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> test/CodeGen/R600/si-sgpr-spill.ll | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
> index ef3e779..18fda20 100644
> --- a/test/CodeGen/R600/si-sgpr-spill.ll
> +++ b/test/CodeGen/R600/si-sgpr-spill.ll
> @@ -5,6 +5,7 @@
> ; SGPRs.
>
> ; CHECK-LABEL: {{^}}main:
> +; CHECK: s_wqm
> ; Writing to M0 from an SMRD instruction will hang the GPU.
> ; CHECK-NOT: s_buffer_load_dword m0
> ; CHECK: s_endpgm
> --
> 2.1.4
>
More information about the llvm-commits
mailing list