[llvm] r339190 - AMDGPU: Remove broken i16 ternary patterns

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 8 04:35:50 PDT 2018


Merged to 7.0 in r339235.

On Tue, Aug 7, 2018 at 11:54 PM, Jan Vesely via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: jvesely
> Date: Tue Aug  7 14:54:37 2018
> New Revision: 339190
>
> URL: http://llvm.org/viewvc/llvm-project?rev=339190&view=rev
> Log:
> AMDGPU: Remove broken i16 ternary patterns
>
> Fixup test to check for GCN prefix
> These patterns always zero-extend the result, even though it might need sign extension.
> This has been broken since the addition of i16 support.
> It popped up in the mad_sat(char) test because the min(max()) combination is turned into v_med3, resulting in the following (incorrect) sequence:
>         v_mad_i16 v2, v10, v9, v11
>         v_med3_i32 v2, v2, v8, v7
>
> Fixes mad_sat(char) piglit on VI.
>
> Differential Revision: https://reviews.llvm.org/D49836
>
> Modified:
>     llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
>     llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll
>
> Modified: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td?rev=339190&r1=339189&r2=339190&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td Tue Aug  7 14:54:37 2018
> @@ -461,17 +461,6 @@ def : GCNPat <
>    (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
>  >;
>
> -def : GCNPat<
> -  (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
> -  (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
> ->;
> -
> -def : GCNPat<
> -  (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
> -   (REG_SEQUENCE VReg_64,
> -     (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0,
> -     (V_MOV_B32_e32 (i32 0)), sub1)
> ->;
>  }
>
>  defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll?rev=339190&r1=339189&r2=339190&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll Tue Aug  7 14:54:37 2018
> @@ -1,8 +1,8 @@
>  ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
>  ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
> -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
> -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
> -; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
> +; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN
> +; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
> +; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
>
>  declare i32 @llvm.r600.read.tidig.x() nounwind readnone
>
> @@ -138,3 +138,90 @@ bb18:
>    store i32 %tmp16, i32 addrspace(1)* %arg
>    ret void
>  }
> +
> +; FUNC-LABEL: {{^}}i8_mad_sat_16:
> +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
> +; The result must be sign-extended
> +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
> +; EG: 8
> +; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> +; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> +; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
> +; GCN: v_med3_i32 v{{[0-9]}}, [[EXT]],
> +define amdgpu_kernel void @i8_mad_sat_16(i8 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1, i8 addrspace(1)* %in2, i64 addrspace(5)* %idx) {
> +entry:
> +  %retval.0.i = load i64, i64 addrspace(5)* %idx
> +  %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 %retval.0.i
> +  %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 %retval.0.i
> +  %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %in2, i64 %retval.0.i
> +  %l1 = load i8, i8 addrspace(1)* %arrayidx, align 1
> +  %l2 = load i8, i8 addrspace(1)* %arrayidx2, align 1
> +  %l3 = load i8, i8 addrspace(1)* %arrayidx4, align 1
> +  %conv1.i = sext i8 %l1 to i16
> +  %conv3.i = sext i8 %l2 to i16
> +  %conv5.i = sext i8 %l3 to i16
> +  %mul.i.i.i = mul nsw i16 %conv3.i, %conv1.i
> +  %add.i.i = add i16 %mul.i.i.i, %conv5.i
> +  %c4 = icmp sgt i16 %add.i.i, -128
> +  %cond.i.i = select i1 %c4, i16 %add.i.i, i16 -128
> +  %c5 = icmp slt i16 %cond.i.i, 127
> +  %cond13.i.i = select i1 %c5, i16 %cond.i.i, i16 127
> +  %conv8.i = trunc i16 %cond13.i.i to i8
> +  %arrayidx7 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 %retval.0.i
> +  store i8 %conv8.i, i8 addrspace(1)* %arrayidx7, align 1
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}i8_mad_32:
> +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
> +; The result must be sign-extended
> +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
> +; EG: 8
> +; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> +; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> +; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
> +define amdgpu_kernel void @i8_mad_32(i32 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) {
> +entry:
> +  %retval.0.i = load i64, i64 addrspace(5)* %idx
> +  %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i
> +  %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i
> +  %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i
> +  %la = load i8, i8 addrspace(1)* %arrayidx, align 1
> +  %lb = load i8, i8 addrspace(1)* %arrayidx2, align 1
> +  %lc = load i8, i8 addrspace(1)* %arrayidx4, align 1
> +  %exta = sext i8 %la to i16
> +  %extb = sext i8 %lb to i16
> +  %extc = sext i8 %lc to i16
> +  %mul = mul i16 %exta, %extb
> +  %mad = add i16 %mul, %extc
> +  %mad_ext = sext i16 %mad to i32
> +  store i32 %mad_ext, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}i8_mad_64:
> +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
> +; The result must be sign-extended
> +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
> +; EG: 8
> +; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> +; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> +; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
> +define amdgpu_kernel void @i8_mad_64(i64 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) {
> +entry:
> +  %retval.0.i = load i64, i64 addrspace(5)* %idx
> +  %arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i
> +  %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i
> +  %arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i
> +  %la = load i8, i8 addrspace(1)* %arrayidx, align 1
> +  %lb = load i8, i8 addrspace(1)* %arrayidx2, align 1
> +  %lc = load i8, i8 addrspace(1)* %arrayidx4, align 1
> +  %exta = sext i8 %la to i16
> +  %extb = sext i8 %lb to i16
> +  %extc = sext i8 %lc to i16
> +  %mul = mul i16 %exta, %extb
> +  %mad = add i16 %mul, %extc
> +  %mad_ext = sext i16 %mad to i64
> +  store i64 %mad_ext, i64 addrspace(1)* %out
> +  ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list