[PATCH] R600: Fix miscompiles when BFE has multiple uses
Tom Stellard
tom at stellard.net
Wed Oct 15 06:44:15 PDT 2014
On Wed, Oct 15, 2014 at 03:34:53AM +0000, Matt Arsenault wrote:
> SimplifyDemandedBits would break the other uses of the operand.
>
> http://reviews.llvm.org/D5790
>
> Files:
> lib/Target/R600/AMDGPUISelLowering.cpp
> test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -2173,13 +2173,16 @@
> BitsFrom, ShiftVal);
> }
>
> - APInt KnownZero, KnownOne;
> - TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
> - !DCI.isBeforeLegalizeOps());
> - const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
> - TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) {
> - DCI.CommitTargetLoweringOpt(TLO);
> + if (BitsFrom.hasOneUse()) {
> + APInt KnownZero, KnownOne;
> + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
> + !DCI.isBeforeLegalizeOps());
> + const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> + if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
> + TLI.SimplifyDemandedBits(BitsFrom, Demanded,
> + KnownZero, KnownOne, TLO)) {
> + DCI.CommitTargetLoweringOpt(TLO);
> + }
> }
>
> break;
> Index: test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> ===================================================================
> --- test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> +++ test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> @@ -552,3 +552,25 @@
> store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> ret void
> }
> +
> +; Make sure that SimplifyDemandedBits doesn't cause the and to be
> +; reduced to the bits demanded by the bfe.
> +
> +; XXX: The operand to v_bfe_u32 could also just directly be the load register.
> +; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg
I think you need a : at the end of the function name. Otherwise, LGTM.
-Tom
> +; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]]
> +; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
> +; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2
> +; SI-DAG: BUFFER_STORE_DWORD [[AND]]
> +; SI-DAG: BUFFER_STORE_DWORD [[BFE]]
> +; SI: S_ENDPGM
> +define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
> + i32 addrspace(1)* %out1,
> + i32 addrspace(1)* %in) nounwind {
> + %src = load i32 addrspace(1)* %in, align 4
> + %and = and i32 %src, 63
> + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
> + store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
> + store i32 %and, i32 addrspace(1)* %out1, align 4
> + ret void
> +}
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list