[llvm] r219819 - R600: Fix miscompiles when BFE has multiple uses
Matt Arsenault
Matthew.Arsenault at amd.com
Wed Oct 15 10:58:34 PDT 2014
Author: arsenm
Date: Wed Oct 15 12:58:34 2014
New Revision: 219819
URL: http://llvm.org/viewvc/llvm-project?rev=219819&view=rev
Log:
R600: Fix miscompiles when BFE has multiple uses
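SimplifyDemandedBits would break the other uses of the operand: it shrinks the BFE's source operand to only the bits the BFE demands, so any other user of that operand would see the reduced value. The simplification is now attempted only when the operand has a single use.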
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=219819&r1=219818&r2=219819&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Wed Oct 15 12:58:34 2014
@@ -2170,13 +2170,16 @@ SDValue AMDGPUTargetLowering::PerformDAG
BitsFrom, ShiftVal);
}
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
- TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (BitsFrom.hasOneUse()) {
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+ TLI.SimplifyDemandedBits(BitsFrom, Demanded,
+ KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
}
break;
Modified: llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll?rev=219819&r1=219818&r2=219819&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll Wed Oct 15 12:58:34 2014
@@ -552,3 +552,25 @@ define void @bfe_u32_constant_fold_test_
store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
ret void
}
+
+; Make sure that SimplifyDemandedBits doesn't cause the and to be
+; reduced to the bits demanded by the bfe.
+
+; XXX: The operand to v_bfe_u32 could also just directly be the load register.
+; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
+; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]]
+; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
+; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2
+; SI-DAG: BUFFER_STORE_DWORD [[AND]]
+; SI-DAG: BUFFER_STORE_DWORD [[BFE]]
+; SI: S_ENDPGM
+define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
+ i32 addrspace(1)* %out1,
+ i32 addrspace(1)* %in) nounwind {
+ %src = load i32 addrspace(1)* %in, align 4
+ %and = and i32 %src, 63
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
+ store i32 %and, i32 addrspace(1)* %out1, align 4
+ ret void
+}
More information about the llvm-commits mailing list