[llvm] r219819 - R600: Fix miscompiles when BFE has multiple uses

Matt Arsenault Matthew.Arsenault at amd.com
Wed Oct 15 10:58:34 PDT 2014


Author: arsenm
Date: Wed Oct 15 12:58:34 2014
New Revision: 219819

URL: http://llvm.org/viewvc/llvm-project?rev=219819&view=rev
Log:
R600: Fix miscompiles when BFE has multiple uses

SimplifyDemandedBits would simplify the BFE's source operand using only the bits demanded by the BFE, which broke the other uses of that operand.

Modified:
    llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
    llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll

Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=219819&r1=219818&r2=219819&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Wed Oct 15 12:58:34 2014
@@ -2170,13 +2170,16 @@ SDValue AMDGPUTargetLowering::PerformDAG
                          BitsFrom, ShiftVal);
     }
 
-    APInt KnownZero, KnownOne;
-    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
-                                          !DCI.isBeforeLegalizeOps());
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-    if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
-        TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) {
-      DCI.CommitTargetLoweringOpt(TLO);
+    if (BitsFrom.hasOneUse()) {
+      APInt KnownZero, KnownOne;
+      TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                            !DCI.isBeforeLegalizeOps());
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+          TLI.SimplifyDemandedBits(BitsFrom, Demanded,
+                                   KnownZero, KnownOne, TLO)) {
+        DCI.CommitTargetLoweringOpt(TLO);
+      }
     }
 
     break;

Modified: llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll?rev=219819&r1=219818&r2=219819&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll Wed Oct 15 12:58:34 2014
@@ -552,3 +552,25 @@ define void @bfe_u32_constant_fold_test_
   store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; Make sure that SimplifyDemandedBits doesn't cause the and to be
+; reduced to the bits demanded by the bfe.
+
+; XXX: The operand to v_bfe_u32 could also just directly be the load register.
+; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
+; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]]
+; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
+; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2
+; SI-DAG: BUFFER_STORE_DWORD [[AND]]
+; SI-DAG: BUFFER_STORE_DWORD [[BFE]]
+; SI: S_ENDPGM
+define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
+                                            i32 addrspace(1)* %out1,
+                                            i32 addrspace(1)* %in) nounwind {
+  %src = load i32 addrspace(1)* %in, align 4
+  %and = and i32 %src, 63
+  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
+  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
+  store i32 %and, i32 addrspace(1)* %out1, align 4
+  ret void
+}





More information about the llvm-commits mailing list