[PATCH] D86878: [AMDGPU] Fix a miscompile in add combine

Piotr Sobczak via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 11 02:40:01 PDT 2020


piotr updated this revision to Diff 291165.
piotr added a comment.

I think you are right - I moved the fix to a better place.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86878/new/

https://reviews.llvm.org/D86878

Files:
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
  llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll


Index: llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,GFX9
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,GFX10
+
+; Test that unused lanes are masked out in the s_xor result before it is used as a condition code.
+
+; CHECK-LABEL: {{^}}combine_add_zext_xor:
+
+; GFX9: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]]
+; GFX9: s_and_b64 [[CC:s\[[0-9]+:[0-9]+\]]], exec, [[XOR]]
+; GFX9: s_cmp_lg_u64 [[CC]], 0
+; GFX10: s_xor_b32 [[XOR:s[0-9]+]]
+; GFX10: s_and_b32 [[CC:s[0-9]+]], exec_lo, [[XOR]]
+; GFX10: s_cmpk_lg_u32 [[CC]], 0
+; CHECK: s_addc_u32
+
+define void @combine_add_zext_xor() {
+.entry:
+  br label %.exit
+
+.exit:                                            ; preds = %10, %.entry
+  %.0311 = phi i32 [ 1050, %.entry ], [ 0, %10 ]
+  %.0 = phi i32 [ 0, %.entry ], [ %11, %10 ]
+  %.2 = phi i32 [ 0, %.entry ], [ %12, %10 ]
+  %0 = call <4 x i32> @llvm.amdgcn.image.load.1d.v4i32.i32(i32 15, i32 %.0311, <8 x i32> undef, i32 0, i32 1)
+  %.i112 = extractelement <4 x i32> %0, i32 1
+  %.i3 = extractelement <4 x i32> %0, i32 3
+  br i1 undef, label %10, label %1
+
+1:                                                ; preds = %.exit
+  %2 = or i32 0, %.i112
+  %3 = or i32 0, %2
+  %4 = icmp eq i32 %3, 0
+  %5 = or i32 %.i3, %3
+  %6 = icmp eq i32 %5, 0
+  %7 = icmp eq i32 %.i3, 1
+  %8 = and i1 %7, %4
+  %9 = or i1 %6, %8
+  br label %10
+
+10:                                               ; preds = %1, %.exit
+  %.2.0.in.in = phi i1 [ %9, %1 ], [ undef, %.exit ]
+  %.2.0.in = xor i1 %.2.0.in.in, true
+  %.2.0 = zext i1 %.2.0.in to i32
+  %11 = add i32 %.0, %.2.0
+  %12 = add i32 %.2, %.2.0
+  %13 = icmp sgt i32 %.0311, -1050
+  br i1 %13, label %.exit, label %14
+
+14:                                               ; preds = %10
+  %15 = add i32 %.2, %.2.0
+  %16 = add i32 0, %.2.0
+  %17 = add i32 %.0, %.2.0
+  %18 = or i32 %17, %16
+  %19 = or i32 %15, %18
+  %20 = or i32 undef, %19
+  ret void
+}
+
+attributes #0 = { nounwind readonly willreturn }
+declare <4 x i32> @llvm.amdgcn.image.load.1d.v4i32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1050,6 +1050,21 @@
         {LHS, RHS, CI,
          CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
   } else {
+    // The result of a boolean operation is represented as a 32-bit/64-bit sgpr
+    // with bits set potentially even for inactive lanes, so mask them out here.
+    unsigned CondOpc = CI->getOpcode();
+    if (CondOpc == ISD::AND || CondOpc == ISD::OR || CondOpc == ISD::XOR) {
+      auto ST = static_cast<const GCNSubtarget *>(Subtarget);
+      CI = SDValue(
+          CurDAG->getMachineNode(
+              ST->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, DL,
+              MVT::i1,
+              CurDAG->getRegister(
+                  ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, MVT::i1),
+              CI),
+          0);
+    }
+
     unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                    : AMDGPU::S_SUB_CO_PSEUDO;
     CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D86878.291165.patch
Type: text/x-patch
Size: 3626 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200911/83cba208/attachment.bin>


More information about the llvm-commits mailing list