[PATCH] D86878: [AMDGPU] Fix a miscompile in add combine
Piotr Sobczak via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 11 02:40:01 PDT 2020
piotr updated this revision to Diff 291165.
piotr added a comment.
I think you are right - I moved the fix to a better place.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D86878/new/
https://reviews.llvm.org/D86878
Files:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
Index: llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,GFX9
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,GFX10
+
+; Test that unused lanes are masked out in the s_xor result before it is used as condition code.
+
+; CHECK-LABEL: {{^}}combine_add_zext_xor:
+
+; GFX9: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]]
+; GFX9: s_and_b64 [[CC:s\[[0-9]+:[0-9]+\]]], exec, [[XOR]]
+; GFX9: s_cmp_lg_u64 [[CC]], 0
+; GFX10: s_xor_b32 [[XOR:s[0-9]+]]
+; GFX10: s_and_b32 [[CC:s[0-9]+]], exec_lo, [[XOR]]
+; GFX10: s_cmpk_lg_u32 [[CC]], 0
+; CHECK: s_addc_u32
+
+define void @combine_add_zext_xor() {
+.entry:
+ br label %.exit
+
+.exit: ; preds = %10, %.entry
+ %.0311 = phi i32 [ 1050, %.entry ], [ 0, %10 ]
+ %.0 = phi i32 [ 0, %.entry ], [ %11, %10 ]
+ %.2 = phi i32 [ 0, %.entry ], [ %12, %10 ]
+ %0 = call <4 x i32> @llvm.amdgcn.image.load.1d.v4i32.i32(i32 15, i32 %.0311, <8 x i32> undef, i32 0, i32 1)
+ %.i112 = extractelement <4 x i32> %0, i32 1
+ %.i3 = extractelement <4 x i32> %0, i32 3
+ br i1 undef, label %10, label %1
+
+1: ; preds = %.exit
+ %2 = or i32 0, %.i112
+ %3 = or i32 0, %2
+ %4 = icmp eq i32 %3, 0
+ %5 = or i32 %.i3, %3
+ %6 = icmp eq i32 %5, 0
+ %7 = icmp eq i32 %.i3, 1
+ %8 = and i1 %7, %4
+ %9 = or i1 %6, %8
+ br label %10
+
+10: ; preds = %1, %.exit
+ %.2.0.in.in = phi i1 [ %9, %1 ], [ undef, %.exit ]
+ %.2.0.in = xor i1 %.2.0.in.in, true
+ %.2.0 = zext i1 %.2.0.in to i32
+ %11 = add i32 %.0, %.2.0
+ %12 = add i32 %.2, %.2.0
+ %13 = icmp sgt i32 %.0311, -1050
+ br i1 %13, label %.exit, label %14
+
+14: ; preds = %10
+ %15 = add i32 %.2, %.2.0
+ %16 = add i32 0, %.2.0
+ %17 = add i32 %.0, %.2.0
+ %18 = or i32 %17, %16
+ %19 = or i32 %15, %18
+ %20 = or i32 undef, %19
+ ret void
+}
+
+attributes #0 = { nounwind readonly willreturn }
+declare <4 x i32> @llvm.amdgcn.image.load.1d.v4i32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1050,6 +1050,21 @@
{LHS, RHS, CI,
CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
} else {
+ // The result of a boolean operation is represented as a 32-bit/64-bit sgpr
+ // with bits set potentially even for inactive lanes, so mask them out here.
+ unsigned CondOpc = CI->getOpcode();
+ if (CondOpc == ISD::AND || CondOpc == ISD::OR || CondOpc == ISD::XOR) {
+ auto ST = static_cast<const GCNSubtarget *>(Subtarget);
+ CI = SDValue(
+ CurDAG->getMachineNode(
+ ST->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, DL,
+ MVT::i1,
+ CurDAG->getRegister(
+ ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, MVT::i1),
+ CI),
+ 0);
+ }
+
unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
: AMDGPU::S_SUB_CO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D86878.291165.patch
Type: text/x-patch
Size: 3626 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200911/83cba208/attachment.bin>
More information about the llvm-commits
mailing list