[llvm] [InstCombine] Don't use dominating conditions to transform sub into xor. (PR #88566)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 16 21:45:29 PDT 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/88566
>From 0c05324f9801b1eedc712f81e6ef5b0bc7fbcc34 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 12 Apr 2024 12:35:40 -0700
Subject: [PATCH 1/3] [InstCombine] Add test case for turning sub into xor
using dominating condition. NFC
I plan to disable using dominating conditions for turning sub into
xor, but first we need a test that demonstrates it currently happens.
---
llvm/test/Transforms/InstCombine/sub-xor.ll | 23 +++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll
index 71da73d51ae37e..b4e87d0405fc48 100644
--- a/llvm/test/Transforms/InstCombine/sub-xor.ll
+++ b/llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -157,3 +157,26 @@ define <2 x i8> @xor_add_splat_undef(<2 x i8> %x) {
%add = add <2 x i8> %xor, <i8 42, i8 42>
ret <2 x i8> %add
}
+
+define i32 @xor_dominating_cond(i32 %x) {
+; CHECK-LABEL: @xor_dominating_cond(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[A:%.*]] = xor i32 [[X]], 255
+; CHECK-NEXT: ret i32 [[A]]
+; CHECK: if.end:
+; CHECK-NEXT: ret i32 [[X]]
+;
+entry:
+ %cond = icmp ult i32 %x, 256
+ br i1 %cond, label %if.then, label %if.end
+
+if.then:
+ %a = sub i32 255, %x
+ ret i32 %a
+
+if.end:
+ ret i32 %x
+}
>From 2e86abe8783ddb550cbccf088776891bb45a4d4a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 16 Apr 2024 21:25:28 -0700
Subject: [PATCH 2/3] [InstCombine] Add phase ordering test for #88239. NFC
---
.../Transforms/PhaseOrdering/X86/pr88239.ll | 55 +++++++++++++++++++
1 file changed, 55 insertions(+)
create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
new file mode 100644
index 00000000000000..3afa1904fb249a
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes="default<O3>" -mcpu=skx -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr noalias nocapture noundef readonly [[TMP0:%.*]], ptr noalias nocapture noundef writeonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: br label [[TMP4:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[TMP2]] ], [ [[VEC_IND_NEXT:%.*]], [[TMP4]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[VEC_IND]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP6]], <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[TMP4]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: ret void
+;
+ br label %3
+
+3: ; preds = %7, %2
+ %4 = phi i32 [ 0, %2 ], [ %15, %7 ]
+ %5 = icmp slt i32 %4, 256
+ br i1 %5, label %7, label %6
+
+6: ; preds = %3
+ ret void
+
+7: ; preds = %3
+ %8 = sub nsw i32 255, %4
+ %9 = zext nneg i32 %8 to i64
+ %10 = getelementptr inbounds i32, ptr %0, i64 %9
+ %11 = load i32, ptr %10, align 4
+ %12 = add nsw i32 %11, 5
+ %13 = sext i32 %4 to i64
+ %14 = getelementptr inbounds i32, ptr %1, i64 %13
+ store i32 %12, ptr %14, align 4
+ %15 = add nsw i32 %4, 1
+ br label %3
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+;.
>From bba4ceb82f63f1d46ca11b397f64c34d242463a2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 12 Apr 2024 12:38:02 -0700
Subject: [PATCH 3/3] [InstCombine] Don't use dominating conditions to
transform sub into xor.
Other passes are unable to reverse this transform if we use dominating
conditions.
Fixes #88239.
---
llvm/include/llvm/Analysis/SimplifyQuery.h | 6 ++++++
.../Transforms/InstCombine/InstCombineAddSub.cpp | 6 ++++--
llvm/test/Transforms/InstCombine/sub-xor.ll | 4 +++-
llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll | 13 ++++++-------
4 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index e5e6ae0d3d8e3e..a10c0dc49fa22e 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -113,6 +113,12 @@ struct SimplifyQuery {
using namespace PatternMatch;
return match(V, m_Undef());
}
+
+ SimplifyQuery getWithoutDomCondCache() const {
+ SimplifyQuery Copy(*this);
+ Copy.DC = nullptr;
+ return Copy;
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 07c50d866544b3..c8734bc00c4b59 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2281,8 +2281,10 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (match(Op0, m_APInt(Op0C))) {
if (Op0C->isMask()) {
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
- // zero.
- KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
+ // zero. We don't use information from dominating conditions so this
+ // transform is easier to reverse if necessary.
+ KnownBits RHSKnown = llvm::computeKnownBits(
+ Op1, 0, SQ.getWithInstruction(&I).getWithoutDomCondCache());
if ((*Op0C | RHSKnown.Zero).isAllOnes())
return BinaryOperator::CreateXor(Op1, Op0);
}
diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll
index b4e87d0405fc48..2976598e043fee 100644
--- a/llvm/test/Transforms/InstCombine/sub-xor.ll
+++ b/llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -158,13 +158,15 @@ define <2 x i8> @xor_add_splat_undef(<2 x i8> %x) {
ret <2 x i8> %add
}
+; Make sure we don't convert sub to xor using dominating condition. That makes
+; it hard for other passes to reverse.
define i32 @xor_dominating_cond(i32 %x) {
; CHECK-LABEL: @xor_dominating_cond(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X:%.*]], 256
; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[A:%.*]] = xor i32 [[X]], 255
+; CHECK-NEXT: [[A:%.*]] = sub nuw nsw i32 255, [[X]]
; CHECK-NEXT: ret i32 [[A]]
; CHECK: if.end:
; CHECK-NEXT: ret i32 [[X]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
index 3afa1904fb249a..1c16b37144f1eb 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
@@ -8,19 +8,18 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr noalias nocapture noundef readonly [[TMP0:%.*]], ptr noalias nocapture noundef writeonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28
; CHECK-NEXT: br label [[TMP4:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[TMP2]] ], [ [[VEC_IND_NEXT:%.*]], [[TMP4]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[VEC_IND]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP6]], <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], <8 x i64> [[TMP3]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP10]], align 4
+; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[TMP4]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
More information about the llvm-commits
mailing list