[llvm] [InstCombine] Don't use dominating conditions to transform sub into xor. (PR #88566)

Wed Apr 17 11:01:35 PDT 2024

https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/88566

>From 084d6ee4fdcbad25bab692232c75d321a5f60908 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 12 Apr 2024 12:38:02 -0700
Subject: [PATCH] [InstCombine] Don't use dominating conditions to transform
 sub into xor.

Other passes are unable to reverse this transform if we use dominating
conditions.

Fixes #88239.
---
 llvm/include/llvm/Analysis/SimplifyQuery.h          |  6 ++++++
 .../Transforms/InstCombine/InstCombineAddSub.cpp    |  6 ++++--
 llvm/test/Transforms/InstCombine/sub-xor.ll         |  4 +++-
 llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll   | 13 ++++++-------
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index e5e6ae0d3d8e3e..a10c0dc49fa22e 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -113,6 +113,12 @@ struct SimplifyQuery {
     using namespace PatternMatch;
     return match(V, m_Undef());
   }
+
+  SimplifyQuery getWithoutDomCondCache() const {
+    SimplifyQuery Copy(*this);
+    Copy.DC = nullptr;
+    return Copy;
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c59b867b10e7d1..df7f02810ee43f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2281,8 +2281,10 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   if (match(Op0, m_APInt(Op0C))) {
     if (Op0C->isMask()) {
       // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
-      // zero.
-      KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
+      // zero. We don't use information from dominating conditions so this
+      // transform is easier to reverse if necessary.
+      KnownBits RHSKnown = llvm::computeKnownBits(
+          Op1, 0, SQ.getWithInstruction(&I).getWithoutDomCondCache());
       if ((*Op0C | RHSKnown.Zero).isAllOnes())
         return BinaryOperator::CreateXor(Op1, Op0);
     }
diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll
index b4e87d0405fc48..2976598e043fee 100644
--- a/llvm/test/Transforms/InstCombine/sub-xor.ll
+++ b/llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -158,13 +158,15 @@ define <2 x i8> @xor_add_splat_undef(<2 x i8> %x) {
   ret <2 x i8> %add
 }
 
+; Make sure we don't convert sub to xor using dominating condition. That makes
+; it hard for other passe to reverse.
 define i32 @xor_dominating_cond(i32 %x) {
 ; CHECK-LABEL: @xor_dominating_cond(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[X:%.*]], 256
 ; CHECK-NEXT:    br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[A:%.*]] = xor i32 [[X]], 255
+; CHECK-NEXT:    [[A:%.*]] = sub nuw nsw i32 255, [[X]]
 ; CHECK-NEXT:    ret i32 [[A]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret i32 [[X]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
index 3afa1904fb249a..1c16b37144f1eb 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
@@ -8,19 +8,18 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
 ; CHECK-LABEL: define void @foo(
 ; CHECK-SAME: ptr noalias nocapture noundef readonly [[TMP0:%.*]], ptr noalias nocapture noundef writeonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  vector.ph:
+; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28
 ; CHECK-NEXT:    br label [[TMP4:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[TMP2]] ], [ [[VEC_IND_NEXT:%.*]], [[TMP4]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i64> [[VEC_IND]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i64> [[TMP6]], <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], <8 x i64> [[TMP3]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = tail call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store <8 x i32> [[TMP5]], ptr [[TMP10]], align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[TMP4]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block: