[llvm] 77394c1 - [X86] Don't attempt to fold sub(C1, xor(X, C2)) with opaque constants

Thu Mar 11 04:06:52 PST 2021

Author: Simon Pilgrim
Date: 2021-03-11T12:06:40Z
New Revision: 77394c12a48822bb7a9183408f922d90c1d563cd

URL: https://github.com/llvm/llvm-project/commit/77394c12a48822bb7a9183408f922d90c1d563cd
DIFF: https://github.com/llvm/llvm-project/commit/77394c12a48822bb7a9183408f922d90c1d563cd.diff

LOG: [X86] Don't attempt to fold sub(C1, xor(X, C2)) with opaque constants

Fixes PR49451

Added: 
    llvm/test/CodeGen/X86/pr49451.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f752ba9c5ed7..ea1bc729719c 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49097,14 +49097,22 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
 
+  // TODO: Add NoOpaque handling to isConstantIntBuildVectorOrConstantInt.
+  auto IsNonOpaqueConstant = [&](SDValue Op) {
+    if (SDNode *C = DAG.isConstantIntBuildVectorOrConstantInt(Op)) {
+      if (auto *Cst = dyn_cast<ConstantSDNode>(C))
+        return !Cst->isOpaque();
+      return true;
+    }
+    return false;
+  };
+
   // X86 can't encode an immediate LHS of a sub. See if we can push the
   // negation into a preceding instruction. If the RHS of the sub is a XOR with
   // one use and a constant, invert the immediate, saving one register.
   // sub(C1, xor(X, C2)) -> add(xor(X, ~C2), C1+1)
-  if (Op1.getOpcode() == ISD::XOR &&
-      DAG.isConstantIntBuildVectorOrConstantInt(Op0) &&
-      DAG.isConstantIntBuildVectorOrConstantInt(Op1.getOperand(1)) &&
-      Op1->hasOneUse()) {
+  if (Op1.getOpcode() == ISD::XOR && IsNonOpaqueConstant(Op0) &&
+      IsNonOpaqueConstant(Op1.getOperand(1)) && Op1->hasOneUse()) {
     SDLoc DL(N);
     EVT VT = Op0.getValueType();
     SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0),

diff  --git a/llvm/test/CodeGen/X86/pr49451.ll b/llvm/test/CodeGen/X86/pr49451.ll
new file mode 100644
index 000000000000..396b92df3c0d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr49451.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+@s_0 = external dso_local local_unnamed_addr global i16, align 2
+@s_2 = external dso_local local_unnamed_addr global i16, align 2
+
+define void @func_6(i8 %uc_8, i64 %uli_10) nounwind {
+; X86-LABEL: func_6:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    # implicit-def: $si
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB0_1: # %for.body612
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    je .LBB0_2
+; X86-NEXT:  # %bb.3: # %if.end1401
+; X86-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; X86-NEXT:    testb %dl, %dl
+; X86-NEXT:    addl %eax, %esi
+; X86-NEXT:    movw %si, s_2
+; X86-NEXT:    movw %bx, s_0
+; X86-NEXT:    incl %ecx
+; X86-NEXT:    incl %ebx
+; X86-NEXT:    cmpw $73, %cx
+; X86-NEXT:    jl .LBB0_1
+; X86-NEXT:  # %bb.4: # %for.body1703
+; X86-NEXT:  .LBB0_2: # %if.then671
+;
+; X64-LABEL: func_6:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl $23090, %eax # imm = 0x5A32
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    # implicit-def: $dx
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB0_1: # %for.body612
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    testb %cl, %cl
+; X64-NEXT:    je .LBB0_2
+; X64-NEXT:  # %bb.3: # %if.end1401
+; X64-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; X64-NEXT:    testb %cl, %cl
+; X64-NEXT:    addl %esi, %edx
+; X64-NEXT:    movw %dx, {{.*}}(%rip)
+; X64-NEXT:    leal -23090(%rax), %edi
+; X64-NEXT:    movw %di, {{.*}}(%rip)
+; X64-NEXT:    incq %rax
+; X64-NEXT:    leal -23091(%rax), %edi
+; X64-NEXT:    cmpw $73, %di
+; X64-NEXT:    jl .LBB0_1
+; X64-NEXT:  # %bb.4: # %for.body1703
+; X64-NEXT:  .LBB0_2: # %if.then671
+entry:
+  %conv649 = zext i8 %uc_8 to i64
+  %xor650 = xor i64 %conv649, 296357731680175678
+  %i = trunc i64 %uli_10 to i16
+  br label %for.body612
+
+for.body612:                                      ; preds = %for.inc1677, %entry
+  %i1 = phi i16 [ undef, %entry ], [ %conv1532, %for.inc1677 ]
+  %i2 = phi i16 [ -1, %entry ], [ %add1679, %for.inc1677 ]
+  br label %if.then635
+
+if.then635:                                       ; preds = %for.body612
+  %conv653 = sext i16 %i2 to i64
+  %cmp654.not = icmp eq i64 %xor650, %conv653
+  %conv653.op = xor i64 %conv653, 296357731680175678
+  %tobool670.not = icmp eq i64 undef, 0
+  br i1 %tobool670.not, label %if.end1401, label %if.then671
+
+if.then671:                                       ; preds = %if.then635
+  %cmp830 = icmp sgt i16 %i1, 21
+  unreachable
+
+if.end1401:                                       ; preds = %if.then635
+  %conv1421 = sext i16 %i2 to i32
+  %or1422 = or i32 %conv1421, undef
+  br label %if.end1510
+
+if.end1510:                                       ; preds = %if.end1401
+  br i1 undef, label %cond.false1514, label %cond.end1528
+
+cond.false1514:                                   ; preds = %if.end1510
+  %conv1525 = sext i16 %i2 to i64
+  %add1526 = add nsw i64 %conv1525, 23091
+  br label %cond.end1528
+
+cond.end1528:                                     ; preds = %cond.false1514, %if.end1510
+  %cond1529 = phi i64 [ %add1526, %cond.false1514 ], [ undef, %if.end1510 ]
+  %conv1532 = add i16 %i1, %i
+  store i16 %conv1532, i16* @s_2, align 2
+  br label %for.inc1677
+
+for.inc1677:                                      ; preds = %cond.end1528
+  %add1679 = add i16 %i2, 1
+  store i16 %add1679, i16* @s_0, align 2
+  %cmp610 = icmp slt i16 %add1679, 73
+  br i1 %cmp610, label %for.body612, label %for.body1703
+
+for.body1703:                                     ; preds = %for.inc1677
+  unreachable
+}