[llvm] 40866b7 - [DAGCombiner][X86] Fold sra (sub AddC, (shl X, N1C)), N1C --> sext (sub AddC', (trunc X to (width - N1C)))

Sat Jul 9 11:54:05 PDT 2022

Author: Craig Topper
Date: 2022-07-09T11:53:44-07:00
New Revision: 40866b74bd422ae72ad53270d04568249ed53d69

URL: https://github.com/llvm/llvm-project/commit/40866b74bd422ae72ad53270d04568249ed53d69
DIFF: https://github.com/llvm/llvm-project/commit/40866b74bd422ae72ad53270d04568249ed53d69.diff

LOG: [DAGCombiner][X86] Fold sra (sub AddC, (shl X, N1C)), N1C --> sext (sub AddC', (trunc X to (width - N1C)))

We already handled the equivalent fold for add with a constant RHS:
  sra (add (shl X, N1C), AddC), N1C --> sext (add (trunc X to (width - N1C)), AddC')
A similar pattern can occur for sub with a constant LHS.

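Test cases use add and a mul representing (neg (shl X, C)) because
that's what I saw in the wild. The mul will be decomposed into
(sub 0, (shl X, C)), the add constant is then folded into the sub to
give (sub AddC, (shl X, C)), and then the new transform can kick in.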

The tests were pre-committed, so this patch shows only the resulting changes to the generated code.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D128769

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/shift-combine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 66c8741c27746..4736b9dd6aace 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9297,31 +9297,44 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
   //   sra (add (shl X, N1C), AddC), N1C -->
   //   sext (add (trunc X to (width - N1C)), AddC')
-  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
-      N0.getOperand(0).getOpcode() == ISD::SHL &&
-      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
-    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
-      SDValue Shl = N0.getOperand(0);
-      // Determine what the truncate's type would be and ask the target if that
-      // is a free operation.
-      LLVMContext &Ctx = *DAG.getContext();
-      unsigned ShiftAmt = N1C->getZExtValue();
-      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
-      if (VT.isVector())
-        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
-
-      // TODO: The simple type check probably belongs in the default hook
-      //       implementation and/or target-specific overrides (because
-      //       non-simple types likely require masking when legalized), but that
-      //       restriction may conflict with other transforms.
-      if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
-          TLI.isTruncateFree(VT, TruncVT)) {
-        SDLoc DL(N);
-        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
-        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
-                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
-        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
-        return DAG.getSExtOrTrunc(Add, DL, VT);
+  //   sra (sub AddC, (shl X, N1C)), N1C -->
+  //   sext (sub AddC1',(trunc X to (width - N1C)))
+  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
+      N0.hasOneUse()) {
+    bool IsAdd = N0.getOpcode() == ISD::ADD;
+    SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
+    if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
+        Shl.hasOneUse()) {
+      // TODO: AddC does not need to be a splat.
+      if (ConstantSDNode *AddC =
+              isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
+        // Determine what the truncate's type would be and ask the target if
+        // that is a free operation.
+        LLVMContext &Ctx = *DAG.getContext();
+        unsigned ShiftAmt = N1C->getZExtValue();
+        EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+        if (VT.isVector())
+          TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+
+        // TODO: The simple type check probably belongs in the default hook
+        //       implementation and/or target-specific overrides (because
+        //       non-simple types likely require masking when legalized), but
+        //       that restriction may conflict with other transforms.
+        if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+            TLI.isTruncateFree(VT, TruncVT)) {
+          SDLoc DL(N);
+          SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+          SDValue ShiftC =
+              DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
+                                  TruncVT.getScalarSizeInBits()),
+                              DL, TruncVT);
+          SDValue Add;
+          if (IsAdd)
+            Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+          else
+            Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
+          return DAG.getSExtOrTrunc(Add, DL, VT);
+        }
       }
     }
   }

diff  --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index c49176500cf68..075fbb29f4a95 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -419,6 +419,7 @@ if:
   unreachable
 }
 
+; The mul here is the equivalent of (neg (shl X, 32)).
 define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_neg_shl_i32:
 ; X32:       # %bb.0:
@@ -430,10 +431,9 @@ define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
 ;
 ; X64-LABEL: ashr_add_neg_shl_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $32, %rdi
-; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
-; X64-NEXT:    subq %rdi, %rax
-; X64-NEXT:    sarq $32, %rax
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    cltq
 ; X64-NEXT:    retq
   %conv = mul i64 %r, -4294967296
   %sext = add i64 %conv, 4294967296
@@ -441,6 +441,7 @@ define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
   ret i64 %conv1
 }
 
+; The mul here is the equivalent of (neg (shl X, 56)).
 define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_neg_shl_i8:
 ; X32:       # %bb.0:
@@ -455,10 +456,9 @@ define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
 ;
 ; X64-LABEL: ashr_add_neg_shl_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $56, %rdi
-; X64-NEXT:    movabsq $144115188075855872, %rax # imm = 0x200000000000000
-; X64-NEXT:    subq %rdi, %rax
-; X64-NEXT:    sarq $56, %rax
+; X64-NEXT:    movb $2, %al
+; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    movsbq %al, %rax
 ; X64-NEXT:    retq
   %conv = mul i64 %r, -72057594037927936
   %sext = add i64 %conv, 144115188075855872
@@ -466,42 +466,31 @@ define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
   ret i64 %conv1
 }
 
+; The mul here is the equivalent of (neg (shl X, 24)).
 define <4 x i32> @ashr_add_neg_shl_v4i8(<4 x i32> %r) nounwind {
 ; X32-LABEL: ashr_add_neg_shl_v4i8:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    shll $24, %esi
-; X32-NEXT:    shll $24, %ebx
-; X32-NEXT:    shll $24, %ebp
-; X32-NEXT:    movl $16777216, %ecx # imm = 0x1000000
-; X32-NEXT:    movl $16777216, %edi # imm = 0x1000000
-; X32-NEXT:    subl %ebp, %edi
-; X32-NEXT:    movl $16777216, %ebp # imm = 0x1000000
-; X32-NEXT:    subl %ebx, %ebp
-; X32-NEXT:    movl $16777216, %ebx # imm = 0x1000000
-; X32-NEXT:    subl %esi, %ebx
-; X32-NEXT:    subl %edx, %ecx
-; X32-NEXT:    sarl $24, %ecx
-; X32-NEXT:    sarl $24, %ebx
-; X32-NEXT:    sarl $24, %ebp
-; X32-NEXT:    sarl $24, %edi
-; X32-NEXT:    movl %edi, 12(%eax)
-; X32-NEXT:    movl %ebp, 8(%eax)
-; X32-NEXT:    movl %ebx, 4(%eax)
-; X32-NEXT:    movl %ecx, (%eax)
+; X32-NEXT:    movb $1, %cl
+; X32-NEXT:    movb $1, %dl
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %dl
+; X32-NEXT:    movsbl %dl, %edx
+; X32-NEXT:    movb $1, %ch
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movsbl %ch, %esi
+; X32-NEXT:    movb $1, %ch
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movsbl %ch, %edi
+; X32-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    movsbl %cl, %ecx
+; X32-NEXT:    movl %ecx, 12(%eax)
+; X32-NEXT:    movl %edi, 8(%eax)
+; X32-NEXT:    movl %esi, 4(%eax)
+; X32-NEXT:    movl %edx, (%eax)
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
-; X32-NEXT:    popl %ebx
-; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: ashr_add_neg_shl_v4i8: