[llvm] r367710 - [DAGCombiner] try to convert opposing shifts to casts

Fri Aug 2 12:33:46 PDT 2019

Author: spatel
Date: Fri Aug  2 12:33:46 2019
New Revision: 367710

URL: http://llvm.org/viewvc/llvm-project?rev=367710&view=rev
Log:
[DAGCombiner] try to convert opposing shifts to casts

This reverses a questionable IR canonicalization when a truncate
is free:

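sra (add (shl X, N1C), AddC), N1C -->
sext (add (trunc X to (width - N1C)), AddC')

where AddC' is AddC logically shifted right by N1C and then truncated to
the narrow (width - N1C)-bit type.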

https://rise4fun.com/Alive/slRC

More details in PR42644:
https://bugs.llvm.org/show_bug.cgi?id=42644

I limited this to before operation legalization (the !LegalOperations
check) for code simplicity because that should be enough to reverse the
IR patterns. I don't have any evidence (no regression test diffs) that we
need to try this later.

Differential Revision: https://reviews.llvm.org/D65607

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
    llvm/trunk/test/CodeGen/X86/shift-combine.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=367710&r1=367709&r2=367710&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Aug  2 12:33:46 2019
@@ -7616,6 +7616,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N)
     }
   }
 
+  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+  //   sra (add (shl X, N1C), AddC), N1C -->
+  //   sext (add (trunc X to (width - N1C)), AddC')
+  if (!LegalOperations && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+      SDValue Shl = N0.getOperand(0);
+      // Determine what the truncate's type would be and ask the target if that
+      // is a free operation.
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned ShiftAmt = N1C->getZExtValue();
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+      if (TLI.isTruncateFree(VT, TruncVT)) {
+        SDLoc DL(N);
+        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+        return DAG.getSExtOrTrunc(Add, DL, VT);
+      }
+    }
+  }
+
   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
       N1.getOperand(0).getOpcode() == ISD::AND) {

Modified: llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/shift-mod.ll?rev=367710&r1=367709&r2=367710&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/shift-mod.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/shift-mod.ll Fri Aug  2 12:33:46 2019
@@ -78,9 +78,8 @@ entry:
 define i64 @ashr_add_shl_i32(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4294967296
-; CHECK-NEXT:    add x8, x8, x0, lsl #32
-; CHECK-NEXT:    asr x0, x8, #32
+; CHECK-NEXT:    add w8, w0, #1 // =1
+; CHECK-NEXT:    sxtw x0, w8
 ; CHECK-NEXT:    ret
   %conv = shl i64 %r, 32
   %sext = add i64 %conv, 4294967296
@@ -91,9 +90,8 @@ define i64 @ashr_add_shl_i32(i64 %r) {
 define i64 @ashr_add_shl_i8(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #72057594037927936
-; CHECK-NEXT:    add x8, x8, x0, lsl #56
-; CHECK-NEXT:    asr x0, x8, #56
+; CHECK-NEXT:    add w8, w0, #1 // =1
+; CHECK-NEXT:    sxtb x0, w8
 ; CHECK-NEXT:    ret
   %conv = shl i64 %r, 56
   %sext = add i64 %conv, 72057594037927936

Modified: llvm/trunk/test/CodeGen/X86/shift-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-combine.ll?rev=367710&r1=367709&r2=367710&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-combine.ll Fri Aug  2 12:33:46 2019
@@ -168,10 +168,8 @@ define i64 @ashr_add_shl_i32(i64 %r) nou
 ;
 ; X64-LABEL: ashr_add_shl_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $32, %rdi
-; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    sarq $32, %rax
+; X64-NEXT:    incl %edi
+; X64-NEXT:    movslq %edi, %rax
 ; X64-NEXT:    retq
   %conv = shl i64 %r, 32
   %sext = add i64 %conv, 4294967296
@@ -182,20 +180,17 @@ define i64 @ashr_add_shl_i32(i64 %r) nou
 define i64 @ashr_add_shl_i8(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_shl_i8:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    addl $33554432, %edx # imm = 0x2000000
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    sarl $24, %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    addb $2, %al
+; X32-NEXT:    movsbl %al, %eax
+; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    sarl $31, %edx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: ashr_add_shl_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $56, %rdi
-; X64-NEXT:    movabsq $144115188075855872, %rax # imm = 0x200000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    sarq $56, %rax
+; X64-NEXT:    addb $2, %dil
+; X64-NEXT:    movsbq %dil, %rax
 ; X64-NEXT:    retq
   %conv = shl i64 %r, 56
   %sext = add i64 %conv, 144115188075855872
@@ -209,34 +204,31 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    shll $24, %edi
-; X32-NEXT:    shll $24, %esi
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    shll $24, %ecx
-; X32-NEXT:    addl $16777216, %ecx # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %edx # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %esi # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %edi # imm = 0x1000000
-; X32-NEXT:    sarl $24, %edi
-; X32-NEXT:    sarl $24, %esi
-; X32-NEXT:    sarl $24, %edx
-; X32-NEXT:    sarl $24, %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X32-NEXT:    incb %dh
+; X32-NEXT:    movsbl %dh, %esi
+; X32-NEXT:    incb %ch
+; X32-NEXT:    movsbl %ch, %edi
+; X32-NEXT:    incb %dl
+; X32-NEXT:    movsbl %dl, %edx
+; X32-NEXT:    incb %cl
+; X32-NEXT:    movsbl %cl, %ecx
 ; X32-NEXT:    movl %ecx, 12(%eax)
 ; X32-NEXT:    movl %edx, 8(%eax)
-; X32-NEXT:    movl %esi, 4(%eax)
-; X32-NEXT:    movl %edi, (%eax)
+; X32-NEXT:    movl %edi, 4(%eax)
+; X32-NEXT:    movl %esi, (%eax)
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: ashr_add_shl_v4i8:
 ; X64:       # %bb.0:
+; X64-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-NEXT:    psubd %xmm1, %xmm0
 ; X64-NEXT:    pslld $24, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psrad $24, %xmm0
 ; X64-NEXT:    retq
   %conv = shl <4 x i32> %r, <i32 24, i32 24, i32 24, i32 24>