[llvm] r337875 - [X86] Use a shift plus an lea for multiplying by a constant that is a power of 2 plus 2/4/8.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 24 18:15:39 PDT 2018


Author: ctopper
Date: Tue Jul 24 18:15:38 2018
New Revision: 337875

URL: http://llvm.org/viewvc/llvm-project?rev=337875&view=rev
Log:
[X86] Use a shift plus an lea for multiplying by a constant that is a power of 2 plus 2/4/8.

The LEA allows us to combine an add and the multiply by 2/4/8 together so we just need a shift for the larger power of 2.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
    llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
    llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 24 18:15:38 2018
@@ -33749,6 +33749,24 @@ static SDValue combineMulSpecial(uint64_
     return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
                        combineMulMulAddOrSub(9, 3, /*isAdd*/ true));
   }
+
+  // Another trick. If this is a power 2 + 2/4/8, we can use a shift followed
+  // by a single LEA.
+  // First check if this a sum of two power of 2s because that's easy. Then
+  // count how many zeros are up to the first bit.
+  // TODO: We can do this even without LEA at a cost of two shifts and an add.
+  if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+    unsigned ScaleShift = countTrailingZeros(MulAmt);
+    if (ScaleShift >= 1 && ScaleShift < 4) {
+      unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
+      SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ShiftAmt, DL, MVT::i8));
+      SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ScaleShift, DL, MVT::i8));
+      return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
+    }
+  }
+
   return SDValue();
 }
 

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll Tue Jul 24 18:15:38 2018
@@ -705,11 +705,10 @@ define i16 @test_mul_by_62(i16 %x) {
 define i16 @test_mul_by_66(i16 %x) {
 ; X86-LABEL: test_mul_by_66:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    shll $6, %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $6, %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
@@ -718,8 +717,7 @@ define i16 @test_mul_by_66(i16 %x) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $6, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    leal (%rax,%rdi,2), %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 66
@@ -746,6 +744,28 @@ define i16 @test_mul_by_73(i16 %x) {
   ret i16 %mul
 }
 
+define i16 @test_mul_by_520(i16 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $9, %ecx
+; X86-NEXT:    leal (%ecx,%eax,8), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mul_by_520:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $9, %eax
+; X64-NEXT:    leal (%rax,%rdi,8), %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %mul = mul nsw i16 %x, 520
+  ret i16 %mul
+}
+
 ; (x*9+42)*(x*5+2)
 define i16 @test_mul_spec(i16 %x) nounwind {
 ; X86-LABEL: test_mul_spec:

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll Tue Jul 24 18:15:38 2018
@@ -1836,11 +1836,10 @@ define i32 @test_mul_by_62(i32 %x) {
 define i32 @test_mul_by_66(i32 %x) {
 ; X86-LABEL: test_mul_by_66:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    shll $6, %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $6, %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-HSW-LABEL: test_mul_by_66:
@@ -1848,8 +1847,7 @@ define i32 @test_mul_by_66(i32 %x) {
 ; X64-HSW-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    shll $6, %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [7:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_66:
@@ -1857,8 +1855,7 @@ define i32 @test_mul_by_66(i32 %x) {
 ; X64-JAG-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    shll $6, %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rdi,2), %eax # sched: [2:1.00]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_66:
@@ -1943,6 +1940,63 @@ define i32 @test_mul_by_73(i32 %x) {
   ret i32 %mul
 }
 
+define i32 @test_mul_by_520(i32 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $9, %ecx
+; X86-NEXT:    leal (%ecx,%eax,8), %eax
+; X86-NEXT:    retl
+;
+; X64-HSW-LABEL: test_mul_by_520:
+; X64-HSW:       # %bb.0:
+; X64-HSW-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    shll $9, %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_520:
+; X64-JAG:       # %bb.0:
+; X64-JAG-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    shll $9, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rdi,8), %eax # sched: [2:1.00]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_520:
+; X86-NOOPT:       # %bb.0:
+; X86-NOOPT-NEXT:    imull $520, {{[0-9]+}}(%esp), %eax # imm = 0x208
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_520:
+; HSW-NOOPT:       # %bb.0:
+; HSW-NOOPT-NEXT:    imull $520, %edi, %eax # imm = 0x208
+; HSW-NOOPT-NEXT:    # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_520:
+; JAG-NOOPT:       # %bb.0:
+; JAG-NOOPT-NEXT:    imull $520, %edi, %eax # imm = 0x208
+; JAG-NOOPT-NEXT:    # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_520:
+; X64-SLM:       # %bb.0:
+; X64-SLM-NEXT:    imull $520, %edi, %eax # imm = 0x208
+; X64-SLM-NEXT:    # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_520:
+; SLM-NOOPT:       # %bb.0:
+; SLM-NOOPT-NEXT:    imull $520, %edi, %eax # imm = 0x208
+; SLM-NOOPT-NEXT:    # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
+  %mul = mul nsw i32 %x, 520
+  ret i32 %mul
+}
+
 ; (x*9+42)*(x*5+2)
 define i32 @test_mul_spec(i32 %x) nounwind {
 ; X86-LABEL: test_mul_spec:

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll Tue Jul 24 18:15:38 2018
@@ -1938,8 +1938,7 @@ define i64 @test_mul_by_66(i64 %x) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shll $6, %ecx
-; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %ecx
 ; X86-NEXT:    movl $66, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
 ; X86-NEXT:    addl %ecx, %edx
@@ -1949,16 +1948,14 @@ define i64 @test_mul_by_66(i64 %x) {
 ; X64-HSW:       # %bb.0:
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
 ; X64-HSW-NEXT:    shlq $6, %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [7:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_66:
 ; X64-JAG:       # %bb.0:
 ; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.50]
 ; X64-JAG-NEXT:    shlq $6, %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [2:1.00]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_66:
@@ -2049,6 +2046,67 @@ define i64 @test_mul_by_73(i64 %x) {
   ret i64 %mul
 }
 
+define i64 @test_mul_by_520(i64 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $9, %ecx
+; X86-NEXT:    leal (%ecx,%eax,8), %ecx
+; X86-NEXT:    movl $520, %eax # imm = 0x208
+; X86-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    retl
+;
+; X64-HSW-LABEL: test_mul_by_520:
+; X64-HSW:       # %bb.0:
+; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    shlq $9, %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_520:
+; X64-JAG:       # %bb.0:
+; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    shlq $9, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rdi,8), %rax # sched: [2:1.00]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_520:
+; X86-NOOPT:       # %bb.0:
+; X86-NOOPT-NEXT:    movl $520, %eax # imm = 0x208
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $520, {{[0-9]+}}(%esp), %ecx # imm = 0x208
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_520:
+; HSW-NOOPT:       # %bb.0:
+; HSW-NOOPT-NEXT:    imulq $520, %rdi, %rax # imm = 0x208
+; HSW-NOOPT-NEXT:    # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_520:
+; JAG-NOOPT:       # %bb.0:
+; JAG-NOOPT-NEXT:    imulq $520, %rdi, %rax # imm = 0x208
+; JAG-NOOPT-NEXT:    # sched: [6:4.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_520:
+; X64-SLM:       # %bb.0:
+; X64-SLM-NEXT:    imulq $520, %rdi, %rax # imm = 0x208
+; X64-SLM-NEXT:    # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_520:
+; SLM-NOOPT:       # %bb.0:
+; SLM-NOOPT-NEXT:    imulq $520, %rdi, %rax # imm = 0x208
+; SLM-NOOPT-NEXT:    # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
+  %mul = mul nsw i64 %x, 520
+  ret i64 %mul
+}
+
 ; (x*9+42)*(x*5+2)
 define i64 @test_mul_spec(i64 %x) nounwind {
 ; X86-LABEL: test_mul_spec:




More information about the llvm-commits mailing list