[llvm] r337875 - [X86] Use a shift plus an lea for multiplying by a constant that is a power of 2 plus 2/4/8.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 24 18:15:39 PDT 2018
Author: ctopper
Date: Tue Jul 24 18:15:38 2018
New Revision: 337875
URL: http://llvm.org/viewvc/llvm-project?rev=337875&view=rev
Log:
[X86] Use a shift plus an lea for multiplying by a constant that is a power of 2 plus 2/4/8.
The LEA allows us to combine an add and the multiply by 2/4/8 together so we just need a shift for the larger power of 2.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 24 18:15:38 2018
@@ -33749,6 +33749,24 @@ static SDValue combineMulSpecial(uint64_
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulMulAddOrSub(9, 3, /*isAdd*/ true));
}
+
+ // Another trick. If this is a power of 2 + 2/4/8, we can use a shift
+ // followed by a single LEA.
+ // First check if this is a sum of two powers of 2 because that's easy. Then
+ // count the trailing zeros below the lowest set bit to get the LEA scale.
+ // TODO: We can do this even without LEA at a cost of two shifts and an add.
+ if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+ unsigned ScaleShift = countTrailingZeros(MulAmt);
+ if (ScaleShift >= 1 && ScaleShift < 4) {
+ unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ScaleShift, DL, MVT::i8));
+ return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
+ }
+ }
+
return SDValue();
}
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll Tue Jul 24 18:15:38 2018
@@ -705,11 +705,10 @@ define i16 @test_mul_by_62(i16 %x) {
define i16 @test_mul_by_66(i16 %x) {
; X86-LABEL: test_mul_by_66:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shll $6, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $6, %ecx
+; X86-NEXT: leal (%ecx,%eax,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
@@ -718,8 +717,7 @@ define i16 @test_mul_by_66(i16 %x) {
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $6, %eax
-; X64-NEXT: leal (%rax,%rdi), %eax
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: leal (%rax,%rdi,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 66
@@ -746,6 +744,28 @@ define i16 @test_mul_by_73(i16 %x) {
ret i16 %mul
}
+define i16 @test_mul_by_520(i16 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $9, %ecx
+; X86-NEXT: leal (%ecx,%eax,8), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_by_520:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $9, %eax
+; X64-NEXT: leal (%rax,%rdi,8), %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, 520
+ ret i16 %mul
+}
+
; (x*9+42)*(x*5+2)
define i16 @test_mul_spec(i16 %x) nounwind {
; X86-LABEL: test_mul_spec:
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll Tue Jul 24 18:15:38 2018
@@ -1836,11 +1836,10 @@ define i32 @test_mul_by_62(i32 %x) {
define i32 @test_mul_by_66(i32 %x) {
; X86-LABEL: test_mul_by_66:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shll $6, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $6, %ecx
+; X86-NEXT: leal (%ecx,%eax,2), %eax
; X86-NEXT: retl
;
; X64-HSW-LABEL: test_mul_by_66:
@@ -1848,8 +1847,7 @@ define i32 @test_mul_by_66(i32 %x) {
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [7:1.00]
;
; X64-JAG-LABEL: test_mul_by_66:
@@ -1857,8 +1855,7 @@ define i32 @test_mul_by_66(i32 %x) {
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [2:1.00]
; X64-JAG-NEXT: retq # sched: [4:1.00]
;
; X86-NOOPT-LABEL: test_mul_by_66:
@@ -1943,6 +1940,63 @@ define i32 @test_mul_by_73(i32 %x) {
ret i32 %mul
}
+define i32 @test_mul_by_520(i32 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $9, %ecx
+; X86-NEXT: leal (%ecx,%eax,8), %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_520:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $9, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_520:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $9, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax # sched: [2:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_520:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %eax # imm = 0x208
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_520:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
+; HSW-NOOPT-NEXT: # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_520:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
+; JAG-NOOPT-NEXT: # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_520:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: imull $520, %edi, %eax # imm = 0x208
+; X64-SLM-NEXT: # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_520:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
+; SLM-NOOPT-NEXT: # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, 520
+ ret i32 %mul
+}
+
; (x*9+42)*(x*5+2)
define i32 @test_mul_spec(i32 %x) nounwind {
; X86-LABEL: test_mul_spec:
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll?rev=337875&r1=337874&r2=337875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll Tue Jul 24 18:15:38 2018
@@ -1938,8 +1938,7 @@ define i64 @test_mul_by_66(i64 %x) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll $6, %ecx
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: leal (%ecx,%eax,2), %ecx
; X86-NEXT: movl $66, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
@@ -1949,16 +1948,14 @@ define i64 @test_mul_by_66(i64 %x) {
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [7:1.00]
;
; X64-JAG-LABEL: test_mul_by_66:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [2:1.00]
; X64-JAG-NEXT: retq # sched: [4:1.00]
;
; X86-NOOPT-LABEL: test_mul_by_66:
@@ -2049,6 +2046,67 @@ define i64 @test_mul_by_73(i64 %x) {
ret i64 %mul
}
+define i64 @test_mul_by_520(i64 %x) {
+; X86-LABEL: test_mul_by_520:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $9, %ecx
+; X86-NEXT: leal (%ecx,%eax,8), %ecx
+; X86-NEXT: movl $520, %eax # imm = 0x208
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_520:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $9, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_520:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $9, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax # sched: [2:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_520:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: movl $520, %eax # imm = 0x208
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %ecx # imm = 0x208
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_520:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; HSW-NOOPT-NEXT: # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_520:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; JAG-NOOPT-NEXT: # sched: [6:4.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_520:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; X64-SLM-NEXT: # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_520:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; SLM-NOOPT-NEXT: # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, 520
+ ret i64 %mul
+}
+
; (x*9+42)*(x*5+2)
define i64 @test_mul_spec(i64 %x) nounwind {
; X86-LABEL: test_mul_spec:
More information about the llvm-commits
mailing list