[llvm] r338174 - [X86] Add support expanding multiplies by constant where the constant is -3/-5/-9 multplied by a power of 2.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 27 16:05:00 PDT 2018
Author: ctopper
Date: Fri Jul 27 16:04:59 2018
New Revision: 338174
URL: http://llvm.org/viewvc/llvm-project?rev=338174&view=rev
Log:
[X86] Add support expanding multiplies by constant where the constant is -3/-5/-9 multplied by a power of 2.
These can be replaced with an LEA, a shift, and a negate. This seems to match what gcc and icc would do.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=338174&r1=338173&r2=338174&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jul 27 16:04:59 2018
@@ -33873,31 +33873,42 @@ static SDValue combineMul(SDNode *N, Sel
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
- if (isPowerOf2_64(MulAmt))
+ if (isPowerOf2_64(C->getZExtValue()))
return SDValue();
+ int64_t SignMulAmt = C->getSExtValue();
+ assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
+ uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
+
SDLoc DL(N);
- if (MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
- return DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- N->getOperand(1));
+ if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
+ SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(AbsMulAmt, DL, VT));
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
+
+ return NewMul;
+ }
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
- if ((MulAmt % 9) == 0) {
+ if ((AbsMulAmt % 9) == 0) {
MulAmt1 = 9;
- MulAmt2 = MulAmt / 9;
- } else if ((MulAmt % 5) == 0) {
+ MulAmt2 = AbsMulAmt / 9;
+ } else if ((AbsMulAmt % 5) == 0) {
MulAmt1 = 5;
- MulAmt2 = MulAmt / 5;
- } else if ((MulAmt % 3) == 0) {
+ MulAmt2 = AbsMulAmt / 5;
+ } else if ((AbsMulAmt % 3) == 0) {
MulAmt1 = 3;
- MulAmt2 = MulAmt / 3;
+ MulAmt2 = AbsMulAmt / 3;
}
SDValue NewMul;
+ // For negative multiply amounts, only allow MulAmt2 to be a power of 2.
if (MulAmt2 &&
- (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ (isPowerOf2_64(MulAmt2) ||
+ (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
@@ -33919,17 +33930,19 @@ static SDValue combineMul(SDNode *N, Sel
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
+
+ // Negate the result.
+ if (SignMulAmt < 0)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ NewMul);
} else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
+ NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
if (!NewMul) {
- assert(MulAmt != 0 &&
- MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ assert(C->getZExtValue() != 0 &&
+ C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
- int64_t SignMulAmt = C->getSExtValue();
- assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
- uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
if (isPowerOf2_64(AbsMulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll?rev=338174&r1=338173&r2=338174&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll Fri Jul 27 16:04:59 2018
@@ -766,6 +766,50 @@ define i16 @test_mul_by_520(i16 %x) {
ret i16 %mul
}
+define i16 @test_mul_by_neg10(i16 %x) {
+; X86-LABEL: test_mul_by_neg10:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_by_neg10:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, -10
+ ret i16 %mul
+}
+
+define i16 @test_mul_by_neg36(i16 %x) {
+; X86-LABEL: test_mul_by_neg36:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_by_neg36:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: shll $2, %edi
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, -36
+ ret i16 %mul
+}
+
; (x*9+42)*(x*5+2)
define i16 @test_mul_spec(i16 %x) nounwind {
; X86-LABEL: test_mul_spec:
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll?rev=338174&r1=338173&r2=338174&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll Fri Jul 27 16:04:59 2018
@@ -1997,6 +1997,118 @@ define i32 @test_mul_by_520(i32 %x) {
ret i32 %mul
}
+define i32 @test_mul_by_neg10(i32 %x) {
+; X86-LABEL: test_mul_by_neg10:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg10:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: negl %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg10:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
+; X64-JAG-NEXT: negl %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg10:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: imull $-10, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg10:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg10:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg10:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: negl %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg10:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, -10
+ ret i32 %mul
+}
+
+define i32 @test_mul_by_neg36(i32 %x) {
+; X86-LABEL: test_mul_by_neg36:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg36:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: negl %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg36:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
+; X64-JAG-NEXT: negl %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg36:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: imull $-36, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg36:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg36:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg36:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: negl %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg36:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, -36
+ ret i32 %mul
+}
+
; (x*9+42)*(x*5+2)
define i32 @test_mul_spec(i32 %x) nounwind {
; X86-LABEL: test_mul_spec:
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll?rev=338174&r1=338173&r2=338174&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll Fri Jul 27 16:04:59 2018
@@ -2107,6 +2107,144 @@ define i64 @test_mul_by_520(i64 %x) {
ret i64 %mul
}
+define i64 @test_mul_by_neg10(i64 %x) {
+; X86-LABEL: test_mul_by_neg10:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl $-10, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: leal (%esi,%esi,4), %ecx
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg10:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: negq %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg10:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
+; X64-JAG-NEXT: negq %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg10:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl $-10, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: subl %ecx, %edx
+; X86-NOOPT-NEXT: imull $-10, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg10:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg10:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [6:4.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg10:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: negq %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg10:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, -10
+ ret i64 %mul
+}
+
+define i64 @test_mul_by_neg36(i64 %x) {
+; X86-LABEL: test_mul_by_neg36:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl $-36, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: leal (%esi,%esi,8), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_by_neg36:
+; X64-HSW: # %bb.0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: negq %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [7:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_neg36:
+; X64-JAG: # %bb.0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
+; X64-JAG-NEXT: negq %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_neg36:
+; X86-NOOPT: # %bb.0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl $-36, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: subl %ecx, %edx
+; X86-NOOPT-NEXT: imull $-36, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_neg36:
+; HSW-NOOPT: # %bb.0:
+; HSW-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_neg36:
+; JAG-NOOPT: # %bb.0:
+; JAG-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [6:4.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_neg36:
+; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: negq %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_neg36:
+; SLM-NOOPT: # %bb.0:
+; SLM-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, -36
+ ret i64 %mul
+}
+
; (x*9+42)*(x*5+2)
define i64 @test_mul_spec(i64 %x) nounwind {
; X86-LABEL: test_mul_spec:
More information about the llvm-commits
mailing list