[llvm] r292358 - [X86] Improve mul combine for negative multiplier (2^c - 1)
Michael Zuckerman via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 18 01:31:14 PST 2017
Author: mzuckerm
Date: Wed Jan 18 03:31:13 2017
New Revision: 292358
URL: http://llvm.org/viewvc/llvm-project?rev=292358&view=rev
Log:
[X86] Improve mul combine for negative multiplier (2^c - 1)
This patch improves the mul instruction combine function (combineMul)
by adding a new layer of logic.
With this patch, we gain the ability to fold (mul x, -((1 << c) - 1))
or (mul x, -((1 << c) + 1)) into (neg ((x << c) - x)) or
(neg ((x << c) + x)), respectively.
Differential Revision: https://reviews.llvm.org/D28232
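The fold relies on the two's-complement identities
x * -(2^c - 1) == -((x << c) - x) and x * -(2^c + 1) == -((x << c) + x).
As a quick sanity check (not part of the patch), here is a minimal
standalone C++ sketch that verifies both identities for a few sample
values, using uint64_t so the shifts and wraparound are well defined:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Exercise a positive, a larger, and a negative (wrapped) sample value.
    for (uint64_t X : {uint64_t(1), uint64_t(42), uint64_t(-7)}) {
      for (unsigned C = 1; C < 8; ++C) {
        uint64_t PowMinus1 = (uint64_t{1} << C) - 1; // 2^c - 1
        uint64_t PowPlus1  = (uint64_t{1} << C) + 1; // 2^c + 1
        // (mul x, -(2^c - 1)) == (neg ((x << c) - x))
        assert(X * (0 - PowMinus1) == 0 - ((X << C) - X));
        // (mul x, -(2^c + 1)) == (neg ((x << c) + x))
        assert(X * (0 - PowPlus1) == 0 - ((X << C) + X));
      }
    }
    return 0;
  }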
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/imul.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=292358&r1=292357&r2=292358&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 18 03:31:13 2017
@@ -30256,22 +30256,37 @@ static SDValue combineMul(SDNode *N, Sel
}
if (!NewMul) {
- assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
- && "Both cases that could cause potential overflows should have "
- "already been handled.");
- if (isPowerOf2_64(MulAmt - 1))
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt - 1), DL,
- MVT::i8)));
-
- else if (isPowerOf2_64(MulAmt + 1))
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
- N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt + 1),
- DL, MVT::i8)), N->getOperand(0));
+ assert(MulAmt != 0 &&
+ MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ "Both cases that could cause potential overflows should have "
+ "already been handled.");
+ int64_t SignMulAmt = C->getSExtValue();
+ if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) &&
+ (SignMulAmt != -INT64_MAX)) {
+ int NumSign = SignMulAmt > 0 ? 1 : -1;
+ bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1);
+ bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1);
+ if (IsPowerOf2_64PlusOne) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ NewMul = DAG.getNode(
+ ISD::ADD, DL, VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL,
+ MVT::i8)));
+ } else if (IsPowerOf2_64MinusOne) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ NewMul = DAG.getNode(
+ ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL,
+ MVT::i8)),
+ N->getOperand(0));
+ }
+ // To negate, subtract the number from zero
+ if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1)
+ NewMul =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+ }
}
if (NewMul)
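Worked example: for a multiply by -31 (as in @test1 below), SignMulAmt
is -31, so NumSign is -1 and NumSign * SignMulAmt is 31. Since
31 + 1 = 32 = 2^5, IsPowerOf2_64MinusOne holds, so the combine emits
(sub (shl x, 5), x) and then negates it via (sub 0, ...), which lowers
to the shll $5 / subl / negl sequence checked in the updated test.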
Modified: llvm/trunk/test/CodeGen/X86/imul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/imul.ll?rev=292358&r1=292357&r2=292358&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/imul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/imul.ll Wed Jan 18 03:31:13 2017
@@ -171,3 +171,233 @@ define i64 @mul18446744073709551615_64(i
%mul = mul i64 %A, 18446744073709551615
ret i64 %mul
}
+
+define i32 @test(i32 %a) {
+; X64-LABEL: test:
+; X64: # BB#0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: test:
+; X86: # BB#0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i32 %a, 31
+ ret i32 %tmp3
+}
+
+define i32 @test1(i32 %a) {
+; X64-LABEL: test1:
+; X64: # BB#0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: test1:
+; X86: # BB#0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i32 %a, -31
+ ret i32 %tmp3
+}
+
+
+define i32 @test2(i32 %a) {
+; X64-LABEL: test2:
+; X64: # BB#0: # %entry
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: leal (%rax,%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: test2:
+; X86: # BB#0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i32 %a, 33
+ ret i32 %tmp3
+}
+
+define i32 @test3(i32 %a) {
+; X64-LABEL: test3:
+; X64: # BB#0: # %entry
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: leal (%rax,%rdi), %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: test3:
+; X86: # BB#0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i32 %a, -33
+ ret i32 %tmp3
+}
+
+define i64 @test4(i64 %a) {
+; X64-LABEL: test4:
+; X64: # BB#0: # %entry
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlq $5, %rax
+; X64-NEXT: subq %rdi, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: test4:
+; X86: # BB#0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl $31, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i64 %a, 31
+ ret i64 %tmp3
+}
+
+define i64 @test5(i64 %a) {
+; X64-LABEL: test5:
+; X64: # BB#0: # %entry
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlq $5, %rax
+; X64-NEXT: subq %rdi, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: test5:
+; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi0:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi1:
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shll $5, %esi
+; X86-NEXT: subl %eax, %esi
+; X86-NEXT: movl $-31, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: subl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i64 %a, -31
+ ret i64 %tmp3
+}
+
+
+define i64 @test6(i64 %a) {
+; X64-LABEL: test6:
+; X64: # BB#0: # %entry
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlq $5, %rax
+; X64-NEXT: leaq (%rax,%rdi), %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: test6:
+; X86: # BB#0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: movl $33, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i64 %a, 33
+ ret i64 %tmp3
+}
+
+define i64 @test7(i64 %a) {
+; X64-LABEL: test7:
+; X64: # BB#0: # %entry
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shlq $5, %rax
+; X64-NEXT: leaq (%rax,%rdi), %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: test7:
+; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi2:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi3:
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shll $5, %esi
+; X86-NEXT: addl %eax, %esi
+; X86-NEXT: movl $-33, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: subl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i64 %a, -33
+ ret i64 %tmp3
+}
+
+define i64 @testOverflow(i64 %a) {
+; X64-LABEL: testOverflow:
+; X64: # BB#0: # %entry
+; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: imulq %rdi, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: testOverflow:
+; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi4:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi5:
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: shll $31, %esi
+; X86-NEXT: subl %ecx, %esi
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+entry:
+ %tmp3 = mul i64 %a, 9223372036854775807
+ ret i64 %tmp3
+}