[llvm] r239802 - X86: optimized i64 vector multiply with constant
Elena Demikhovsky
elena.demikhovsky at intel.com
Mon Jun 15 23:07:24 PDT 2015
Author: delena
Date: Tue Jun 16 01:07:24 2015
New Revision: 239802
URL: http://llvm.org/viewvc/llvm-project?rev=239802&view=rev
Log:
X86: optimized i64 vector multiply with constant
When we multiply two 64-bit vectors, we extract lower and upper part and use the PMULUDQ instruction.
When one of the operands is a constant, the upper part may be zero, we know this at compile time.
Example: %a = mul <4 x i64> %b, <4 x i64> < i64 5, i64 5, i64 5, i64 5>.
I'm checking the value of the upper part and prevent redundant "multiply", "shift" and "add" operations.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/pmul.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=239802&r1=239801&r2=239802&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jun 16 01:07:24 2015
@@ -16530,6 +16530,8 @@ static SDValue LowerMUL(SDValue Op, cons
SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
+ SDValue AhiBlo = Ahi;
+ SDValue AloBhi = Bhi;
// Bit cast to 32-bit vectors for MULUDQ
EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
@@ -16539,11 +16541,15 @@ static SDValue LowerMUL(SDValue Op, cons
Bhi = DAG.getBitcast(MulVT, Bhi);
SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
- SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
- SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
-
- AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
- AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+ // After shifting right const values the result may be all-zero.
+ if (!ISD::isBuildVectorAllZeros(Ahi.getNode())) {
+ AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+ }
+ if (!ISD::isBuildVectorAllZeros(Bhi.getNode())) {
+ AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ }
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=239802&r1=239801&r2=239802&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Tue Jun 16 01:07:24 2015
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
define <16 x i8> @mul8c(<16 x i8> %i) nounwind {
; SSE2-LABEL: mul8c:
@@ -75,10 +76,6 @@ define <2 x i64> @b(<2 x i64> %i) nounwi
; ALL-NEXT: movdqa {{.*#+}} xmm1 = [117,117]
; ALL-NEXT: movdqa %xmm0, %xmm2
; ALL-NEXT: pmuludq %xmm1, %xmm2
-; ALL-NEXT: pxor %xmm3, %xmm3
-; ALL-NEXT: pmuludq %xmm0, %xmm3
-; ALL-NEXT: psllq $32, %xmm3
-; ALL-NEXT: paddq %xmm3, %xmm2
; ALL-NEXT: psrlq $32, %xmm0
; ALL-NEXT: pmuludq %xmm1, %xmm0
; ALL-NEXT: psllq $32, %xmm0
@@ -248,3 +245,35 @@ entry:
%A = mul <2 x i64> %i, %j
ret <2 x i64> %A
}
+
+define <4 x i64> @b1(<4 x i64> %i) nounwind {
+; AVX2-LABEL: @b1
+; AVX2: vpbroadcastq
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsrlq $32
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsllq $32
+; AVX2-NEXT: vpaddq
+; AVX2-NEXT: retq
+entry:
+ %A = mul <4 x i64> %i, < i64 117, i64 117, i64 117, i64 117 >
+ ret <4 x i64> %A
+}
+
+define <4 x i64> @b2(<4 x i64> %i, <4 x i64> %j) nounwind {
+; AVX2-LABEL: @b2
+; AVX2: vpmuludq
+; AVX2-NEXT: vpsrlq $32
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsllq $32
+; AVX2-NEXT: vpaddq
+; AVX2-NEXT: vpsrlq $32
+; AVX2-NEXT: vpmuludq
+; AVX2-NEXT: vpsllq $32
+; AVX2-NEXT: vpaddq
+; AVX2-NEXT: retq
+entry:
+ %A = mul <4 x i64> %i, %j
+ ret <4 x i64> %A
+}
+
More information about the llvm-commits
mailing list