[llvm] r314221 - [X86] Add support for v16i32 UMUL_LOHI/SMUL_LOHI
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 26 09:43:58 PDT 2017
Author: ctopper
Date: Tue Sep 26 09:43:57 2017
New Revision: 314221
URL: http://llvm.org/viewvc/llvm-project?rev=314221&view=rev
Log:
[X86] Add support for v16i32 UMUL_LOHI/SMUL_LOHI
Summary: This patch extends the v8i32/v4i32 custom lowering to support v16i32.
Reviewers: zvi, RKSimon
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38274
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314221&r1=314220&r2=314221&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Sep 26 09:43:57 2017
@@ -1298,6 +1298,10 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i32, Legal);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
@@ -1306,7 +1310,6 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
- setOperationAction(ISD::MUL, MVT::v16i32, Legal);
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
@@ -21800,7 +21803,10 @@ static SDValue LowerMUL_LOHI(SDValue Op,
}
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
- (VT == MVT::v8i32 && Subtarget.hasInt256()));
+ (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
+ (VT == MVT::v16i32 && Subtarget.hasAVX512()));
+
+ int NumElts = VT.getVectorNumElements();
// PMULxD operations multiply each even value (starting at 0) of LHS with
// the related value of RHS and produce a widen result.
@@ -21814,17 +21820,17 @@ static SDValue LowerMUL_LOHI(SDValue Op,
//
// Place the odd value at an even position (basically, shift all values 1
// step to the left):
- const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1};
// <a|b|c|d> => <b|undef|d|undef>
SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
- makeArrayRef(&Mask[0], VT.getVectorNumElements()));
+ makeArrayRef(&Mask[0], NumElts));
// <e|f|g|h> => <f|undef|h|undef>
SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
- makeArrayRef(&Mask[0], VT.getVectorNumElements()));
+ makeArrayRef(&Mask[0], NumElts));
// Emit two multiplies, one for the lower 2 ints and one for the higher 2
// ints.
- MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
+ MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
unsigned Opcode =
(!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
@@ -21836,19 +21842,16 @@ static SDValue LowerMUL_LOHI(SDValue Op,
SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
- SDValue Highs, Lows;
- if (VT == MVT::v8i32) {
- const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
- Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
- Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
- } else {
- const int HighMask[] = {1, 5, 3, 7};
- Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {0, 4, 2, 6};
- Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ SmallVector<int, 16> HighMask(NumElts);
+ SmallVector<int, 16> LowMask(NumElts);
+ for (int i = 0; i != NumElts; ++i) {
+ HighMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
+ LowMask[i] = (i / 2) * 2 + ((i % 2) * NumElts);
}
+ SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+
// If we have a signed multiply but no PMULDQ fix up the high parts of a
// unsigned multiply.
if (IsSigned && !Subtarget.hasSSE41()) {
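
[Editorial note, not part of the commit] The generalized HighMask/LowMask loop above replaces the previously hard-coded shuffle masks. A small standalone C++ sketch (the helper name printMasks is just for illustration) shows that the formula reproduces the old v4i32 masks {1,5,3,7}/{0,4,2,6} and v8i32 masks {1,9,3,11,5,13,7,15}/{0,8,2,10,4,12,6,14}, and extends naturally to the new v16i32 case:

#include <cstdio>
#include <vector>

// Illustration of the mask formula used in LowerMUL_LOHI: lane i of the
// interleaved result selects element (i/2)*2 + (i%2)*NumElts from the
// concatenation of Mul1 (indices 0..NumElts-1) and Mul2 (indices
// NumElts..2*NumElts-1); the high mask adds 1 to pick the odd (high) lanes.
static void printMasks(int NumElts) {
  std::vector<int> HighMask(NumElts), LowMask(NumElts);
  for (int i = 0; i != NumElts; ++i) {
    HighMask[i] = (i / 2) * 2 + (i % 2) * NumElts + 1;
    LowMask[i] = (i / 2) * 2 + (i % 2) * NumElts;
  }
  printf("NumElts=%d\n  High:", NumElts);
  for (int M : HighMask) printf(" %d", M);
  printf("\n  Low: ");
  for (int M : LowMask) printf(" %d", M);
  printf("\n");
}

int main() {
  // v4i32 and v8i32 match the hard-coded masks this patch removes;
  // v16i32 is the newly supported case.
  const int Cases[] = {4, 8, 16};
  for (int NumElts : Cases)
    printMasks(NumElts);
  return 0;
}
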
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll?rev=314221&r1=314220&r2=314221&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll Tue Sep 26 09:43:57 2017
@@ -84,172 +84,17 @@ define <8 x i64> @test_div7_8i64(<8 x i6
define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind {
; AVX-LABEL: test_div7_16i32:
; AVX: # BB#0:
-; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX-NEXT: vpextrd $1, %xmm1, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vmovd %xmm1, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm2
-; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $2, %xmm1, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $3, %xmm1, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
-; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: sarl $2, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0
-; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
+; AVX-NEXT: vpmuldq %zmm1, %zmm0, %zmm2
+; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpmuldq %zmm1, %zmm3, %zmm1
+; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
+; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; AVX-NEXT: vpaddd %zmm0, %zmm3, %zmm0
+; AVX-NEXT: vpsrld $31, %zmm0, %zmm1
+; AVX-NEXT: vpsrad $2, %zmm0, %zmm0
+; AVX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX-NEXT: retq
%res = sdiv <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <16 x i32> %res
@@ -1159,220 +1004,19 @@ define <8 x i64> @test_rem7_8i64(<8 x i6
define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
; AVX-LABEL: test_rem7_16i32:
; AVX: # BB#0:
-; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX-NEXT: vpextrd $1, %xmm1, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vmovd %xmm1, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: movl %edx, %esi
-; AVX-NEXT: shrl $31, %esi
-; AVX-NEXT: sarl $2, %edx
-; AVX-NEXT: addl %esi, %edx
-; AVX-NEXT: leal (,%rdx,8), %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: subl %esi, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm2
-; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $2, %xmm1, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $3, %xmm1, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
-; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: movl %edx, %esi
-; AVX-NEXT: shrl $31, %esi
-; AVX-NEXT: sarl $2, %edx
-; AVX-NEXT: addl %esi, %edx
-; AVX-NEXT: leal (,%rdx,8), %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: subl %esi, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: movl %edx, %esi
-; AVX-NEXT: shrl $31, %esi
-; AVX-NEXT: sarl $2, %edx
-; AVX-NEXT: addl %esi, %edx
-; AVX-NEXT: leal (,%rdx,8), %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: subl %esi, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: movslq %ecx, %rcx
-; AVX-NEXT: imulq $-1840700269, %rcx, %rdx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: movl %edx, %esi
-; AVX-NEXT: shrl $31, %esi
-; AVX-NEXT: sarl $2, %edx
-; AVX-NEXT: addl %esi, %edx
-; AVX-NEXT: leal (,%rdx,8), %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: subl %esi, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: cltq
-; AVX-NEXT: imulq $-1840700269, %rax, %rcx # imm = 0x92492493
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: movl %ecx, %edx
-; AVX-NEXT: shrl $31, %edx
-; AVX-NEXT: sarl $2, %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: leal (,%rcx,8), %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: subl %edx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0
-; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
+; AVX-NEXT: vpmuldq %zmm1, %zmm0, %zmm2
+; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpmuldq %zmm1, %zmm3, %zmm1
+; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
+; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; AVX-NEXT: vpaddd %zmm0, %zmm3, %zmm1
+; AVX-NEXT: vpsrld $31, %zmm1, %zmm2
+; AVX-NEXT: vpsrad $2, %zmm1, %zmm1
+; AVX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; AVX-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm1, %zmm1
+; AVX-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; AVX-NEXT: retq
%res = srem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <16 x i32> %res
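
[Editorial note, not part of the commit] Both the old scalar expansion and the new vector CHECK lines above implement the usual signed magic-number division by 7: multiply by 0x92492493, take the high 32 bits, add the dividend back, then combine an arithmetic shift right by 2 with the extracted sign bit. A minimal C++ plausibility check of that identity (the helper name sdiv7 is hypothetical; arithmetic right shift of a negative value is assumed, as on x86):

#include <cassert>
#include <cstdint>

// Sketch of the signed divide-by-7 sequence from the CHECK lines above
// (vpmuldq/imulq by 0x92492493, add, shrl $31, sarl $2, add). Illustration
// only; not code from the patch.
static int32_t sdiv7(int32_t x) {
  const int32_t Magic = -1840700269;               // 0x92492493
  int64_t Prod = (int64_t)x * Magic;               // signed widening multiply
  int32_t Hi = (int32_t)(Prod >> 32);              // high 32 bits
  int32_t T = Hi + x;                              // add dividend back
  return (T >> 2) + (int32_t)((uint32_t)T >> 31);  // sar 2 + sign correction
}

int main() {
  for (int64_t x = -100000; x <= 100000; ++x)
    assert(sdiv7((int32_t)x) == (int32_t)x / 7);
  return 0;
}
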
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll?rev=314221&r1=314220&r2=314221&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll Tue Sep 26 09:43:57 2017
@@ -92,140 +92,17 @@ define <8 x i64> @test_div7_8i64(<8 x i6
define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind {
; AVX-LABEL: test_div7_16i32:
; AVX: # BB#0:
-; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX-NEXT: vpextrd $1, %xmm1, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vmovd %xmm1, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: shrl %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: shrl $2, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm2
-; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $2, %xmm1, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $3, %xmm1, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
-; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: shrl %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: shrl $2, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: shrl %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: shrl $2, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: shrl %ecx
-; AVX-NEXT: addl %edx, %ecx
-; AVX-NEXT: shrl $2, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: shrl %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: shrl $2, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0
-; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757]
+; AVX-NEXT: vpmuludq %zmm1, %zmm0, %zmm2
+; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpmuludq %zmm1, %zmm3, %zmm1
+; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
+; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; AVX-NEXT: vpsubd %zmm3, %zmm0, %zmm0
+; AVX-NEXT: vpsrld $1, %zmm0, %zmm0
+; AVX-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; AVX-NEXT: vpsrld $2, %zmm0, %zmm0
; AVX-NEXT: retq
%res = udiv <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <16 x i32> %res
@@ -1013,204 +890,19 @@ define <8 x i64> @test_rem7_8i64(<8 x i6
define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
; AVX-LABEL: test_rem7_16i32:
; AVX: # BB#0:
-; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX-NEXT: vpextrd $1, %xmm1, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vmovd %xmm1, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: movl %ecx, %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: shrl %esi
-; AVX-NEXT: addl %edx, %esi
-; AVX-NEXT: shrl $2, %esi
-; AVX-NEXT: leal (,%rsi,8), %edx
-; AVX-NEXT: subl %esi, %edx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm2
-; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $2, %xmm1, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrd $3, %xmm1, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
-; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: movl %ecx, %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: shrl %esi
-; AVX-NEXT: addl %edx, %esi
-; AVX-NEXT: shrl $2, %esi
-; AVX-NEXT: leal (,%rsi,8), %edx
-; AVX-NEXT: subl %esi, %edx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vmovd %xmm2, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: movl %ecx, %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: shrl %esi
-; AVX-NEXT: addl %edx, %esi
-; AVX-NEXT: shrl $2, %esi
-; AVX-NEXT: leal (,%rsi,8), %edx
-; AVX-NEXT: subl %esi, %edx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm2, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rdx
-; AVX-NEXT: movl %ecx, %esi
-; AVX-NEXT: subl %edx, %esi
-; AVX-NEXT: shrl %esi
-; AVX-NEXT: addl %edx, %esi
-; AVX-NEXT: shrl $2, %esi
-; AVX-NEXT: leal (,%rsi,8), %edx
-; AVX-NEXT: subl %esi, %edx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm3
-; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925
-; AVX-NEXT: shrq $32, %rcx
-; AVX-NEXT: movl %eax, %edx
-; AVX-NEXT: subl %ecx, %edx
-; AVX-NEXT: shrl %edx
-; AVX-NEXT: addl %ecx, %edx
-; AVX-NEXT: shrl $2, %edx
-; AVX-NEXT: leal (,%rdx,8), %ecx
-; AVX-NEXT: subl %edx, %ecx
-; AVX-NEXT: subl %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0
-; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757]
+; AVX-NEXT: vpmuludq %zmm1, %zmm0, %zmm2
+; AVX-NEXT: vpshufd {{.*#+}} zmm1 = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX-NEXT: vpmuludq %zmm1, %zmm3, %zmm1
+; AVX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31]
+; AVX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; AVX-NEXT: vpsubd %zmm3, %zmm0, %zmm1
+; AVX-NEXT: vpsrld $1, %zmm1, %zmm1
+; AVX-NEXT: vpaddd %zmm3, %zmm1, %zmm1
+; AVX-NEXT: vpsrld $2, %zmm1, %zmm1
+; AVX-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm1, %zmm1
+; AVX-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; AVX-NEXT: retq
%res = urem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <16 x i32> %res
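
[Editorial note, not part of the commit] The unsigned tests rely on the magic constant 0x24924925 (613566757): take the high 32 bits of the unsigned product, subtract that from the dividend, halve, add the high part back, then shift right by 2. A minimal sketch of that identity (the helper name udiv7 is hypothetical):

#include <cassert>
#include <cstdint>

// Sketch of the unsigned divide-by-7 sequence from the CHECK lines above
// (vpmuludq/imulq by 0x24924925, sub, shrl $1, add, shrl $2). Illustration
// only; not code from the patch.
static uint32_t udiv7(uint32_t x) {
  uint64_t Prod = (uint64_t)x * 0x24924925u;  // unsigned widening multiply
  uint32_t Hi = (uint32_t)(Prod >> 32);       // high 32 bits
  uint32_t T = (x - Hi) >> 1;                 // fold in the rounding error
  return (T + Hi) >> 2;                       // final quotient
}

int main() {
  // Spot-check a range plus the top of the unsigned range.
  for (uint64_t x = 0; x <= 1000000; ++x)
    assert(udiv7((uint32_t)x) == (uint32_t)x / 7);
  assert(udiv7(0xFFFFFFFFu) == 0xFFFFFFFFu / 7);
  return 0;
}
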