[llvm] r331300 - [DAGCombiner] Set the right SDLoc on a newly-created zextload (1/N)
Vedant Kumar via llvm-commits
llvm-commits at lists.llvm.org
Tue May 1 12:26:15 PDT 2018
Author: vedantk
Date: Tue May 1 12:26:15 2018
New Revision: 331300
URL: http://llvm.org/viewvc/llvm-project?rev=331300&view=rev
Log:
[DAGCombiner] Set the right SDLoc on a newly-created zextload (1/N)
Setting the right SDLoc on a newly-created zextload fixes a line table
bug which resulted in non-linear stepping behavior.
Several backend tests contained CHECK lines which relied on the IROrder
inherited from the wrong SDLoc. This patch breaks that dependence where
feasbile and regenerates test cases where not.
In some cases, changing a node's IROrder may alter register allocation
and spill behavior. This can affect performance. I have chosen not to
prevent this by applying a "known good" IROrder to SDLocs, as this may
hide a more general bug in the scheduler, or cause regressions on other
test inputs.
rdar://33755881, Part of: llvm.org/PR37262
Differential Revision: https://reviews.llvm.org/D45995
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll
llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll
llvm/trunk/test/CodeGen/ARM/vector-load.ll
llvm/trunk/test/CodeGen/SPARC/32abi.ll
llvm/trunk/test/CodeGen/SPARC/64abi.ll
llvm/trunk/test/CodeGen/X86/avg.ll
llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
llvm/trunk/test/CodeGen/X86/fold-zext-trunc.ll
llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll
llvm/trunk/test/CodeGen/X86/load-combine.ll
llvm/trunk/test/CodeGen/X86/mulx32.ll
llvm/trunk/test/CodeGen/X86/promote-vec3.ll
llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
llvm/trunk/test/CodeGen/X86/widen_conv-4.ll
llvm/trunk/test/CodeGen/X86/win-smallparams.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue May 1 12:26:15 2018
@@ -8094,7 +8094,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SD
DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll Tue May 1 12:26:15 2018
@@ -24,36 +24,35 @@ define [2 x i64] @test_i64x2_align(i32,
@var64 = global i64 0, align 8
- ; Check stack slots are 64-bit at all times.
+; Check stack slots are 64-bit at all times.
define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
i32 %int, i64 %long) {
+; CHECK-LABEL: test_stack_slots:
+; CHECK-DAG: ldr w[[ext1:[0-9]+]], [sp, #24]
+; CHECK-DAG: ldrh w[[ext2:[0-9]+]], [sp, #16]
+; CHECK-DAG: ldrb w[[ext3:[0-9]+]], [sp, #8]
+; CHECK-DAG: ldr x[[ext4:[0-9]+]], [sp, #32]
+; CHECK-DAG: ldrb w[[ext5:[0-9]+]], [sp]
+; CHECK-DAG: and x[[ext5]], x[[ext5]], #0x1
+
%ext_bool = zext i1 %bool to i64
store volatile i64 %ext_bool, i64* @var64, align 8
- ; Part of last store. Blasted scheduler.
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
-
-; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
-
-; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
-; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext5]], [{{x[0-9]+}}, :lo12:var64]
%ext_char = zext i8 %char to i64
store volatile i64 %ext_char, i64* @var64, align 8
-; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext3]], [{{x[0-9]+}}, :lo12:var64]
%ext_short = zext i16 %short to i64
store volatile i64 %ext_short, i64* @var64, align 8
-; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext2]], [{{x[0-9]+}}, :lo12:var64]
%ext_int = zext i32 %int to i64
store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext1]], [{{x[0-9]+}}, :lo12:var64]
store volatile i64 %long, i64* @var64, align 8
-; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: str x[[ext4]], [{{x[0-9]+}}, :lo12:var64]
ret void
}
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-ldp-cluster.ll Tue May 1 12:26:15 2018
@@ -67,14 +67,14 @@ define i32 @ldur_int(i32* %a) nounwind {
; Test sext + zext clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_half_sext_zext_int:%bb.0
-; CHECK: Cluster ld/st SU(3) - SU(4)
-; CHECK: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui
-; CHECK: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
+; CHECK: Cluster ld/st SU(4) - SU(3)
+; CHECK: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
+; CHECK: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui
; EXYNOSM1: ********** MI Scheduling **********
; EXYNOSM1-LABEL: ldp_half_sext_zext_int:%bb.0
-; EXYNOSM1: Cluster ld/st SU(3) - SU(4)
-; EXYNOSM1: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui
-; EXYNOSM1: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
+; EXYNOSM1: Cluster ld/st SU(4) - SU(3)
+; EXYNOSM1: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
+; EXYNOSM1: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui
define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
%tmp0 = load i64, i64* %q, align 4
%tmp = load i32, i32* %p, align 4
Modified: llvm/trunk/test/CodeGen/ARM/vector-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vector-load.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vector-load.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vector-load.ll Tue May 1 12:26:15 2018
@@ -240,9 +240,9 @@ define <4 x i32> @zextload_v8i8tov8i32_f
;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
-;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
;CHECK: str r[[INCREG]], [r0]
%A = load <4 x i8>*, <4 x i8>** %ptr
%lA = load <4 x i8>, <4 x i8>* %A, align 4
Modified: llvm/trunk/test/CodeGen/SPARC/32abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SPARC/32abi.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SPARC/32abi.ll (original)
+++ llvm/trunk/test/CodeGen/SPARC/32abi.ll Tue May 1 12:26:15 2018
@@ -178,34 +178,34 @@ define void @call_floatarg(float %f1, do
;; endian, since the 64-bit math needs to be split
; CHECK-LABEL: i64arg:
; CHECK: save %sp, -96, %sp
-; CHECK-BE: ld [%fp+100], %g2
-; CHECK-BE-NEXT: ld [%fp+96], %g3
-; CHECK-BE-NEXT: ld [%fp+92], %g4
+; CHECK-BE: ld [%fp+104], %g2
+; CHECK-BE-NEXT: ld [%fp+100], %g3
+; CHECK-BE-NEXT: ld [%fp+96], %g4
+; CHECK-BE-NEXT: ld [%fp+92], %l0
; CHECK-BE-NEXT: addcc %i1, %i2, %i1
; CHECK-BE-NEXT: addxcc %i0, 0, %i0
; CHECK-BE-NEXT: addcc %i4, %i1, %i1
; CHECK-BE-NEXT: addxcc %i3, %i0, %i0
-; CHECK-BE-NEXT: addcc %g4, %i1, %i1
-; CHECK-BE-NEXT: ld [%fp+104], %i2
+; CHECK-BE-NEXT: addcc %l0, %i1, %i1
; CHECK-BE-NEXT: addxcc %i5, %i0, %i0
+; CHECK-BE-NEXT: addcc %g3, %i1, %i1
+; CHECK-BE-NEXT: addxcc %g4, %i0, %i0
; CHECK-BE-NEXT: addcc %g2, %i1, %i1
-; CHECK-BE-NEXT: addxcc %g3, %i0, %i0
-; CHECK-BE-NEXT: addcc %i2, %i1, %i1
; CHECK-BE-NEXT: addxcc %i0, 0, %i0
;
-; CHECK-LE: ld [%fp+96], %g2
-; CHECK-LE-NEXT: ld [%fp+100], %g3
-; CHECK-LE-NEXT: ld [%fp+92], %g4
+; CHECK-LE: ld [%fp+104], %g2
+; CHECK-LE-NEXT: ld [%fp+96], %g3
+; CHECK-LE-NEXT: ld [%fp+100], %g4
+; CHECK-LE-NEXT: ld [%fp+92], %l0
; CHECK-LE-NEXT: addcc %i0, %i2, %i0
; CHECK-LE-NEXT: addxcc %i1, 0, %i1
; CHECK-LE-NEXT: addcc %i3, %i0, %i0
; CHECK-LE-NEXT: addxcc %i4, %i1, %i1
; CHECK-LE-NEXT: addcc %i5, %i0, %i0
-; CHECK-LE-NEXT: ld [%fp+104], %i2
+; CHECK-LE-NEXT: addxcc %l0, %i1, %i1
+; CHECK-LE-NEXT: addcc %g3, %i0, %i0
; CHECK-LE-NEXT: addxcc %g4, %i1, %i1
; CHECK-LE-NEXT: addcc %g2, %i0, %i0
-; CHECK-LE-NEXT: addxcc %g3, %i1, %i1
-; CHECK-LE-NEXT: addcc %i2, %i0, %i0
; CHECK-LE-NEXT: addxcc %i1, 0, %i1
; CHECK-NEXT: restore
Modified: llvm/trunk/test/CodeGen/SPARC/64abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SPARC/64abi.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SPARC/64abi.ll (original)
+++ llvm/trunk/test/CodeGen/SPARC/64abi.ll Tue May 1 12:26:15 2018
@@ -63,14 +63,14 @@ define void @call_intarg(i32 %i0, i8* %i
; HARD: faddd %f6,
; HARD: fadds %f31, [[F]]
; SOFT: save %sp, -176, %sp
+; SOFT: ld [%fp+2299], %i4
+; SOFT: ld [%fp+2307], %i5
; SOFT: srl %i0, 0, %o0
; SOFT-NEXT: call __extendsfdf2
; SOFT: mov %o0, %o1
; SOFT: mov %i1, %o0
; SOFT: mov %i2, %o0
; SOFT: mov %i3, %o0
-; SOFT: ld [%fp+2299], %o0
-; SOFT: ld [%fp+2307], %o1
define double @floatarg(float %a0, ; %f1
double %a1, ; %d2
double %a2, ; %d4
Modified: llvm/trunk/test/CodeGen/X86/avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avg.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avg.ll Tue May 1 12:26:15 2018
@@ -2408,71 +2408,70 @@ define void @not_avg_v16i8_wide_constant
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: subq $16, %rsp
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpextrq $1, %xmm4, %rbx
+; AVX2-NEXT: vmovq %xmm4, %rbp
+; AVX2-NEXT: vpextrq $1, %xmm3, %rdi
+; AVX2-NEXT: vmovq %xmm3, %rcx
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX2-NEXT: vmovq %xmm3, %r9
+; AVX2-NEXT: vpextrq $1, %xmm2, %r11
+; AVX2-NEXT: vmovq %xmm2, %r12
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX2-NEXT: vpextrq $1, %xmm3, %rcx
-; AVX2-NEXT: vmovq %xmm3, %rax
-; AVX2-NEXT: vpextrq $1, %xmm2, %rbx
-; AVX2-NEXT: vmovq %xmm2, %rdx
+; AVX2-NEXT: vpextrq $1, %xmm3, %r15
+; AVX2-NEXT: vmovq %xmm3, %rsi
+; AVX2-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrq $1, %xmm2, %rdi
-; AVX2-NEXT: vmovq %xmm2, %r11
-; AVX2-NEXT: vpextrq $1, %xmm1, %r13
-; AVX2-NEXT: vmovq %xmm1, %r12
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrq $1, %xmm2, %rbp
-; AVX2-NEXT: vmovq %xmm2, %r10
-; AVX2-NEXT: vpextrq $1, %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX2-NEXT: vmovq %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX2-NEXT: vmovq %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT: vpextrq $1, %xmm4, %r15
-; AVX2-NEXT: addq %rcx, %r15
-; AVX2-NEXT: vmovq %xmm4, %r9
-; AVX2-NEXT: addq %rax, %r9
-; AVX2-NEXT: vpextrq $1, %xmm3, %rax
+; AVX2-NEXT: vpextrq $1, %xmm4, %rax
; AVX2-NEXT: addq %rbx, %rax
; AVX2-NEXT: movq %rax, %rbx
-; AVX2-NEXT: vmovq %xmm3, %rax
-; AVX2-NEXT: addq %rdx, %rax
-; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: vmovq %xmm4, %r13
+; AVX2-NEXT: addq %rbp, %r13
+; AVX2-NEXT: vpextrq $1, %xmm3, %r10
+; AVX2-NEXT: addq %rdi, %r10
+; AVX2-NEXT: vmovq %xmm3, %r14
+; AVX2-NEXT: addq %rcx, %r14
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpextrq $1, %xmm3, %rax
-; AVX2-NEXT: addq %rdi, %rax
+; AVX2-NEXT: addq %rdx, %rax
; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: vmovq %xmm3, %rax
+; AVX2-NEXT: vmovq %xmm3, %r8
+; AVX2-NEXT: addq %r9, %r8
+; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: addq %r11, %rax
; AVX2-NEXT: movq %rax, %r11
-; AVX2-NEXT: vpextrq $1, %xmm2, %r14
-; AVX2-NEXT: addq %r13, %r14
; AVX2-NEXT: vmovq %xmm2, %rax
; AVX2-NEXT: addq %r12, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpextrq $1, %xmm3, %rax
-; AVX2-NEXT: addq %rbp, %rax
+; AVX2-NEXT: addq %r15, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: vmovq %xmm3, %rax
-; AVX2-NEXT: addq %r10, %rax
+; AVX2-NEXT: addq %rsi, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
@@ -2480,36 +2479,36 @@ define void @not_avg_v16i8_wide_constant
; AVX2-NEXT: vmovq %xmm2, %rax
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpextrq $1, %xmm2, %rbp
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; AVX2-NEXT: vmovq %xmm2, %r10
-; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: vpextrq $1, %xmm1, %rdi
+; AVX2-NEXT: vmovq %xmm2, %r9
+; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; AVX2-NEXT: vpextrq $1, %xmm1, %rax
+; AVX2-NEXT: vpextrq $1, %xmm0, %rdi
; AVX2-NEXT: addq %rax, %rdi
-; AVX2-NEXT: vmovq %xmm0, %rdx
-; AVX2-NEXT: vmovq %xmm1, %rsi
+; AVX2-NEXT: vmovq %xmm1, %rdx
+; AVX2-NEXT: vmovq %xmm0, %rsi
; AVX2-NEXT: addq %rdx, %rsi
-; AVX2-NEXT: addq $-1, %r15
-; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addq $-1, %rbx
+; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX2-NEXT: addq $-1, %r9
-; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addq $-1, %r13
+; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, (%rsp) # 8-byte Spill
-; AVX2-NEXT: addq $-1, %rbx
-; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addq $-1, %r10
+; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX2-NEXT: addq $-1, %r8
-; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addq $-1, %r14
+; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %r13d
; AVX2-NEXT: adcq $-1, %r13
; AVX2-NEXT: addq $-1, %rcx
@@ -2517,12 +2516,12 @@ define void @not_avg_v16i8_wide_constant
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX2-NEXT: addq $-1, %r11
-; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addq $-1, %r8
+; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %r15d
; AVX2-NEXT: adcq $-1, %r15
-; AVX2-NEXT: addq $-1, %r14
-; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: addq $-1, %r11
+; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %ebx
; AVX2-NEXT: adcq $-1, %rbx
; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
@@ -2546,9 +2545,9 @@ define void @not_avg_v16i8_wide_constant
; AVX2-NEXT: addq $-1, %rbp
; AVX2-NEXT: movl $0, %r14d
; AVX2-NEXT: adcq $-1, %r14
-; AVX2-NEXT: addq $-1, %r10
-; AVX2-NEXT: movl $0, %r9d
-; AVX2-NEXT: adcq $-1, %r9
+; AVX2-NEXT: addq $-1, %r9
+; AVX2-NEXT: movl $0, %r10d
+; AVX2-NEXT: adcq $-1, %r10
; AVX2-NEXT: addq $-1, %rdi
; AVX2-NEXT: movl $0, %edx
; AVX2-NEXT: adcq $-1, %rdx
@@ -2558,7 +2557,7 @@ define void @not_avg_v16i8_wide_constant
; AVX2-NEXT: shldq $63, %rsi, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: shldq $63, %rdi, %rdx
-; AVX2-NEXT: shldq $63, %r10, %r9
+; AVX2-NEXT: shldq $63, %r9, %r10
; AVX2-NEXT: shldq $63, %rbp, %r14
; AVX2-NEXT: shldq $63, %rcx, %r11
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
@@ -2566,8 +2565,8 @@ define void @not_avg_v16i8_wide_constant
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; AVX2-NEXT: shldq $63, %rcx, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX2-NEXT: shldq $63, %rcx, %r9
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, %r8
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
@@ -2596,13 +2595,13 @@ define void @not_avg_v16i8_wide_constant
; AVX2-NEXT: vmovq %r15, %xmm13
; AVX2-NEXT: vmovq %rbx, %xmm14
; AVX2-NEXT: vmovq %r8, %xmm15
-; AVX2-NEXT: vmovq %r10, %xmm0
+; AVX2-NEXT: vmovq %r9, %xmm0
; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
; AVX2-NEXT: # xmm1 = mem[0],zero
; AVX2-NEXT: vmovq %r12, %xmm2
; AVX2-NEXT: vmovq %r11, %xmm3
; AVX2-NEXT: vmovq %r14, %xmm4
-; AVX2-NEXT: vmovq %r9, %xmm5
+; AVX2-NEXT: vmovq %r10, %xmm5
; AVX2-NEXT: vmovq %rdx, %xmm6
; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 8-byte Folded Reload
; AVX2-NEXT: # xmm7 = mem[0],zero
@@ -2658,58 +2657,58 @@ define void @not_avg_v16i8_wide_constant
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: subq $24, %rsp
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX512-NEXT: vpextrq $1, %xmm4, %rbx
+; AVX512-NEXT: vmovq %xmm4, %rbp
+; AVX512-NEXT: vpextrq $1, %xmm3, %rdi
+; AVX512-NEXT: vmovq %xmm3, %rsi
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
-; AVX512-NEXT: vmovq %xmm3, %rax
-; AVX512-NEXT: vpextrq $1, %xmm2, %rbx
-; AVX512-NEXT: vmovq %xmm2, %rbp
-; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512-NEXT: vpextrq $1, %xmm2, %rdi
-; AVX512-NEXT: vmovq %xmm2, %r8
-; AVX512-NEXT: vpextrq $1, %xmm1, %r13
-; AVX512-NEXT: vmovq %xmm1, %r12
+; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512-NEXT: vmovq %xmm3, %r8
+; AVX512-NEXT: vpextrq $1, %xmm2, %r13
+; AVX512-NEXT: vmovq %xmm2, %r12
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512-NEXT: vpextrq $1, %xmm2, %r15
-; AVX512-NEXT: vmovq %xmm2, %r14
-; AVX512-NEXT: vpextrq $1, %xmm1, %rdx
-; AVX512-NEXT: vmovq %xmm1, %r9
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512-NEXT: vpextrq $1, %xmm3, %r15
+; AVX512-NEXT: vmovq %xmm3, %r14
+; AVX512-NEXT: vpextrq $1, %xmm2, %r9
+; AVX512-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpextrq $1, %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX512-NEXT: vmovq %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX512-NEXT: vpextrq $1, %xmm4, %rsi
-; AVX512-NEXT: addq %rcx, %rsi
-; AVX512-NEXT: vmovq %xmm4, %rcx
-; AVX512-NEXT: addq %rax, %rcx
-; AVX512-NEXT: vpextrq $1, %xmm3, %rax
+; AVX512-NEXT: vpextrq $1, %xmm4, %rax
; AVX512-NEXT: addq %rbx, %rax
; AVX512-NEXT: movq %rax, %rbx
-; AVX512-NEXT: vmovq %xmm3, %rax
+; AVX512-NEXT: vmovq %xmm4, %rax
; AVX512-NEXT: addq %rbp, %rax
-; AVX512-NEXT: movq %rax, %r10
-; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512-NEXT: movq %rax, %rbp
; AVX512-NEXT: vpextrq $1, %xmm3, %rax
; AVX512-NEXT: addq %rdi, %rax
; AVX512-NEXT: movq %rax, %rdi
+; AVX512-NEXT: vmovq %xmm3, %r10
+; AVX512-NEXT: addq %rsi, %r10
+; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
+; AVX512-NEXT: addq %rdx, %rcx
; AVX512-NEXT: vmovq %xmm3, %rax
; AVX512-NEXT: addq %r8, %rax
; AVX512-NEXT: movq %rax, %r8
-; AVX512-NEXT: vpextrq $1, %xmm2, %rbp
-; AVX512-NEXT: addq %r13, %rbp
+; AVX512-NEXT: vpextrq $1, %xmm2, %rsi
+; AVX512-NEXT: addq %r13, %rsi
; AVX512-NEXT: vmovq %xmm2, %r11
; AVX512-NEXT: addq %r12, %r11
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
@@ -2724,10 +2723,10 @@ define void @not_avg_v16i8_wide_constant
; AVX512-NEXT: addq %r14, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: vpextrq $1, %xmm2, %rax
-; AVX512-NEXT: addq %rdx, %rax
+; AVX512-NEXT: addq %r9, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: vmovq %xmm2, %rax
-; AVX512-NEXT: addq %r9, %rax
+; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
@@ -2742,28 +2741,28 @@ define void @not_avg_v16i8_wide_constant
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vmovq %xmm1, %rdx
; AVX512-NEXT: addq %rax, %rdx
-; AVX512-NEXT: addq $-1, %rsi
-; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: addq $-1, %rbx
+; AVX512-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512-NEXT: addq $-1, %rcx
-; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: addq $-1, %rbp
+; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512-NEXT: addq $-1, %rbx
-; AVX512-NEXT: movq %rbx, (%rsp) # 8-byte Spill
+; AVX512-NEXT: addq $-1, %rdi
+; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
-; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq %rax, (%rsp) # 8-byte Spill
; AVX512-NEXT: addq $-1, %r10
; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512-NEXT: addq $-1, %rdi
-; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: addq $-1, %rcx
+; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -2772,8 +2771,8 @@ define void @not_avg_v16i8_wide_constant
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512-NEXT: addq $-1, %rbp
-; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: addq $-1, %rsi
+; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %r13d
; AVX512-NEXT: adcq $-1, %r13
; AVX512-NEXT: addq $-1, %r11
@@ -2833,8 +2832,8 @@ define void @not_avg_v16i8_wide_constant
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %rax
-; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; AVX512-NEXT: movq (%rsp), %rdx # 8-byte Reload
+; AVX512-NEXT: movq (%rsp), %r14 # 8-byte Reload
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r14
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
Modified: llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-cse.ll Tue May 1 12:26:15 2018
@@ -24,13 +24,13 @@ define i32 @t(i8* %ref_frame_ptr, i32 %r
; X64-NEXT: imull %ecx, %esi
; X64-NEXT: leal (%rsi,%rdx), %eax
; X64-NEXT: cltq
+; X64-NEXT: movl (%rdi,%rax), %eax
; X64-NEXT: leal 4(%rsi,%rdx), %ecx
; X64-NEXT: movslq %ecx, %rcx
; X64-NEXT: movzwl (%rdi,%rcx), %ecx
; X64-NEXT: shlq $32, %rcx
-; X64-NEXT: movl (%rdi,%rax), %eax
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: movq %rcx, %xmm0
; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; X64-NEXT: movd %xmm0, %eax
Modified: llvm/trunk/test/CodeGen/X86/fold-zext-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-zext-trunc.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-zext-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-zext-trunc.ll Tue May 1 12:26:15 2018
@@ -1,4 +1,5 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s | FileCheck %s -check-prefix=ASM
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -stop-after livedebugvalues -o - | FileCheck %s -check-prefix=MIR
; PR9055
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i686-pc-linux-gnu"
@@ -7,17 +8,51 @@ target triple = "i686-pc-linux-gnu"
@g_98 = common global %struct.S0 zeroinitializer, align 4
-define void @foo() nounwind {
-; CHECK: movzbl
-; CHECK-NOT: movzbl
-; CHECK: calll
+define void @foo() nounwind !dbg !5 {
+; ASM: movzbl
+; ASM-NOT: movzbl
+; ASM: calll
entry:
- %tmp17 = load i8, i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0), align 4
- %tmp54 = zext i8 %tmp17 to i32
- %foo = load i32, i32* bitcast (i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4
- %conv.i = trunc i32 %foo to i8
- tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) nounwind
- ret void
+ %tmp17 = load i8, i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0), align 4, !dbg !14
+ %tmp54 = zext i8 %tmp17 to i32, !dbg !15
+ %foo = load i32, i32* bitcast (i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4, !dbg !16
+; MIR: renamable $edi = MOVZX32rr8 renamable $al, debug-location !16
+ %conv.i = trunc i32 %foo to i8, !dbg !17
+
+ tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) #0, !dbg !18
+ call void @llvm.dbg.value(metadata i8 %tmp17, metadata !8, metadata !DIExpression()), !dbg !14
+ call void @llvm.dbg.value(metadata i32 %tmp54, metadata !10, metadata !DIExpression()), !dbg !15
+ call void @llvm.dbg.value(metadata i32 %foo, metadata !12, metadata !DIExpression()), !dbg !16
+ call void @llvm.dbg.value(metadata i8 %conv.i, metadata !13, metadata !DIExpression()), !dbg !17
+ ret void, !dbg !19
}
declare void @func_12(i32, i8 zeroext)
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-master/llvm/test/CodeGen/X86/fold-zext-trunc.ll", directory: "/")
+!2 = !{}
+!3 = !{i32 6}
+!4 = !{i32 4}
+!5 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, variables: !7)
+!6 = !DISubroutineType(types: !2)
+!7 = !{!8, !10, !12, !13}
+!8 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !9)
+!9 = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_unsigned)
+!10 = !DILocalVariable(name: "2", scope: !5, file: !1, line: 2, type: !11)
+!11 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
+!12 = !DILocalVariable(name: "3", scope: !5, file: !1, line: 3, type: !11)
+!13 = !DILocalVariable(name: "4", scope: !5, file: !1, line: 4, type: !9)
+!14 = !DILocation(line: 1, column: 1, scope: !5)
+!15 = !DILocation(line: 2, column: 1, scope: !5)
+!16 = !DILocation(line: 3, column: 1, scope: !5)
+!17 = !DILocation(line: 4, column: 1, scope: !5)
+!18 = !DILocation(line: 5, column: 1, scope: !5)
+!19 = !DILocation(line: 6, column: 1, scope: !5)
+!20 = !{i32 2, !"Debug Info Version", i32 3}
+!llvm.module.flags = !{!20}
Modified: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits-vector.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll Tue May 1 12:26:15 2018
@@ -51,8 +51,8 @@ define <4 x float> @knownbits_insert_uit
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovd %eax, %xmm0
-; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll Tue May 1 12:26:15 2018
@@ -4,8 +4,8 @@
define i64 @test1(i32 %xx, i32 %test) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT: andb $7, %cl
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: shll %cl, %eax
Modified: llvm/trunk/test/CodeGen/X86/load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/load-combine.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/load-combine.ll Tue May 1 12:26:15 2018
@@ -915,7 +915,7 @@ define i32 @load_i32_by_i8_base_offset_i
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%eax,%ecx), %eax
+; CHECK-NEXT: movl 12(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
@@ -960,7 +960,7 @@ define i32 @load_i32_by_i8_base_offset_i
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 13(%eax,%ecx), %eax
+; CHECK-NEXT: movl 13(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
@@ -1016,7 +1016,7 @@ define i32 @load_i32_by_i8_zaext_loads(i
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%eax,%ecx), %eax
+; CHECK-NEXT: movl 12(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
@@ -1072,7 +1072,7 @@ define i32 @load_i32_by_i8_zsext_loads(i
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%eax,%ecx), %eax
+; CHECK-NEXT: movl 12(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
Modified: llvm/trunk/test/CodeGen/X86/mulx32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mulx32.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mulx32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mulx32.ll Tue May 1 12:26:15 2018
@@ -17,8 +17,8 @@ define i64 @f1(i32 %a, i32 %b) {
define i64 @f2(i32 %a, i32* %p) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: mulxl (%eax), %eax, %edx
; CHECK-NEXT: retl
%b = load i32, i32* %p
Modified: llvm/trunk/test/CodeGen/X86/promote-vec3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/promote-vec3.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/promote-vec3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/promote-vec3.ll Tue May 1 12:26:15 2018
@@ -9,10 +9,10 @@ define <3 x i16> @zext_i8(<3 x i8>) {
; SSE3-LABEL: zext_i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: pinsrw $1, %eax, %xmm0
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: pinsrw $1, %ecx, %xmm0
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSE3-NEXT: pextrw $0, %xmm0, %eax
; SSE3-NEXT: pextrw $1, %xmm0, %edx
@@ -71,10 +71,10 @@ define <3 x i16> @sext_i8(<3 x i8>) {
; SSE3-LABEL: sext_i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: pinsrw $1, %eax, %xmm0
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: pinsrw $1, %ecx, %xmm0
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
; SSE3-NEXT: psraw $8, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-3.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll Tue May 1 12:26:15 2018
@@ -60,13 +60,13 @@ define void @convert_v3i8_to_v3f32(<3 x
; X86-SSE2-NEXT: movd %edx, %xmm0
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: movdqa %xmm0, (%esp)
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: shll $8, %edx
; X86-SSE2-NEXT: movzbl (%esp), %esi
; X86-SSE2-NEXT: orl %edx, %esi
; X86-SSE2-NEXT: movd %esi, %xmm0
-; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
@@ -108,13 +108,13 @@ define void @convert_v3i8_to_v3f32(<3 x
; X64-SSE2-NEXT: movq %rax, %xmm0
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE2-NEXT: shll $8, %eax
-; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE2-NEXT: orl %eax, %ecx
-; X64-SSE2-NEXT: movd %ecx, %xmm0
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
+; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE2-NEXT: shll $8, %ecx
+; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE2-NEXT: orl %ecx, %edx
+; X64-SSE2-NEXT: movd %edx, %xmm0
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
Modified: llvm/trunk/test/CodeGen/X86/widen_conv-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-4.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-4.ll Tue May 1 12:26:15 2018
@@ -86,13 +86,13 @@ define void @convert_v3i8_to_v3f32(<3 x
; X86-SSE2-NEXT: movd %edx, %xmm0
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: movdqa %xmm0, (%esp)
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: shll $8, %edx
; X86-SSE2-NEXT: movzbl (%esp), %esi
; X86-SSE2-NEXT: orl %edx, %esi
; X86-SSE2-NEXT: movd %esi, %xmm0
-; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
@@ -133,13 +133,13 @@ define void @convert_v3i8_to_v3f32(<3 x
; X64-SSE2-NEXT: movq %rax, %xmm0
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE2-NEXT: shll $8, %eax
-; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE2-NEXT: orl %eax, %ecx
-; X64-SSE2-NEXT: movd %ecx, %xmm0
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
+; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE2-NEXT: shll $8, %ecx
+; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE2-NEXT: orl %ecx, %edx
+; X64-SSE2-NEXT: movd %edx, %xmm0
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
Modified: llvm/trunk/test/CodeGen/X86/win-smallparams.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win-smallparams.ll?rev=331300&r1=331299&r2=331300&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win-smallparams.ll (original)
+++ llvm/trunk/test/CodeGen/X86/win-smallparams.ll Tue May 1 12:26:15 2018
@@ -57,11 +57,14 @@ entry:
; WIN32: calll _manyargs
; WIN32-LABEL: _manyargs:
-; WIN32-DAG: movsbl 4(%esp),
-; WIN32-DAG: movswl 8(%esp),
-; WIN32-DAG: movzbl 12(%esp),
-; WIN32-DAG: movzwl 16(%esp),
-; WIN32-DAG: movzbl 20(%esp),
-; WIN32-DAG: movzwl 24(%esp),
+; WIN32: pushl %ebx
+; WIN32: pushl %edi
+; WIN32: pushl %esi
+; WIN32-DAG: movsbl 16(%esp),
+; WIN32-DAG: movswl 20(%esp),
+; WIN32-DAG: movzbl 24(%esp),
+; WIN32-DAG: movzwl 28(%esp),
+; WIN32-DAG: movzbl 32(%esp),
+; WIN32-DAG: movzwl 36(%esp),
; WIN32: retl
More information about the llvm-commits
mailing list