[llvm] b0319ab - [PR52475] Ensure a correct chain in copies to/from hidden sret parameter
Fraser Cormack via llvm-commits
llvm-commits@lists.llvm.org
Mon Dec 13 02:56:05 PST 2021
Author: Fraser Cormack
Date: 2021-12-13T10:46:32Z
New Revision: b0319ab79bf59669803cc2475fae1d12f8eeaca9
URL: https://github.com/llvm/llvm-project/commit/b0319ab79bf59669803cc2475fae1d12f8eeaca9
DIFF: https://github.com/llvm/llvm-project/commit/b0319ab79bf59669803cc2475fae1d12f8eeaca9.diff
LOG: [PR52475] Ensure a correct chain in copies to/from hidden sret parameter
This patch fixes an issue during SelectionDAG construction. When the
target is unable to lower the function's return value directly, a hidden
sret parameter is created. It is initialized, and its value is copied to
a virtual register (recorded in DemoteRegister) with CopyToReg and later
fetched with CopyFromReg. The bug is that the chains used for the two
copies are inconsistent, so in rare cases the scheduler may issue them
out of order.

The fix is to ensure that the CopyFromReg uses the DAG root, which is set
to the chain produced by the initial CopyToReg.
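For illustration, here is a minimal C++ sketch of the affected path in
SelectionDAGBuilder::visitRet (not the verbatim upstream source: DemoteReg,
PtrValueVTs and the two DAG calls match the diff below, but how Chain is
obtained and the surrounding code are paraphrased assumptions):

  // Chain is the builder's current root; by this point it already carries
  // the CopyToReg that stored the hidden sret pointer in DemoteReg during
  // argument lowering.
  SDValue Chain = getControlRoot();

  // Before the fix: the copy was chained to the entry node, so no chain
  // edge ordered it after that CopyToReg and the scheduler could issue it
  // too early.
  //   SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
  //                                       DemoteReg, PtrValueVTs[0]);

  // After the fix: threading Chain through the copy makes the dependency
  // explicit in the DAG.
  SDValue RetPtr =
      DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);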
Fixes https://llvm.org/PR52475
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D114795
Added:
llvm/test/CodeGen/RISCV/rvv/pr52475.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
llvm/test/CodeGen/WebAssembly/multi-return.ll
llvm/test/CodeGen/WebAssembly/simd-conversions.ll
llvm/test/CodeGen/X86/addcarry.ll
llvm/test/CodeGen/X86/fp128-cast.ll
llvm/test/CodeGen/X86/i128-add.ll
llvm/test/CodeGen/X86/masked_load.ll
llvm/test/CodeGen/X86/mul128.ll
llvm/test/CodeGen/X86/pmulh.ll
llvm/test/CodeGen/X86/scheduler-backtracking.ll
llvm/test/CodeGen/X86/subcarry.ll
llvm/test/CodeGen/X86/umul-with-overflow.ll
llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e511663337f3d..d0e2b7ee882d3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1920,8 +1920,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
- SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- DemoteReg, PtrValueVTs[0]);
+ SDValue RetPtr =
+ DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs, MemVTs;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index 6fe4cae6ebcef..1afa7b94221ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -101,56 +101,56 @@ define fastcc <64 x i32> @ret_split_v64i32(<64 x i32>* %x) {
define fastcc <128 x i32> @ret_split_v128i32(<128 x i32>* %x) {
; LMULMAX8-LABEL: ret_split_v128i32:
; LMULMAX8: # %bb.0:
-; LMULMAX8-NEXT: li a2, 32
-; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; LMULMAX8-NEXT: vle32.v v8, (a1)
; LMULMAX8-NEXT: addi a2, a1, 128
-; LMULMAX8-NEXT: vle32.v v16, (a2)
-; LMULMAX8-NEXT: addi a2, a1, 384
-; LMULMAX8-NEXT: vle32.v v24, (a2)
-; LMULMAX8-NEXT: addi a1, a1, 256
-; LMULMAX8-NEXT: vle32.v v0, (a1)
+; LMULMAX8-NEXT: li a3, 32
+; LMULMAX8-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; LMULMAX8-NEXT: vle32.v v8, (a2)
+; LMULMAX8-NEXT: addi a2, a1, 256
+; LMULMAX8-NEXT: vle32.v v16, (a1)
+; LMULMAX8-NEXT: addi a1, a1, 384
+; LMULMAX8-NEXT: vle32.v v24, (a1)
+; LMULMAX8-NEXT: vle32.v v0, (a2)
+; LMULMAX8-NEXT: vse32.v v16, (a0)
; LMULMAX8-NEXT: addi a1, a0, 384
; LMULMAX8-NEXT: vse32.v v24, (a1)
; LMULMAX8-NEXT: addi a1, a0, 256
; LMULMAX8-NEXT: vse32.v v0, (a1)
-; LMULMAX8-NEXT: addi a1, a0, 128
-; LMULMAX8-NEXT: vse32.v v16, (a1)
+; LMULMAX8-NEXT: addi a0, a0, 128
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX4-LABEL: ret_split_v128i32:
; LMULMAX4: # %bb.0:
-; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; LMULMAX4-NEXT: vle32.v v8, (a1)
; LMULMAX4-NEXT: addi a2, a1, 64
-; LMULMAX4-NEXT: vle32.v v12, (a2)
+; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; LMULMAX4-NEXT: vle32.v v8, (a2)
; LMULMAX4-NEXT: addi a2, a1, 128
-; LMULMAX4-NEXT: vle32.v v16, (a2)
+; LMULMAX4-NEXT: vle32.v v12, (a2)
; LMULMAX4-NEXT: addi a2, a1, 192
-; LMULMAX4-NEXT: vle32.v v20, (a2)
+; LMULMAX4-NEXT: vle32.v v16, (a2)
; LMULMAX4-NEXT: addi a2, a1, 256
-; LMULMAX4-NEXT: vle32.v v24, (a2)
+; LMULMAX4-NEXT: vle32.v v20, (a2)
; LMULMAX4-NEXT: addi a2, a1, 320
-; LMULMAX4-NEXT: vle32.v v28, (a2)
-; LMULMAX4-NEXT: addi a2, a1, 448
-; LMULMAX4-NEXT: vle32.v v0, (a2)
-; LMULMAX4-NEXT: addi a1, a1, 384
-; LMULMAX4-NEXT: vle32.v v4, (a1)
+; LMULMAX4-NEXT: vle32.v v24, (a2)
+; LMULMAX4-NEXT: addi a2, a1, 384
+; LMULMAX4-NEXT: vle32.v v28, (a1)
+; LMULMAX4-NEXT: addi a1, a1, 448
+; LMULMAX4-NEXT: vle32.v v0, (a1)
+; LMULMAX4-NEXT: vle32.v v4, (a2)
+; LMULMAX4-NEXT: vse32.v v28, (a0)
; LMULMAX4-NEXT: addi a1, a0, 448
; LMULMAX4-NEXT: vse32.v v0, (a1)
; LMULMAX4-NEXT: addi a1, a0, 384
; LMULMAX4-NEXT: vse32.v v4, (a1)
; LMULMAX4-NEXT: addi a1, a0, 320
-; LMULMAX4-NEXT: vse32.v v28, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 256
; LMULMAX4-NEXT: vse32.v v24, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 192
+; LMULMAX4-NEXT: addi a1, a0, 256
; LMULMAX4-NEXT: vse32.v v20, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 128
+; LMULMAX4-NEXT: addi a1, a0, 192
; LMULMAX4-NEXT: vse32.v v16, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 64
+; LMULMAX4-NEXT: addi a1, a0, 128
; LMULMAX4-NEXT: vse32.v v12, (a1)
+; LMULMAX4-NEXT: addi a0, a0, 64
; LMULMAX4-NEXT: vse32.v v8, (a0)
; LMULMAX4-NEXT: ret
%v = load <128 x i32>, <128 x i32>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 73c8ffa3ae79c..6c4c0e783854e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -233,254 +233,254 @@ define <64 x i32> @ret_split_v64i32(<64 x i32>* %x) {
define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) {
; LMULMAX8-LABEL: ret_split_v128i32:
; LMULMAX8: # %bb.0:
-; LMULMAX8-NEXT: li a2, 32
-; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; LMULMAX8-NEXT: vle32.v v8, (a1)
; LMULMAX8-NEXT: addi a2, a1, 128
-; LMULMAX8-NEXT: vle32.v v16, (a2)
-; LMULMAX8-NEXT: addi a2, a1, 384
-; LMULMAX8-NEXT: vle32.v v24, (a2)
-; LMULMAX8-NEXT: addi a1, a1, 256
-; LMULMAX8-NEXT: vle32.v v0, (a1)
+; LMULMAX8-NEXT: li a3, 32
+; LMULMAX8-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; LMULMAX8-NEXT: vle32.v v8, (a2)
+; LMULMAX8-NEXT: addi a2, a1, 256
+; LMULMAX8-NEXT: vle32.v v16, (a1)
+; LMULMAX8-NEXT: addi a1, a1, 384
+; LMULMAX8-NEXT: vle32.v v24, (a1)
+; LMULMAX8-NEXT: vle32.v v0, (a2)
+; LMULMAX8-NEXT: vse32.v v16, (a0)
; LMULMAX8-NEXT: addi a1, a0, 384
; LMULMAX8-NEXT: vse32.v v24, (a1)
; LMULMAX8-NEXT: addi a1, a0, 256
; LMULMAX8-NEXT: vse32.v v0, (a1)
-; LMULMAX8-NEXT: addi a1, a0, 128
-; LMULMAX8-NEXT: vse32.v v16, (a1)
+; LMULMAX8-NEXT: addi a0, a0, 128
; LMULMAX8-NEXT: vse32.v v8, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX4-LABEL: ret_split_v128i32:
; LMULMAX4: # %bb.0:
-; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; LMULMAX4-NEXT: vle32.v v8, (a1)
; LMULMAX4-NEXT: addi a2, a1, 64
-; LMULMAX4-NEXT: vle32.v v12, (a2)
+; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; LMULMAX4-NEXT: vle32.v v8, (a2)
; LMULMAX4-NEXT: addi a2, a1, 128
-; LMULMAX4-NEXT: vle32.v v16, (a2)
+; LMULMAX4-NEXT: vle32.v v12, (a2)
; LMULMAX4-NEXT: addi a2, a1, 192
-; LMULMAX4-NEXT: vle32.v v20, (a2)
+; LMULMAX4-NEXT: vle32.v v16, (a2)
; LMULMAX4-NEXT: addi a2, a1, 256
-; LMULMAX4-NEXT: vle32.v v24, (a2)
+; LMULMAX4-NEXT: vle32.v v20, (a2)
; LMULMAX4-NEXT: addi a2, a1, 320
-; LMULMAX4-NEXT: vle32.v v28, (a2)
-; LMULMAX4-NEXT: addi a2, a1, 448
-; LMULMAX4-NEXT: vle32.v v0, (a2)
-; LMULMAX4-NEXT: addi a1, a1, 384
-; LMULMAX4-NEXT: vle32.v v4, (a1)
+; LMULMAX4-NEXT: vle32.v v24, (a2)
+; LMULMAX4-NEXT: addi a2, a1, 384
+; LMULMAX4-NEXT: vle32.v v28, (a1)
+; LMULMAX4-NEXT: addi a1, a1, 448
+; LMULMAX4-NEXT: vle32.v v0, (a1)
+; LMULMAX4-NEXT: vle32.v v4, (a2)
+; LMULMAX4-NEXT: vse32.v v28, (a0)
; LMULMAX4-NEXT: addi a1, a0, 448
; LMULMAX4-NEXT: vse32.v v0, (a1)
; LMULMAX4-NEXT: addi a1, a0, 384
; LMULMAX4-NEXT: vse32.v v4, (a1)
; LMULMAX4-NEXT: addi a1, a0, 320
-; LMULMAX4-NEXT: vse32.v v28, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 256
; LMULMAX4-NEXT: vse32.v v24, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 192
+; LMULMAX4-NEXT: addi a1, a0, 256
; LMULMAX4-NEXT: vse32.v v20, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 128
+; LMULMAX4-NEXT: addi a1, a0, 192
; LMULMAX4-NEXT: vse32.v v16, (a1)
-; LMULMAX4-NEXT: addi a1, a0, 64
+; LMULMAX4-NEXT: addi a1, a0, 128
; LMULMAX4-NEXT: vse32.v v12, (a1)
+; LMULMAX4-NEXT: addi a0, a0, 64
; LMULMAX4-NEXT: vse32.v v8, (a0)
; LMULMAX4-NEXT: ret
;
; LMULMAX2-LABEL: ret_split_v128i32:
; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT: vle32.v v8, (a1)
; LMULMAX2-NEXT: addi a2, a1, 32
-; LMULMAX2-NEXT: vle32.v v10, (a2)
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; LMULMAX2-NEXT: vle32.v v8, (a2)
; LMULMAX2-NEXT: addi a2, a1, 64
-; LMULMAX2-NEXT: vle32.v v12, (a2)
+; LMULMAX2-NEXT: vle32.v v10, (a2)
; LMULMAX2-NEXT: addi a2, a1, 96
-; LMULMAX2-NEXT: vle32.v v14, (a2)
+; LMULMAX2-NEXT: vle32.v v12, (a2)
; LMULMAX2-NEXT: addi a2, a1, 128
-; LMULMAX2-NEXT: vle32.v v16, (a2)
+; LMULMAX2-NEXT: vle32.v v14, (a2)
; LMULMAX2-NEXT: addi a2, a1, 160
-; LMULMAX2-NEXT: vle32.v v18, (a2)
+; LMULMAX2-NEXT: vle32.v v16, (a2)
; LMULMAX2-NEXT: addi a2, a1, 192
-; LMULMAX2-NEXT: vle32.v v20, (a2)
+; LMULMAX2-NEXT: vle32.v v18, (a2)
; LMULMAX2-NEXT: addi a2, a1, 224
-; LMULMAX2-NEXT: vle32.v v22, (a2)
+; LMULMAX2-NEXT: vle32.v v20, (a2)
; LMULMAX2-NEXT: addi a2, a1, 256
-; LMULMAX2-NEXT: vle32.v v24, (a2)
+; LMULMAX2-NEXT: vle32.v v22, (a2)
; LMULMAX2-NEXT: addi a2, a1, 288
-; LMULMAX2-NEXT: vle32.v v26, (a2)
+; LMULMAX2-NEXT: vle32.v v24, (a2)
; LMULMAX2-NEXT: addi a2, a1, 320
-; LMULMAX2-NEXT: vle32.v v28, (a2)
+; LMULMAX2-NEXT: vle32.v v26, (a2)
; LMULMAX2-NEXT: addi a2, a1, 352
-; LMULMAX2-NEXT: vle32.v v30, (a2)
+; LMULMAX2-NEXT: vle32.v v28, (a2)
; LMULMAX2-NEXT: addi a2, a1, 384
-; LMULMAX2-NEXT: vle32.v v0, (a2)
+; LMULMAX2-NEXT: vle32.v v30, (a2)
; LMULMAX2-NEXT: addi a2, a1, 416
-; LMULMAX2-NEXT: vle32.v v2, (a2)
-; LMULMAX2-NEXT: addi a2, a1, 480
-; LMULMAX2-NEXT: vle32.v v4, (a2)
-; LMULMAX2-NEXT: addi a1, a1, 448
-; LMULMAX2-NEXT: vle32.v v6, (a1)
+; LMULMAX2-NEXT: vle32.v v0, (a2)
+; LMULMAX2-NEXT: addi a2, a1, 448
+; LMULMAX2-NEXT: vle32.v v2, (a1)
+; LMULMAX2-NEXT: addi a1, a1, 480
+; LMULMAX2-NEXT: vle32.v v4, (a1)
+; LMULMAX2-NEXT: vle32.v v6, (a2)
+; LMULMAX2-NEXT: vse32.v v2, (a0)
; LMULMAX2-NEXT: addi a1, a0, 480
; LMULMAX2-NEXT: vse32.v v4, (a1)
; LMULMAX2-NEXT: addi a1, a0, 448
; LMULMAX2-NEXT: vse32.v v6, (a1)
; LMULMAX2-NEXT: addi a1, a0, 416
-; LMULMAX2-NEXT: vse32.v v2, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 384
; LMULMAX2-NEXT: vse32.v v0, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 352
+; LMULMAX2-NEXT: addi a1, a0, 384
; LMULMAX2-NEXT: vse32.v v30, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 320
+; LMULMAX2-NEXT: addi a1, a0, 352
; LMULMAX2-NEXT: vse32.v v28, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 288
+; LMULMAX2-NEXT: addi a1, a0, 320
; LMULMAX2-NEXT: vse32.v v26, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 256
+; LMULMAX2-NEXT: addi a1, a0, 288
; LMULMAX2-NEXT: vse32.v v24, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 224
+; LMULMAX2-NEXT: addi a1, a0, 256
; LMULMAX2-NEXT: vse32.v v22, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 192
+; LMULMAX2-NEXT: addi a1, a0, 224
; LMULMAX2-NEXT: vse32.v v20, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 160
+; LMULMAX2-NEXT: addi a1, a0, 192
; LMULMAX2-NEXT: vse32.v v18, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 128
+; LMULMAX2-NEXT: addi a1, a0, 160
; LMULMAX2-NEXT: vse32.v v16, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 96
+; LMULMAX2-NEXT: addi a1, a0, 128
; LMULMAX2-NEXT: vse32.v v14, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 64
+; LMULMAX2-NEXT: addi a1, a0, 96
; LMULMAX2-NEXT: vse32.v v12, (a1)
-; LMULMAX2-NEXT: addi a1, a0, 32
+; LMULMAX2-NEXT: addi a1, a0, 64
; LMULMAX2-NEXT: vse32.v v10, (a1)
+; LMULMAX2-NEXT: addi a0, a0, 32
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: ret_split_v128i32:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vle32.v v8, (a1)
; LMULMAX1-NEXT: addi a2, a1, 16
-; LMULMAX1-NEXT: vle32.v v9, (a2)
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; LMULMAX1-NEXT: vle32.v v8, (a2)
; LMULMAX1-NEXT: addi a2, a1, 32
-; LMULMAX1-NEXT: vle32.v v10, (a2)
+; LMULMAX1-NEXT: vle32.v v9, (a2)
; LMULMAX1-NEXT: addi a2, a1, 48
-; LMULMAX1-NEXT: vle32.v v11, (a2)
+; LMULMAX1-NEXT: vle32.v v10, (a2)
; LMULMAX1-NEXT: addi a2, a1, 64
-; LMULMAX1-NEXT: vle32.v v12, (a2)
+; LMULMAX1-NEXT: vle32.v v11, (a2)
; LMULMAX1-NEXT: addi a2, a1, 80
-; LMULMAX1-NEXT: vle32.v v13, (a2)
+; LMULMAX1-NEXT: vle32.v v12, (a2)
; LMULMAX1-NEXT: addi a2, a1, 96
-; LMULMAX1-NEXT: vle32.v v14, (a2)
+; LMULMAX1-NEXT: vle32.v v13, (a2)
; LMULMAX1-NEXT: addi a2, a1, 112
-; LMULMAX1-NEXT: vle32.v v15, (a2)
+; LMULMAX1-NEXT: vle32.v v14, (a2)
; LMULMAX1-NEXT: addi a2, a1, 128
-; LMULMAX1-NEXT: vle32.v v16, (a2)
+; LMULMAX1-NEXT: vle32.v v15, (a2)
; LMULMAX1-NEXT: addi a2, a1, 144
-; LMULMAX1-NEXT: vle32.v v17, (a2)
+; LMULMAX1-NEXT: vle32.v v16, (a2)
; LMULMAX1-NEXT: addi a2, a1, 160
-; LMULMAX1-NEXT: vle32.v v18, (a2)
+; LMULMAX1-NEXT: vle32.v v17, (a2)
; LMULMAX1-NEXT: addi a2, a1, 176
-; LMULMAX1-NEXT: vle32.v v19, (a2)
+; LMULMAX1-NEXT: vle32.v v18, (a2)
; LMULMAX1-NEXT: addi a2, a1, 192
-; LMULMAX1-NEXT: vle32.v v20, (a2)
+; LMULMAX1-NEXT: vle32.v v19, (a2)
; LMULMAX1-NEXT: addi a2, a1, 208
-; LMULMAX1-NEXT: vle32.v v21, (a2)
+; LMULMAX1-NEXT: vle32.v v20, (a2)
; LMULMAX1-NEXT: addi a2, a1, 224
-; LMULMAX1-NEXT: vle32.v v22, (a2)
+; LMULMAX1-NEXT: vle32.v v21, (a2)
; LMULMAX1-NEXT: addi a2, a1, 240
-; LMULMAX1-NEXT: vle32.v v23, (a2)
+; LMULMAX1-NEXT: vle32.v v22, (a2)
; LMULMAX1-NEXT: addi a2, a1, 256
-; LMULMAX1-NEXT: vle32.v v24, (a2)
+; LMULMAX1-NEXT: vle32.v v23, (a2)
; LMULMAX1-NEXT: addi a2, a1, 272
-; LMULMAX1-NEXT: vle32.v v25, (a2)
+; LMULMAX1-NEXT: vle32.v v24, (a2)
; LMULMAX1-NEXT: addi a2, a1, 288
-; LMULMAX1-NEXT: vle32.v v26, (a2)
+; LMULMAX1-NEXT: vle32.v v25, (a2)
; LMULMAX1-NEXT: addi a2, a1, 304
-; LMULMAX1-NEXT: vle32.v v27, (a2)
+; LMULMAX1-NEXT: vle32.v v26, (a2)
; LMULMAX1-NEXT: addi a2, a1, 320
-; LMULMAX1-NEXT: vle32.v v28, (a2)
+; LMULMAX1-NEXT: vle32.v v27, (a2)
; LMULMAX1-NEXT: addi a2, a1, 336
-; LMULMAX1-NEXT: vle32.v v29, (a2)
+; LMULMAX1-NEXT: vle32.v v28, (a2)
; LMULMAX1-NEXT: addi a2, a1, 352
-; LMULMAX1-NEXT: vle32.v v30, (a2)
+; LMULMAX1-NEXT: vle32.v v29, (a2)
; LMULMAX1-NEXT: addi a2, a1, 368
-; LMULMAX1-NEXT: vle32.v v31, (a2)
+; LMULMAX1-NEXT: vle32.v v30, (a2)
; LMULMAX1-NEXT: addi a2, a1, 384
-; LMULMAX1-NEXT: vle32.v v0, (a2)
+; LMULMAX1-NEXT: vle32.v v31, (a2)
; LMULMAX1-NEXT: addi a2, a1, 400
-; LMULMAX1-NEXT: vle32.v v1, (a2)
+; LMULMAX1-NEXT: vle32.v v0, (a2)
; LMULMAX1-NEXT: addi a2, a1, 416
-; LMULMAX1-NEXT: vle32.v v2, (a2)
+; LMULMAX1-NEXT: vle32.v v1, (a2)
; LMULMAX1-NEXT: addi a2, a1, 432
-; LMULMAX1-NEXT: vle32.v v3, (a2)
+; LMULMAX1-NEXT: vle32.v v2, (a2)
; LMULMAX1-NEXT: addi a2, a1, 448
-; LMULMAX1-NEXT: vle32.v v4, (a2)
+; LMULMAX1-NEXT: vle32.v v3, (a2)
; LMULMAX1-NEXT: addi a2, a1, 464
-; LMULMAX1-NEXT: vle32.v v5, (a2)
-; LMULMAX1-NEXT: addi a2, a1, 496
-; LMULMAX1-NEXT: vle32.v v6, (a2)
-; LMULMAX1-NEXT: addi a1, a1, 480
-; LMULMAX1-NEXT: vle32.v v7, (a1)
+; LMULMAX1-NEXT: vle32.v v4, (a2)
+; LMULMAX1-NEXT: addi a2, a1, 480
+; LMULMAX1-NEXT: vle32.v v5, (a1)
+; LMULMAX1-NEXT: addi a1, a1, 496
+; LMULMAX1-NEXT: vle32.v v6, (a1)
+; LMULMAX1-NEXT: vle32.v v7, (a2)
+; LMULMAX1-NEXT: vse32.v v5, (a0)
; LMULMAX1-NEXT: addi a1, a0, 496
; LMULMAX1-NEXT: vse32.v v6, (a1)
; LMULMAX1-NEXT: addi a1, a0, 480
; LMULMAX1-NEXT: vse32.v v7, (a1)
; LMULMAX1-NEXT: addi a1, a0, 464
-; LMULMAX1-NEXT: vse32.v v5, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 448
; LMULMAX1-NEXT: vse32.v v4, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 432
+; LMULMAX1-NEXT: addi a1, a0, 448
; LMULMAX1-NEXT: vse32.v v3, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 416
+; LMULMAX1-NEXT: addi a1, a0, 432
; LMULMAX1-NEXT: vse32.v v2, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 400
+; LMULMAX1-NEXT: addi a1, a0, 416
; LMULMAX1-NEXT: vse32.v v1, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 384
+; LMULMAX1-NEXT: addi a1, a0, 400
; LMULMAX1-NEXT: vse32.v v0, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 368
+; LMULMAX1-NEXT: addi a1, a0, 384
; LMULMAX1-NEXT: vse32.v v31, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 352
+; LMULMAX1-NEXT: addi a1, a0, 368
; LMULMAX1-NEXT: vse32.v v30, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 336
+; LMULMAX1-NEXT: addi a1, a0, 352
; LMULMAX1-NEXT: vse32.v v29, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 320
+; LMULMAX1-NEXT: addi a1, a0, 336
; LMULMAX1-NEXT: vse32.v v28, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 304
+; LMULMAX1-NEXT: addi a1, a0, 320
; LMULMAX1-NEXT: vse32.v v27, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 288
+; LMULMAX1-NEXT: addi a1, a0, 304
; LMULMAX1-NEXT: vse32.v v26, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 272
+; LMULMAX1-NEXT: addi a1, a0, 288
; LMULMAX1-NEXT: vse32.v v25, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 256
+; LMULMAX1-NEXT: addi a1, a0, 272
; LMULMAX1-NEXT: vse32.v v24, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 240
+; LMULMAX1-NEXT: addi a1, a0, 256
; LMULMAX1-NEXT: vse32.v v23, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 224
+; LMULMAX1-NEXT: addi a1, a0, 240
; LMULMAX1-NEXT: vse32.v v22, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 208
+; LMULMAX1-NEXT: addi a1, a0, 224
; LMULMAX1-NEXT: vse32.v v21, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 192
+; LMULMAX1-NEXT: addi a1, a0, 208
; LMULMAX1-NEXT: vse32.v v20, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 176
+; LMULMAX1-NEXT: addi a1, a0, 192
; LMULMAX1-NEXT: vse32.v v19, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 160
+; LMULMAX1-NEXT: addi a1, a0, 176
; LMULMAX1-NEXT: vse32.v v18, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 144
+; LMULMAX1-NEXT: addi a1, a0, 160
; LMULMAX1-NEXT: vse32.v v17, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 128
+; LMULMAX1-NEXT: addi a1, a0, 144
; LMULMAX1-NEXT: vse32.v v16, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 112
+; LMULMAX1-NEXT: addi a1, a0, 128
; LMULMAX1-NEXT: vse32.v v15, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 96
+; LMULMAX1-NEXT: addi a1, a0, 112
; LMULMAX1-NEXT: vse32.v v14, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 80
+; LMULMAX1-NEXT: addi a1, a0, 96
; LMULMAX1-NEXT: vse32.v v13, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 64
+; LMULMAX1-NEXT: addi a1, a0, 80
; LMULMAX1-NEXT: vse32.v v12, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 48
+; LMULMAX1-NEXT: addi a1, a0, 64
; LMULMAX1-NEXT: vse32.v v11, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 32
+; LMULMAX1-NEXT: addi a1, a0, 48
; LMULMAX1-NEXT: vse32.v v10, (a1)
-; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: addi a1, a0, 32
; LMULMAX1-NEXT: vse32.v v9, (a1)
+; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: ret
%v = load <128 x i32>, <128 x i32>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr52475.ll b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll
new file mode 100644
index 0000000000000..5e1e94965f1fa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \
+; RUN: -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \
+; RUN: -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s
+
+define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) {
+; CHECK-LABEL: ret_split_v128i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: addi a2, a1, 448
+; CHECK-NEXT: vle32.v v8, (a2)
+; CHECK-NEXT: addi a2, a0, 448
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: addi a2, a1, 384
+; CHECK-NEXT: vle32.v v8, (a2)
+; CHECK-NEXT: addi a2, a0, 384
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: addi a2, a1, 320
+; CHECK-NEXT: vle32.v v8, (a2)
+; CHECK-NEXT: addi a2, a0, 320
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: addi a2, a1, 256
+; CHECK-NEXT: vle32.v v8, (a2)
+; CHECK-NEXT: addi a2, a0, 256
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: addi a2, a1, 192
+; CHECK-NEXT: vle32.v v8, (a2)
+; CHECK-NEXT: addi a2, a0, 192
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: addi a2, a1, 128
+; CHECK-NEXT: vle32.v v8, (a2)
+; CHECK-NEXT: addi a2, a0, 128
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: addi a1, a1, 64
+; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: addi a0, a0, 64
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <128 x i32>, <128 x i32>* %x
+ ret <128 x i32> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index feb66c5dbe02e..84af77b944f69 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -842,10 +842,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV32I-NEXT: addi a1, a0, 1327
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: sh zero, 0(s0)
; RV32I-NEXT: sh a0, 6(s0)
; RV32I-NEXT: sh s1, 4(s0)
; RV32I-NEXT: sh s3, 2(s0)
+; RV32I-NEXT: sh zero, 0(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -919,10 +919,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: sh zero, 0(s0)
; RV64I-NEXT: sh a0, 6(s0)
; RV64I-NEXT: sh s1, 4(s0)
; RV64I-NEXT: sh s3, 2(s0)
+; RV64I-NEXT: sh zero, 0(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -1018,9 +1018,9 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV32I-NEXT: addi a1, a0, 1327
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: sh zero, 0(s0)
; RV32I-NEXT: sh a0, 6(s0)
; RV32I-NEXT: sh s1, 4(s0)
+; RV32I-NEXT: sh zero, 0(s0)
; RV32I-NEXT: sh s3, 2(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -1090,9 +1090,9 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: sh zero, 0(s0)
; RV64I-NEXT: sh a0, 6(s0)
; RV64I-NEXT: sh s1, 4(s0)
+; RV64I-NEXT: sh zero, 0(s0)
; RV64I-NEXT: sh s3, 2(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1321,10 +1321,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: sd zero, 0(s0)
; RV64I-NEXT: sd a0, 24(s0)
; RV64I-NEXT: sd s1, 16(s0)
; RV64I-NEXT: sd s3, 8(s0)
+; RV64I-NEXT: sd zero, 0(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index 330797320787e..1bcb906968137 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -799,10 +799,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV32I-NEXT: addi a1, a0, 1327
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: sh zero, 0(s0)
; RV32I-NEXT: sh a0, 6(s0)
; RV32I-NEXT: sh s1, 4(s0)
; RV32I-NEXT: sh s3, 2(s0)
+; RV32I-NEXT: sh zero, 0(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -869,10 +869,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: sh zero, 0(s0)
; RV64I-NEXT: sh a0, 6(s0)
; RV64I-NEXT: sh s1, 4(s0)
; RV64I-NEXT: sh s3, 2(s0)
+; RV64I-NEXT: sh zero, 0(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -1116,10 +1116,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: sd zero, 0(s0)
; RV64I-NEXT: sd a0, 24(s0)
; RV64I-NEXT: sd s1, 16(s0)
; RV64I-NEXT: sd s3, 8(s0)
+; RV64I-NEXT: sd zero, 0(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/WebAssembly/multi-return.ll b/llvm/test/CodeGen/WebAssembly/multi-return.ll
index d4b848c76ff83..5eed2ec4f247d 100644
--- a/llvm/test/CodeGen/WebAssembly/multi-return.ll
+++ b/llvm/test/CodeGen/WebAssembly/multi-return.ll
@@ -86,10 +86,10 @@ define { i64, i128 } @test5() {
; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]])
; CHECK: i64.load $push2=, 16($[[SP]])
; CHECK: i64.store 8($0), $pop2
+; CHECK: i64.store 0($0), $[[L2]]
; CHECK: i32.const $push12=, 16
; CHECK: i32.add $push3=, $0, $pop12
; CHECK: i64.store 0($pop3), $[[L1]]
-; CHECK: i64.store 0($0), $[[L2]]
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
%r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
@@ -101,20 +101,20 @@ define { i64, i128 } @test5() {
define { i128, i128 } @test6() {
; CHECK-LABEL: test6
; CHECK: call return_multi_multi
-; CHECK: i32.const $push0=, 24
+; CHECK: i32.const $push0=, 64
; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
-; CHECK: i32.const $push2=, 64
+; CHECK: i32.const $push2=, 24
; CHECK: i32.add $push3=, $[[SP]], $pop2
; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
; CHECK: i64.load $[[L3:[0-9]+]]=, 16($[[SP]])
; CHECK: i64.load $push4=, 56($[[SP]])
; CHECK: i64.store 16($0), $pop4
+; CHECK: i64.store 0($0), $[[L3]]
+; CHECK: i64.store 8($0), $[[L2]]
; CHECK: i32.const $push5=, 24
; CHECK: i32.add $push6=, $0, $pop5
-; CHECK: i64.store 0($pop6), $[[L2]]
-; CHECK: i64.store 0($0), $[[L3]]
-; CHECK: i64.store 8($0), $[[L1]]
+; CHECK: i64.store 0($pop6), $[[L1]]
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
%r3 = extractvalue { i64, i128, i192, i128, i64 } %t0, 3
@@ -126,22 +126,22 @@ define { i128, i128 } @test6() {
define { i64, i192 } @test7() {
; CHECK-LABEL: test7
; CHECK: call return_multi_multi
-; CHECK: i32.const $push2=, 40
-; CHECK: i32.add $push3=, $[[SP:[0-9]+]], $pop2
-; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop3)
-; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]])
-; CHECK: i64.load $[[L3:[0-9]+]]=, 32($[[SP]])
-; CHECK: i32.const $push0=, 24
-; CHECK: i32.add $push1=, $0, $pop0
-; CHECK: i32.const $push4=, 48
-; CHECK: i32.add $push5=, $[[SP]], $pop4
-; CHECK: i64.load $push6=, 0($pop5)
-; CHECK: i64.store 0($pop1), $pop6
-; CHECK: i64.store 8($0), $[[L3]]
+; CHECK: i32.const $push0=, 40
+; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
+; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
+; CHECK: i32.const $push2=, 48
+; CHECK: i32.add $push3=, $[[SP]], $pop2
+; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
+; CHECK: i64.load $[[L3:[0-9]+]]=, 8($[[SP]])
+; CHECK: i64.load $push4=, 32($[[SP]])
+; CHECK: i64.store 8($0), $pop4
+; CHECK: i64.store 0($0), $[[L3]]
+; CHECK: i32.const $push5=, 24
+; CHECK: i32.add $push6=, $0, $pop5
+; CHECK: i64.store 0($pop6), $[[L2]]
; CHECK: i32.const $push7=, 16
; CHECK: i32.add $push8=, $0, $pop7
; CHECK: i64.store 0($pop8), $[[L1]]
-; CHECK: i64.store 0($0), $[[L2]]
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
%r2 = extractvalue { i64, i128, i192, i128, i64 } %t0, 2
@@ -153,18 +153,18 @@ define { i64, i192 } @test7() {
define { i128, i192, i128, i64 } @test8() {
; CHECK-LABEL: test8
; CHECK: call return_multi_multi
-; CHECK: i32.const $push0=, 64
-; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
-; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
; CHECK: i32.const $push20=, 8
-; CHECK: i32.add $push21=, $[[SP]], $pop20
-; CHECK: i32.const $push2=, 32
-; CHECK: i32.add $push3=, $pop21, $pop2
+; CHECK: i32.add $push21=, $[[SP:[0-9]+]], $pop20
+; CHECK: i32.const $push0=, 32
+; CHECK: i32.add $push1=, $pop21, $pop0
+; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
+; CHECK: i32.const $push2=, 48
+; CHECK: i32.add $push3=, $[[SP]], $pop2
; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
-; CHECK: i32.const $push4=, 48
+; CHECK: i32.const $push4=, 24
; CHECK: i32.add $push5=, $[[SP]], $pop4
; CHECK: i64.load $[[L3:[0-9]+]]=, 0($pop5)
-; CHECK: i32.const $push6=, 24
+; CHECK: i32.const $push6=, 64
; CHECK: i32.add $push7=, $[[SP]], $pop6
; CHECK: i64.load $[[L4:[0-9]+]]=, 0($pop7)
; CHECK: i64.load $[[L5:[0-9]+]]=, 8($[[SP]])
@@ -172,19 +172,19 @@ define { i128, i192, i128, i64 } @test8() {
; CHECK: i64.load $[[L7:[0-9]+]]=, 32($[[SP]])
; CHECK: i64.load $push8=, 16($[[SP]])
; CHECK: i64.store 40($0), $pop8
+; CHECK: i64.store 16($0), $[[L7]]
+; CHECK: i64.store 0($0), $[[L6]]
+; CHECK: i64.store 8($0), $[[L4]]
+; CHECK: i64.store 56($0), $[[L5]]
; CHECK: i32.const $push9=, 48
; CHECK: i32.add $push10=, $0, $pop9
-; CHECK: i64.store 0($pop10), $[[L4]]
+; CHECK: i64.store 0($pop10), $[[L3]]
; CHECK: i32.const $push22=, 32
; CHECK: i32.add $push11=, $0, $pop22
-; CHECK: i64.store 0($pop11), $[[L3]]
-; CHECK: i64.store 16($0), $[[L7]]
+; CHECK: i64.store 0($pop11), $[[L2]]
; CHECK: i32.const $push12=, 24
; CHECK: i32.add $push13=, $0, $pop12
-; CHECK: i64.store 0($pop13), $[[L2]]
-; CHECK: i64.store 0($0), $[[L6]]
-; CHECK: i64.store 8($0), $[[L1]]
-; CHECK: i64.store 56($0), $[[L5]]
+; CHECK: i64.store 0($pop13), $[[L1]]
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
%r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
index aacbce01f3826..aec6b9496e1a2 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -313,14 +313,14 @@ define <4 x double> @convert_low_s_v4f64(<8 x i32> %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f64x2.convert_low_i32x4_s
-; CHECK-NEXT: v128.store 0
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: f64x2.convert_low_i32x4_s
; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.convert_low_i32x4_s
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%a = sitofp <4 x i32> %v to <4 x double>
@@ -333,14 +333,14 @@ define <4 x double> @convert_low_u_v4f64(<8 x i32> %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f64x2.convert_low_i32x4_u
-; CHECK-NEXT: v128.store 0
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.convert_low_i32x4_u
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%a = uitofp <4 x i32> %v to <4 x double>
@@ -354,14 +354,14 @@ define <4 x double> @convert_low_s_v4f64_2(<8 x i32> %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f64x2.convert_low_i32x4_s
-; CHECK-NEXT: v128.store 0
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: f64x2.convert_low_i32x4_s
; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.convert_low_i32x4_s
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%v = sitofp <8 x i32> %x to <8 x double>
%a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -374,14 +374,14 @@ define <4 x double> @convert_low_u_v4f64_2(<8 x i32> %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f64x2.convert_low_i32x4_u
-; CHECK-NEXT: v128.store 0
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: f64x2.convert_low_i32x4_u
; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.convert_low_i32x4_u
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%v = uitofp <8 x i32> %x to <8 x double>
%a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -394,14 +394,14 @@ define <4 x double> @promote_low_v4f64(<8 x float> %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f64x2.promote_low_f32x4
-; CHECK-NEXT: v128.store 0
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: f64x2.promote_low_f32x4
; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.promote_low_f32x4
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%v = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%a = fpext <4 x float> %v to <4 x double>
@@ -414,14 +414,14 @@ define <4 x double> @promote_low_v4f64_2(<8 x float> %x) {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: f64x2.promote_low_f32x4
-; CHECK-NEXT: v128.store 0
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: f64x2.promote_low_f32x4
; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.promote_low_f32x4
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%v = fpext <8 x float> %x to <8 x double>
%a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index 8b6a7f628b91e..f8f7d7ed2c556 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -51,9 +51,9 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
+; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rsi, (%rdi)
-; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movq %r8, 24(%rdi)
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index b2c7828f6e8ea..500cb0c677ff5 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -1225,9 +1225,9 @@ define fp128 @TestPair128(i64 %a, i64 %b) nounwind {
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, 8(%eax)
; X32-NEXT: movl %edx, 4(%eax)
; X32-NEXT: movl %ecx, (%eax)
-; X32-NEXT: movl %esi, 8(%eax)
; X32-NEXT: movl %edi, 12(%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/i128-add.ll b/llvm/test/CodeGen/X86/i128-add.ll
index d128d75e64573..b033fc155e700 100644
--- a/llvm/test/CodeGen/X86/i128-add.ll
+++ b/llvm/test/CodeGen/X86/i128-add.ll
@@ -20,8 +20,8 @@ define i128 @add_i128(i128 %x, i128 %y) nounwind {
; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
@@ -61,8 +61,8 @@ define <1 x i128> @add_v1i128(<1 x i128> %x, <1 x i128> %y) nounwind {
; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index e8e45a1567362..3051abd172b50 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -7179,12 +7179,12 @@ define <16 x i64> @load_one_mask_bit_set6(<16 x i64>* %addr, <16 x i64> %val) {
; SSE2-NEXT: movsd {{.*#+}} xmm8 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm6 = xmm6[0],xmm8[0]
; SSE2-NEXT: movaps %xmm7, 112(%rdi)
+; SSE2-NEXT: movaps %xmm5, 80(%rdi)
; SSE2-NEXT: movaps %xmm4, 64(%rdi)
; SSE2-NEXT: movaps %xmm3, 48(%rdi)
; SSE2-NEXT: movaps %xmm2, 32(%rdi)
-; SSE2-NEXT: movaps %xmm0, (%rdi)
-; SSE2-NEXT: movaps %xmm5, 80(%rdi)
; SSE2-NEXT: movaps %xmm1, 16(%rdi)
+; SSE2-NEXT: movaps %xmm0, (%rdi)
; SSE2-NEXT: movaps %xmm6, 96(%rdi)
; SSE2-NEXT: retq
;
@@ -7195,13 +7195,13 @@ define <16 x i64> @load_one_mask_bit_set6(<16 x i64>* %addr, <16 x i64> %val) {
; SSE42-NEXT: pinsrq $0, 80(%rsi), %xmm5
; SSE42-NEXT: pinsrq $1, 104(%rsi), %xmm6
; SSE42-NEXT: movaps %xmm7, 112(%rdi)
+; SSE42-NEXT: movdqa %xmm6, 96(%rdi)
+; SSE42-NEXT: movdqa %xmm5, 80(%rdi)
; SSE42-NEXT: movaps %xmm4, 64(%rdi)
; SSE42-NEXT: movaps %xmm3, 48(%rdi)
; SSE42-NEXT: movaps %xmm2, 32(%rdi)
-; SSE42-NEXT: movaps %xmm0, (%rdi)
-; SSE42-NEXT: movdqa %xmm6, 96(%rdi)
-; SSE42-NEXT: movdqa %xmm5, 80(%rdi)
; SSE42-NEXT: movdqa %xmm1, 16(%rdi)
+; SSE42-NEXT: movaps %xmm0, (%rdi)
; SSE42-NEXT: retq
;
; AVX1-LABEL: load_one_mask_bit_set6:
diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll
index eab7fc8c5b004..492438edbce37 100644
--- a/llvm/test/CodeGen/X86/mul128.ll
+++ b/llvm/test/CodeGen/X86/mul128.ll
@@ -52,13 +52,13 @@ define i128 @foo(i128 %t, i128 %u) {
; X86-NEXT: imull %esi, %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebx
@@ -77,12 +77,12 @@ define i128 @foo(i128 %t, i128 %u) {
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movzbl %bl, %esi
; X86-NEXT: adcl %esi, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl %ebp, 4(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl %eax, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: movl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll
index 6d28b75fd1984..7eb192dca1211 100644
--- a/llvm/test/CodeGen/X86/pmulh.ll
+++ b/llvm/test/CodeGen/X86/pmulh.ll
@@ -1279,29 +1279,29 @@ define <32 x i32> @mulhsw_v32i16_ashr(<32 x i16> %a, <32 x i16> %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movq %rdi, %rax
; SSE41-NEXT: pmulhw %xmm4, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm4, %xmm4
-; SSE41-NEXT: pmulhw %xmm5, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm5, %xmm5
-; SSE41-NEXT: pmulhw %xmm6, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm6, %xmm6
-; SSE41-NEXT: pmulhw %xmm7, %xmm3
-; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm7, %xmm7
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
+; SSE41-NEXT: pmulhw %xmm5, %xmm1
+; SSE41-NEXT: pmovsxwd %xmm1, %xmm5
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
+; SSE41-NEXT: pmulhw %xmm6, %xmm2
+; SSE41-NEXT: pmovsxwd %xmm2, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm2, %xmm2
+; SSE41-NEXT: pmulhw %xmm7, %xmm3
+; SSE41-NEXT: pmovsxwd %xmm3, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm3, %xmm3
-; SSE41-NEXT: movdqa %xmm3, 96(%rdi)
-; SSE41-NEXT: movdqa %xmm2, 64(%rdi)
-; SSE41-NEXT: movdqa %xmm1, 32(%rdi)
-; SSE41-NEXT: movdqa %xmm0, (%rdi)
-; SSE41-NEXT: movdqa %xmm7, 112(%rdi)
-; SSE41-NEXT: movdqa %xmm6, 80(%rdi)
-; SSE41-NEXT: movdqa %xmm5, 48(%rdi)
-; SSE41-NEXT: movdqa %xmm4, 16(%rdi)
+; SSE41-NEXT: movdqa %xmm3, 112(%rdi)
+; SSE41-NEXT: movdqa %xmm7, 96(%rdi)
+; SSE41-NEXT: movdqa %xmm2, 80(%rdi)
+; SSE41-NEXT: movdqa %xmm6, 64(%rdi)
+; SSE41-NEXT: movdqa %xmm1, 48(%rdi)
+; SSE41-NEXT: movdqa %xmm5, 32(%rdi)
+; SSE41-NEXT: movdqa %xmm0, 16(%rdi)
+; SSE41-NEXT: movdqa %xmm4, (%rdi)
; SSE41-NEXT: retq
;
; AVX2-LABEL: mulhsw_v32i16_ashr:
@@ -1770,53 +1770,53 @@ define <64 x i32> @mulhsw_v64i16_ashr(<64 x i16> %a, <64 x i16> %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movq %rdi, %rax
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm8, %xmm8
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm8
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm9
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm9, %xmm9
+; SSE41-NEXT: pmovsxwd %xmm1, %xmm10
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm11
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm10, %xmm10
+; SSE41-NEXT: pmovsxwd %xmm2, %xmm12
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; SSE41-NEXT: pmovsxwd %xmm2, %xmm13
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm3
-; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm11, %xmm11
+; SSE41-NEXT: pmovsxwd %xmm3, %xmm14
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; SSE41-NEXT: pmovsxwd %xmm3, %xmm15
; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm4
-; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm12, %xmm12
-; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm5
-; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm13, %xmm13
-; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm6
-; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm14, %xmm14
-; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm7
-; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,2,3]
-; SSE41-NEXT: pmovsxwd %xmm15, %xmm15
-; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
-; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
-; SSE41-NEXT: pmovsxwd %xmm2, %xmm2
-; SSE41-NEXT: pmovsxwd %xmm3, %xmm3
+; SSE41-NEXT: pmovsxwd %xmm4, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm4, %xmm4
+; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm5
+; SSE41-NEXT: pmovsxwd %xmm5, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm5, %xmm5
+; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm6
+; SSE41-NEXT: pmovsxwd %xmm6, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm6, %xmm6
+; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm7
+; SSE41-NEXT: pmovsxwd %xmm7, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm7, %xmm7
-; SSE41-NEXT: movdqa %xmm7, 224(%rdi)
-; SSE41-NEXT: movdqa %xmm6, 192(%rdi)
-; SSE41-NEXT: movdqa %xmm5, 160(%rdi)
-; SSE41-NEXT: movdqa %xmm4, 128(%rdi)
-; SSE41-NEXT: movdqa %xmm3, 96(%rdi)
-; SSE41-NEXT: movdqa %xmm2, 64(%rdi)
-; SSE41-NEXT: movdqa %xmm1, 32(%rdi)
-; SSE41-NEXT: movdqa %xmm0, (%rdi)
-; SSE41-NEXT: movdqa %xmm15, 240(%rdi)
-; SSE41-NEXT: movdqa %xmm14, 208(%rdi)
-; SSE41-NEXT: movdqa %xmm13, 176(%rdi)
-; SSE41-NEXT: movdqa %xmm12, 144(%rdi)
-; SSE41-NEXT: movdqa %xmm11, 112(%rdi)
-; SSE41-NEXT: movdqa %xmm10, 80(%rdi)
-; SSE41-NEXT: movdqa %xmm9, 48(%rdi)
-; SSE41-NEXT: movdqa %xmm8, 16(%rdi)
+; SSE41-NEXT: movdqa %xmm7, 240(%rdi)
+; SSE41-NEXT: movdqa %xmm3, 224(%rdi)
+; SSE41-NEXT: movdqa %xmm6, 208(%rdi)
+; SSE41-NEXT: movdqa %xmm1, 192(%rdi)
+; SSE41-NEXT: movdqa %xmm5, 176(%rdi)
+; SSE41-NEXT: movdqa %xmm2, 160(%rdi)
+; SSE41-NEXT: movdqa %xmm4, 144(%rdi)
+; SSE41-NEXT: movdqa %xmm0, 128(%rdi)
+; SSE41-NEXT: movdqa %xmm15, 112(%rdi)
+; SSE41-NEXT: movdqa %xmm14, 96(%rdi)
+; SSE41-NEXT: movdqa %xmm13, 80(%rdi)
+; SSE41-NEXT: movdqa %xmm12, 64(%rdi)
+; SSE41-NEXT: movdqa %xmm11, 48(%rdi)
+; SSE41-NEXT: movdqa %xmm10, 32(%rdi)
+; SSE41-NEXT: movdqa %xmm9, 16(%rdi)
+; SSE41-NEXT: movdqa %xmm8, (%rdi)
; SSE41-NEXT: retq
;
; AVX2-LABEL: mulhsw_v64i16_ashr:
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index e6115ab0b2ba1..36f17a6628ac0 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -18,42 +18,40 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: xorl %r8d, %r8d
; ILP-NEXT: addl %esi, %esi
; ILP-NEXT: leal 3(%rsi), %r9d
-; ILP-NEXT: movb $125, %r10b
-; ILP-NEXT: movl $1, %edi
-; ILP-NEXT: xorl %r11d, %r11d
+; ILP-NEXT: movl $1, %r11d
+; ILP-NEXT: xorl %r14d, %r14d
; ILP-NEXT: movl %r9d, %ecx
-; ILP-NEXT: shldq %cl, %rdi, %r11
-; ILP-NEXT: subb %sil, %r10b
-; ILP-NEXT: addb $-125, %sil
-; ILP-NEXT: xorl %ebx, %ebx
-; ILP-NEXT: movl %esi, %ecx
-; ILP-NEXT: shldq %cl, %rdi, %rbx
+; ILP-NEXT: shldq %cl, %r11, %r14
; ILP-NEXT: movl $1, %edx
; ILP-NEXT: shlq %cl, %rdx
-; ILP-NEXT: movl $1, %r14d
+; ILP-NEXT: leal -125(%rsi), %r10d
+; ILP-NEXT: xorl %ebx, %ebx
; ILP-NEXT: movl %r10d, %ecx
-; ILP-NEXT: shrdq %cl, %r8, %r14
-; ILP-NEXT: movl %r9d, %ecx
-; ILP-NEXT: shlq %cl, %rdi
+; ILP-NEXT: shldq %cl, %r11, %rbx
; ILP-NEXT: testb $64, %r9b
-; ILP-NEXT: cmovneq %rdi, %r11
-; ILP-NEXT: cmovneq %r8, %rdi
-; ILP-NEXT: testb $64, %r10b
-; ILP-NEXT: cmovneq %r8, %r14
-; ILP-NEXT: testb $64, %sil
-; ILP-NEXT: cmovneq %rdx, %rbx
+; ILP-NEXT: cmovneq %rdx, %r14
; ILP-NEXT: cmovneq %r8, %rdx
+; ILP-NEXT: movl $1, %edi
+; ILP-NEXT: shlq %cl, %rdi
+; ILP-NEXT: movb $125, %cl
+; ILP-NEXT: subb %sil, %cl
+; ILP-NEXT: shrdq %cl, %r8, %r11
+; ILP-NEXT: testb $64, %cl
+; ILP-NEXT: cmovneq %r8, %r11
+; ILP-NEXT: testb $64, %r10b
+; ILP-NEXT: cmovneq %rdi, %rbx
+; ILP-NEXT: cmovneq %r8, %rdi
; ILP-NEXT: testb %r9b, %r9b
-; ILP-NEXT: cmovsq %r8, %r11
-; ILP-NEXT: cmovsq %r8, %rdi
-; ILP-NEXT: movq %r11, 8(%rax)
-; ILP-NEXT: movq %rdi, (%rax)
+; ILP-NEXT: cmovsq %r8, %r14
+; ILP-NEXT: cmovsq %r8, %rdx
+; ILP-NEXT: movq %r14, 8(%rax)
+; ILP-NEXT: movq %rdx, (%rax)
; ILP-NEXT: cmovnsq %r8, %rbx
; ILP-NEXT: cmoveq %r8, %rbx
; ILP-NEXT: movq %rbx, 24(%rax)
-; ILP-NEXT: cmovnsq %r14, %rdx
-; ILP-NEXT: cmoveq %r8, %rdx
-; ILP-NEXT: movq %rdx, 16(%rax)
+; ILP-NEXT: cmovnsq %r11, %rdi
+; ILP-NEXT: cmoveq %r8, %rdi
+; ILP-NEXT: movq %rdi, 16(%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: popq %r14
; ILP-NEXT: retq
@@ -252,27 +250,24 @@ define i256 @test2(i256 %a) nounwind {
; ILP-LABEL: test2:
; ILP: # %bb.0:
; ILP-NEXT: movq %rdi, %rax
-; ILP-NEXT: xorl %edi, %edi
+; ILP-NEXT: xorl %r9d, %r9d
; ILP-NEXT: movq %rsi, %r11
; ILP-NEXT: negq %r11
; ILP-NEXT: movl $0, %r10d
; ILP-NEXT: sbbq %rdx, %r10
-; ILP-NEXT: movl $0, %r9d
-; ILP-NEXT: sbbq %rcx, %r9
-; ILP-NEXT: sbbq %r8, %rdi
-; ILP-NEXT: andq %rcx, %r9
-; ILP-NEXT: bsrq %r9, %rcx
-; ILP-NEXT: xorq $63, %rcx
-; ILP-NEXT: andq %r8, %rdi
-; ILP-NEXT: bsrq %rdi, %r8
+; ILP-NEXT: movl $0, %edi
+; ILP-NEXT: sbbq %rcx, %rdi
+; ILP-NEXT: sbbq %r8, %r9
+; ILP-NEXT: andq %r8, %r9
+; ILP-NEXT: bsrq %r9, %r8
; ILP-NEXT: andq %rdx, %r10
; ILP-NEXT: bsrq %r10, %rdx
; ILP-NEXT: xorq $63, %r8
+; ILP-NEXT: andq %rcx, %rdi
+; ILP-NEXT: bsrq %rdi, %rcx
+; ILP-NEXT: xorq $63, %rcx
; ILP-NEXT: addq $64, %rcx
-; ILP-NEXT: testq %rdi, %rdi
-; ILP-NEXT: movq $0, 24(%rax)
-; ILP-NEXT: movq $0, 16(%rax)
-; ILP-NEXT: movq $0, 8(%rax)
+; ILP-NEXT: testq %r9, %r9
; ILP-NEXT: cmovneq %r8, %rcx
; ILP-NEXT: xorq $63, %rdx
; ILP-NEXT: andq %rsi, %r11
@@ -287,6 +282,9 @@ define i256 @test2(i256 %a) nounwind {
; ILP-NEXT: orq %r9, %rdi
; ILP-NEXT: cmovneq %rcx, %rsi
; ILP-NEXT: movq %rsi, (%rax)
+; ILP-NEXT: movq $0, 24(%rax)
+; ILP-NEXT: movq $0, 16(%rax)
+; ILP-NEXT: movq $0, 8(%rax)
; ILP-NEXT: retq
;
; HYBRID-LABEL: test2:
@@ -457,46 +455,48 @@ define i256 @test2(i256 %a) nounwind {
define i256 @test3(i256 %n) nounwind {
; ILP-LABEL: test3:
; ILP: # %bb.0:
+; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rdi, %rax
-; ILP-NEXT: xorl %r10d, %r10d
+; ILP-NEXT: xorl %edi, %edi
; ILP-NEXT: movq %rsi, %r9
; ILP-NEXT: negq %r9
+; ILP-NEXT: movl $0, %r10d
+; ILP-NEXT: sbbq %rdx, %r10
; ILP-NEXT: movl $0, %r11d
-; ILP-NEXT: sbbq %rdx, %r11
-; ILP-NEXT: movl $0, %edi
-; ILP-NEXT: sbbq %rcx, %rdi
-; ILP-NEXT: sbbq %r8, %r10
+; ILP-NEXT: sbbq %rcx, %r11
+; ILP-NEXT: sbbq %r8, %rdi
+; ILP-NEXT: notq %r8
+; ILP-NEXT: andq %rdi, %r8
+; ILP-NEXT: bsrq %r8, %rbx
+; ILP-NEXT: notq %rdx
+; ILP-NEXT: andq %r10, %rdx
+; ILP-NEXT: bsrq %rdx, %r10
+; ILP-NEXT: notq %rsi
+; ILP-NEXT: xorq $63, %rbx
; ILP-NEXT: notq %rcx
-; ILP-NEXT: andq %rdi, %rcx
+; ILP-NEXT: andq %r11, %rcx
; ILP-NEXT: bsrq %rcx, %rdi
-; ILP-NEXT: notq %rdx
-; ILP-NEXT: andq %r11, %rdx
; ILP-NEXT: xorq $63, %rdi
-; ILP-NEXT: notq %r8
-; ILP-NEXT: andq %r10, %r8
-; ILP-NEXT: bsrq %r8, %r10
-; ILP-NEXT: xorq $63, %r10
; ILP-NEXT: addq $64, %rdi
-; ILP-NEXT: bsrq %rdx, %r11
-; ILP-NEXT: notq %rsi
; ILP-NEXT: testq %r8, %r8
-; ILP-NEXT: movq $0, 24(%rax)
-; ILP-NEXT: movq $0, 16(%rax)
-; ILP-NEXT: movq $0, 8(%rax)
-; ILP-NEXT: cmovneq %r10, %rdi
-; ILP-NEXT: xorq $63, %r11
+; ILP-NEXT: cmovneq %rbx, %rdi
+; ILP-NEXT: xorq $63, %r10
; ILP-NEXT: andq %r9, %rsi
-; ILP-NEXT: movl $127, %r9d
+; ILP-NEXT: movl $127, %ebx
; ILP-NEXT: bsrq %rsi, %rsi
-; ILP-NEXT: cmoveq %r9, %rsi
+; ILP-NEXT: cmoveq %rbx, %rsi
; ILP-NEXT: xorq $63, %rsi
; ILP-NEXT: addq $64, %rsi
; ILP-NEXT: testq %rdx, %rdx
-; ILP-NEXT: cmovneq %r11, %rsi
+; ILP-NEXT: cmovneq %r10, %rsi
; ILP-NEXT: subq $-128, %rsi
-; ILP-NEXT: orq %rcx, %r8
+; ILP-NEXT: orq %r8, %rcx
; ILP-NEXT: cmovneq %rdi, %rsi
; ILP-NEXT: movq %rsi, (%rax)
+; ILP-NEXT: movq $0, 24(%rax)
+; ILP-NEXT: movq $0, 16(%rax)
+; ILP-NEXT: movq $0, 8(%rax)
+; ILP-NEXT: popq %rbx
; ILP-NEXT: retq
;
; HYBRID-LABEL: test3:
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index 18167458bd610..6a2681210b465 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -25,9 +25,9 @@ define i256 @sub256(i256 %a, i256 %b) nounwind {
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
+; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rsi, (%rdi)
-; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movq %r8, 24(%rdi)
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll
index bc93d02e3deee..a17551deb7656 100644
--- a/llvm/test/CodeGen/X86/umul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll
@@ -494,10 +494,10 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, (%edx)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 4(%edx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, (%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 8(%edx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 12(%edx)
@@ -530,17 +530,17 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %r8, %r11
-; X64-NEXT: movq %rcx, %r8
+; X64-NEXT: movq %rcx, %r10
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: movq %rdi, %r12
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %r10
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %r13, %rax
-; X64-NEXT: mulq %r10
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: addq %r14, %rdi
@@ -559,22 +559,22 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: addq %rcx, %rbp
; X64-NEXT: adcq %rdi, %rbx
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %r10
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r15
; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %r10
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %r14
; X64-NEXT: addq %rcx, %r14
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %r8, %rax
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: addq %r14, %rdi
-; X64-NEXT: adcq %r10, %rdx
+; X64-NEXT: adcq %r8, %rdx
; X64-NEXT: imulq %rcx, %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14
; X64-NEXT: addq %rbp, %r15
@@ -583,7 +583,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r14
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r10
+; X64-NEXT: movq %rax, %r8
; X64-NEXT: movq %r13, %rax
; X64-NEXT: mulq %r14
; X64-NEXT: movq %rdx, %rbp
@@ -597,20 +597,20 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: adcq %rbp, %rdx
; X64-NEXT: imulq %rcx, %r13
; X64-NEXT: addq %rdx, %r13
-; X64-NEXT: addq %r15, %r10
+; X64-NEXT: addq %r15, %r8
; X64-NEXT: adcq %rdi, %rax
; X64-NEXT: adcq %r11, %r13
-; X64-NEXT: imulq %r14, %r8
-; X64-NEXT: addq %r13, %r8
+; X64-NEXT: imulq %r14, %r10
+; X64-NEXT: addq %r13, %r10
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT: addq %rcx, %rsi
-; X64-NEXT: addq %r8, %rsi
+; X64-NEXT: addq %r10, %rsi
+; X64-NEXT: movq %r9, 8(%r12)
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rcx, (%r12)
-; X64-NEXT: movq %r9, 8(%r12)
-; X64-NEXT: movq %r10, 16(%r12)
+; X64-NEXT: movq %r8, 16(%r12)
; X64-NEXT: movq %rax, 24(%r12)
; X64-NEXT: movl %esi, 32(%r12)
; X64-NEXT: shrq $32, %rsi
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index e9ce636160553..40fc6db7fe6b2 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -38,8 +38,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: .cfi_def_cfa_offset 44
+; X86-NEXT: subl $28, %esp
+; X86-NEXT: .cfi_def_cfa_offset 48
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
@@ -60,45 +60,42 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %esi, %ebp
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: addl %esi, %ebp
-; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ebx
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: addl %ebp, %edi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %edi, %ebx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %ebp, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %esi, %ecx
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
+; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: addl %edi, %ecx
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: mull %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -106,12 +103,12 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: mull %edi
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: testl %ebp, %ebp
; X86-NEXT: setne %cl
@@ -124,10 +121,10 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
; X86-NEXT: orb %ch, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: setne %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: testl %edi, %edi
+; X86-NEXT: setne %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: testl %ebp, %ebp
; X86-NEXT: setne %bh
; X86-NEXT: andb %cl, %bh
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
@@ -136,12 +133,12 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: setne %bl
-; X86-NEXT: orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: orl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, (%ecx)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 4(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl %eax, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: setne %al
@@ -153,7 +150,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, 16(%ecx)
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16