[llvm] [LoongArch] Switch to the Machine Scheduler (PR #83759)

via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 3 19:49:05 PST 2024


https://github.com/wangleiat created https://github.com/llvm/llvm-project/pull/83759

The SelectionDAG scheduling preference now becomes source-order scheduling (the machine scheduler generates better code even without a machine model defined for LoongArch yet).

Most of the test changes are trivial instruction reorderings and differing register allocations, without any obvious performance impact.

This is similar to commit: 3d0fbafd0bce43bb9106230a45d1130f7a40e5ec
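
For quick reference, the entire functional change is the one-line
subtarget override shown in the diff below. Sketched here with the
mechanism spelled out in comments; the explanation paraphrases how the
generic TargetSubtargetInfo hook is wired up, it is not quoted from the
LoongArch sources:

    // llvm/lib/Target/LoongArch/LoongArchSubtarget.h
    // Opt in to the MachineScheduler pass. When this hook returns true,
    // SelectionDAG switches to simple source-order list scheduling and
    // defers real instruction scheduling to the MI scheduler, which runs
    // on MachineInstrs after instruction selection. With no LoongArch
    // machine model defined yet, the scheduler's generic heuristics apply.
    bool enableMachineScheduler() const override { return true; }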

From 58bc68dad7fb97a6aee4334b9dc688fcb4588ac1 Mon Sep 17 00:00:00 2001
From: wanglei <wanglei at loongson.cn>
Date: Fri, 1 Mar 2024 15:20:07 +0800
Subject: [PATCH] [LoongArch] Switch to the Machine Scheduler

The SelectionDAG scheduling preference now becomes source-order
scheduling (the machine scheduler generates better code even without
a machine model defined for LoongArch yet).

Most of the test changes are trivial instruction reorderings and
differing register allocations, without any obvious performance impact.

This is similar to commit: 3d0fbafd0bce43bb9106230a45d1130f7a40e5ec
---
 .../lib/Target/LoongArch/LoongArchSubtarget.h |    1 +
 llvm/test/CodeGen/LoongArch/alloca.ll         |    8 +-
 llvm/test/CodeGen/LoongArch/alsl.ll           |   16 +-
 .../LoongArch/atomicrmw-uinc-udec-wrap.ll     |  216 +--
 llvm/test/CodeGen/LoongArch/bitreverse.ll     |   46 +-
 .../CodeGen/LoongArch/branch-relaxation.ll    |   12 +-
 .../CodeGen/LoongArch/bswap-bitreverse.ll     |   12 +-
 llvm/test/CodeGen/LoongArch/bswap.ll          |   40 +-
 llvm/test/CodeGen/LoongArch/bytepick.ll       |   15 +-
 .../CodeGen/LoongArch/calling-conv-common.ll  |  136 +-
 .../CodeGen/LoongArch/calling-conv-lp64d.ll   |   12 +-
 .../CodeGen/LoongArch/calling-conv-lp64s.ll   |    4 +-
 .../LoongArch/can-not-realign-stack.ll        |   24 +-
 .../CodeGen/LoongArch/cfr-pseudo-copy.mir     |    4 +-
 .../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll |   98 +-
 llvm/test/CodeGen/LoongArch/fcopysign.ll      |    8 +-
 .../LoongArch/get-setcc-result-type.ll        |   24 +-
 llvm/test/CodeGen/LoongArch/ghc-cc.ll         |   96 +-
 .../CodeGen/LoongArch/intrinsic-memcpy.ll     |   16 +-
 .../CodeGen/LoongArch/ir-instruction/and.ll   |   40 +-
 .../CodeGen/LoongArch/ir-instruction/ashr.ll  |    4 +-
 .../ir-instruction/atomic-cmpxchg.ll          |  210 +--
 .../LoongArch/ir-instruction/atomicrmw-fp.ll  |  736 +++++----
 .../ir-instruction/atomicrmw-minmax.ll        |  520 +++---
 .../LoongArch/ir-instruction/atomicrmw.ll     | 1470 ++++++++---------
 .../ir-instruction/double-convert.ll          |   32 +-
 .../LoongArch/ir-instruction/float-convert.ll |   46 +-
 .../LoongArch/ir-instruction/load-store.ll    | 1124 ++++++++-----
 .../CodeGen/LoongArch/ir-instruction/lshr.ll  |    4 +-
 .../CodeGen/LoongArch/ir-instruction/mul.ll   |  103 +-
 .../CodeGen/LoongArch/ir-instruction/shl.ll   |    4 +-
 .../CodeGen/LoongArch/ir-instruction/sub.ll   |    4 +-
 .../CodeGen/LoongArch/lasx/build-vector.ll    |   88 +-
 llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll |  448 ++---
 llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll |  448 ++---
 .../LoongArch/lasx/ir-instruction/add.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/and.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/ashr.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/fadd.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/fcmp.ll     |  168 +-
 .../LoongArch/lasx/ir-instruction/fdiv.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/fmul.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/fsub.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/icmp.ll     |  144 +-
 .../lasx/ir-instruction/insertelement.ll      |   16 +-
 .../LoongArch/lasx/ir-instruction/lshr.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/mul.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/or.ll       |   24 +-
 .../LoongArch/lasx/ir-instruction/sdiv.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/shl.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/sub.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/udiv.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/xor.ll      |   24 +-
 llvm/test/CodeGen/LoongArch/lasx/mulh.ll      |   48 +-
 llvm/test/CodeGen/LoongArch/lasx/vselect.ll   |   22 +-
 .../CodeGen/LoongArch/lsx/build-vector.ll     |   40 +-
 llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll  |  448 ++---
 llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll  |  448 ++---
 .../LoongArch/lsx/ir-instruction/add.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/and.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/ashr.ll      |   24 +-
 .../lsx/ir-instruction/extractelement.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/fadd.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/fcmp.ll      |  168 +-
 .../LoongArch/lsx/ir-instruction/fdiv.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/fmul.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/fsub.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/icmp.ll      |  144 +-
 .../lsx/ir-instruction/insertelement.ll       |   16 +-
 .../LoongArch/lsx/ir-instruction/lshr.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/mul.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/or.ll        |   24 +-
 .../LoongArch/lsx/ir-instruction/sdiv.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/shl.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/sub.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/udiv.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/xor.ll       |   24 +-
 llvm/test/CodeGen/LoongArch/lsx/mulh.ll       |   48 +-
 llvm/test/CodeGen/LoongArch/lsx/vselect.ll    |   22 +-
 .../CodeGen/LoongArch/preferred-alignments.ll |   16 +-
 llvm/test/CodeGen/LoongArch/rotl-rotr.ll      |  388 +++--
 .../CodeGen/LoongArch/select-to-shiftand.ll   |    4 +-
 .../CodeGen/LoongArch/shift-masked-shamt.ll   |   36 +-
 llvm/test/CodeGen/LoongArch/shrinkwrap.ll     |    4 +-
 .../CodeGen/LoongArch/smul-with-overflow.ll   |  706 ++++----
 llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll |   32 +-
 .../LoongArch/spill-ra-without-kill.ll        |   12 +-
 .../CodeGen/LoongArch/spill-reload-cfr.ll     |   62 +-
 llvm/test/CodeGen/LoongArch/tail-calls.ll     |   18 +-
 .../CodeGen/LoongArch/unaligned-access.ll     |   24 +-
 llvm/test/CodeGen/LoongArch/vararg.ll         |   20 +-
 llvm/test/CodeGen/LoongArch/vector-fp-imm.ll  | 1325 ++++++++-------
 .../LoongArch/zext-with-load-is-free.ll       |    8 +-
 ...arch_generated_funcs.ll.generated.expected |   33 +-
 ...ch_generated_funcs.ll.nogenerated.expected |   33 +-
 95 files changed, 5866 insertions(+), 5304 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 11c0b39e176e61..cecb4a50aa7633 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -113,6 +113,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
   Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
   Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
   unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
+  bool enableMachineScheduler() const override { return true; }
 };
 } // end namespace llvm
 
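[Note: to compare the two behaviors locally, the MachineScheduler's
-enable-misched flag overrides this hook when given explicitly, so
something like the following should approximate the before/after
schedules (an approximation only: the SelectionDAG source-order
preference still follows the subtarget hook even when the MI scheduler
pass itself is disabled):

    llc -mtriple=loongarch64 -enable-misched=false test.ll   # pre-patch-like
    llc -mtriple=loongarch64 -enable-misched=true  test.ll   # new default
]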
diff --git a/llvm/test/CodeGen/LoongArch/alloca.ll b/llvm/test/CodeGen/LoongArch/alloca.ll
index d766be6aac9509..75a05689e4178d 100644
--- a/llvm/test/CodeGen/LoongArch/alloca.ll
+++ b/llvm/test/CodeGen/LoongArch/alloca.ll
@@ -126,8 +126,7 @@ define void @alloca_callframe(i32 %n) nounwind {
 ; LA32-NEXT:    st.w $a1, $sp, 8
 ; LA32-NEXT:    ori $a1, $zero, 10
 ; LA32-NEXT:    st.w $a1, $sp, 4
-; LA32-NEXT:    ori $a1, $zero, 9
-; LA32-NEXT:    st.w $a1, $sp, 0
+; LA32-NEXT:    ori $t0, $zero, 9
 ; LA32-NEXT:    ori $a1, $zero, 2
 ; LA32-NEXT:    ori $a2, $zero, 3
 ; LA32-NEXT:    ori $a3, $zero, 4
@@ -135,6 +134,7 @@ define void @alloca_callframe(i32 %n) nounwind {
 ; LA32-NEXT:    ori $a5, $zero, 6
 ; LA32-NEXT:    ori $a6, $zero, 7
 ; LA32-NEXT:    ori $a7, $zero, 8
+; LA32-NEXT:    st.w $t0, $sp, 0
 ; LA32-NEXT:    bl %plt(func)
 ; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    addi.w $sp, $fp, -16
@@ -162,8 +162,7 @@ define void @alloca_callframe(i32 %n) nounwind {
 ; LA64-NEXT:    st.d $a1, $sp, 16
 ; LA64-NEXT:    ori $a1, $zero, 10
 ; LA64-NEXT:    st.d $a1, $sp, 8
-; LA64-NEXT:    ori $a1, $zero, 9
-; LA64-NEXT:    st.d $a1, $sp, 0
+; LA64-NEXT:    ori $t0, $zero, 9
 ; LA64-NEXT:    ori $a1, $zero, 2
 ; LA64-NEXT:    ori $a2, $zero, 3
 ; LA64-NEXT:    ori $a3, $zero, 4
@@ -171,6 +170,7 @@ define void @alloca_callframe(i32 %n) nounwind {
 ; LA64-NEXT:    ori $a5, $zero, 6
 ; LA64-NEXT:    ori $a6, $zero, 7
 ; LA64-NEXT:    ori $a7, $zero, 8
+; LA64-NEXT:    st.d $t0, $sp, 0
 ; LA64-NEXT:    bl %plt(func)
 ; LA64-NEXT:    addi.d $sp, $sp, 32
 ; LA64-NEXT:    addi.d $sp, $fp, -16
diff --git a/llvm/test/CodeGen/LoongArch/alsl.ll b/llvm/test/CodeGen/LoongArch/alsl.ll
index 650f504dcaf83a..177e37de0952d7 100644
--- a/llvm/test/CodeGen/LoongArch/alsl.ll
+++ b/llvm/test/CodeGen/LoongArch/alsl.ll
@@ -53,12 +53,12 @@ entry:
 define i64 @alsl_i64(i64 signext %a, i64 signext %b) nounwind {
 ; LA32-LABEL: alsl_i64:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    slli.w $a1, $a1, 4
 ; LA32-NEXT:    srli.w $a4, $a0, 28
+; LA32-NEXT:    slli.w $a1, $a1, 4
 ; LA32-NEXT:    or $a1, $a1, $a4
-; LA32-NEXT:    add.w $a1, $a3, $a1
 ; LA32-NEXT:    alsl.w $a0, $a0, $a2, 4
 ; LA32-NEXT:    sltu $a2, $a0, $a2
+; LA32-NEXT:    add.w $a1, $a3, $a1
 ; LA32-NEXT:    add.w $a1, $a1, $a2
 ; LA32-NEXT:    ret
 ;
@@ -189,14 +189,14 @@ entry:
 define i64 @mul_add_i64(i64 signext %a, i64 signext %b) nounwind {
 ; LA32-LABEL: mul_add_i64:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    slli.w $a4, $a1, 4
-; LA32-NEXT:    sub.w $a1, $a4, $a1
 ; LA32-NEXT:    ori $a4, $zero, 15
 ; LA32-NEXT:    mulh.wu $a4, $a0, $a4
+; LA32-NEXT:    slli.w $a5, $a1, 4
+; LA32-NEXT:    sub.w $a1, $a5, $a1
 ; LA32-NEXT:    add.w $a1, $a4, $a1
+; LA32-NEXT:    slli.w $a4, $a0, 4
+; LA32-NEXT:    sub.w $a0, $a4, $a0
 ; LA32-NEXT:    add.w $a1, $a3, $a1
-; LA32-NEXT:    slli.w $a3, $a0, 4
-; LA32-NEXT:    sub.w $a0, $a3, $a0
 ; LA32-NEXT:    add.w $a0, $a2, $a0
 ; LA32-NEXT:    sltu $a2, $a0, $a2
 ; LA32-NEXT:    add.w $a1, $a1, $a2
@@ -342,9 +342,9 @@ define i64 @mul_add_neg_i64(i64 signext %a, i64 signext %b) nounwind {
 ; LA32-NEXT:    mulh.wu $a4, $a0, $a4
 ; LA32-NEXT:    sub.w $a4, $a4, $a0
 ; LA32-NEXT:    add.w $a1, $a4, $a1
+; LA32-NEXT:    slli.w $a4, $a0, 4
+; LA32-NEXT:    sub.w $a0, $a0, $a4
 ; LA32-NEXT:    add.w $a1, $a3, $a1
-; LA32-NEXT:    slli.w $a3, $a0, 4
-; LA32-NEXT:    sub.w $a0, $a0, $a3
 ; LA32-NEXT:    add.w $a0, $a2, $a0
 ; LA32-NEXT:    sltu $a2, $a0, $a2
 ; LA32-NEXT:    add.w $a1, $a1, $a2
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index b84c1093eb75f2..bf48c0df3e4961 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -4,34 +4,34 @@
 define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-LABEL: atomicrmw_uinc_wrap_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    ld.w $a2, $a0, 0
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a3, $a3, 24
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a3, $a2
+; LA64-NEXT:    ld.w $a3, $a0, 0
+; LA64-NEXT:    andi $a2, $a2, 24
 ; LA64-NEXT:    nor $a4, $a4, $zero
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB0_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB0_3 Depth 2
-; LA64-NEXT:    srl.w $a5, $a2, $a3
-; LA64-NEXT:    andi $a6, $a5, 255
-; LA64-NEXT:    sltu $a6, $a6, $a1
+; LA64-NEXT:    srl.w $a5, $a3, $a2
+; LA64-NEXT:    addi.w $a6, $a3, 0
+; LA64-NEXT:    andi $a7, $a5, 255
 ; LA64-NEXT:    addi.d $a5, $a5, 1
-; LA64-NEXT:    xori $a6, $a6, 1
-; LA64-NEXT:    masknez $a5, $a5, $a6
+; LA64-NEXT:    sltu $a7, $a7, $a1
+; LA64-NEXT:    xori $a7, $a7, 1
+; LA64-NEXT:    masknez $a5, $a5, $a7
 ; LA64-NEXT:    andi $a5, $a5, 255
-; LA64-NEXT:    sll.w $a5, $a5, $a3
-; LA64-NEXT:    and $a6, $a2, $a4
-; LA64-NEXT:    or $a5, $a6, $a5
-; LA64-NEXT:    addi.w $a6, $a2, 0
+; LA64-NEXT:    sll.w $a5, $a5, $a2
+; LA64-NEXT:    and $a3, $a3, $a4
+; LA64-NEXT:    or $a5, $a3, $a5
 ; LA64-NEXT:  .LBB0_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB0_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a2, $a0, 0
-; LA64-NEXT:    bne $a2, $a6, .LBB0_5
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a6, .LBB0_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB0_3 Depth=2
 ; LA64-NEXT:    move $a7, $a5
@@ -43,9 +43,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB0_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT:    bne $a2, $a6, .LBB0_1
+; LA64-NEXT:    bne $a3, $a6, .LBB0_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
   ret i8 %result
@@ -54,35 +54,35 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-LABEL: atomicrmw_uinc_wrap_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    ld.w $a2, $a0, 0
-; LA64-NEXT:    lu12i.w $a4, 15
-; LA64-NEXT:    ori $a4, $a4, 4095
-; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a3, $a3, 24
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a4, $a3, $a2
+; LA64-NEXT:    ld.w $a3, $a0, 0
+; LA64-NEXT:    andi $a2, $a2, 24
 ; LA64-NEXT:    nor $a4, $a4, $zero
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB1_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB1_3 Depth 2
-; LA64-NEXT:    srl.w $a5, $a2, $a3
-; LA64-NEXT:    bstrpick.d $a6, $a5, 15, 0
-; LA64-NEXT:    sltu $a6, $a6, $a1
+; LA64-NEXT:    srl.w $a5, $a3, $a2
+; LA64-NEXT:    addi.w $a6, $a3, 0
+; LA64-NEXT:    bstrpick.d $a7, $a5, 15, 0
 ; LA64-NEXT:    addi.d $a5, $a5, 1
-; LA64-NEXT:    xori $a6, $a6, 1
-; LA64-NEXT:    masknez $a5, $a5, $a6
+; LA64-NEXT:    sltu $a7, $a7, $a1
+; LA64-NEXT:    xori $a7, $a7, 1
+; LA64-NEXT:    masknez $a5, $a5, $a7
 ; LA64-NEXT:    bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT:    sll.w $a5, $a5, $a3
-; LA64-NEXT:    and $a6, $a2, $a4
-; LA64-NEXT:    or $a5, $a6, $a5
-; LA64-NEXT:    addi.w $a6, $a2, 0
+; LA64-NEXT:    sll.w $a5, $a5, $a2
+; LA64-NEXT:    and $a3, $a3, $a4
+; LA64-NEXT:    or $a5, $a3, $a5
 ; LA64-NEXT:  .LBB1_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB1_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a2, $a0, 0
-; LA64-NEXT:    bne $a2, $a6, .LBB1_5
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a6, .LBB1_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB1_3 Depth=2
 ; LA64-NEXT:    move $a7, $a5
@@ -94,9 +94,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB1_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT:    bne $a2, $a6, .LBB1_1
+; LA64-NEXT:    bne $a3, $a6, .LBB1_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
   ret i16 %result
@@ -111,19 +111,19 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:  .LBB2_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB2_3 Depth 2
-; LA64-NEXT:    addi.w $a3, $a2, 0
-; LA64-NEXT:    sltu $a4, $a3, $a1
-; LA64-NEXT:    xori $a4, $a4, 1
-; LA64-NEXT:    addi.d $a2, $a2, 1
-; LA64-NEXT:    masknez $a4, $a2, $a4
+; LA64-NEXT:    addi.d $a3, $a2, 1
+; LA64-NEXT:    addi.w $a4, $a2, 0
+; LA64-NEXT:    sltu $a2, $a4, $a1
+; LA64-NEXT:    xori $a2, $a2, 1
+; LA64-NEXT:    masknez $a3, $a3, $a2
 ; LA64-NEXT:  .LBB2_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB2_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
 ; LA64-NEXT:    ll.w $a2, $a0, 0
-; LA64-NEXT:    bne $a2, $a3, .LBB2_5
+; LA64-NEXT:    bne $a2, $a4, .LBB2_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    move $a5, $a3
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB2_3
 ; LA64-NEXT:    b .LBB2_6
@@ -132,7 +132,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB2_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT:    bne $a2, $a3, .LBB2_1
+; LA64-NEXT:    bne $a2, $a4, .LBB2_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
 ; LA64-NEXT:    move $a0, $a2
 ; LA64-NEXT:    ret
@@ -149,10 +149,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB3_3 Depth 2
 ; LA64-NEXT:    move $a3, $a2
-; LA64-NEXT:    sltu $a2, $a2, $a1
-; LA64-NEXT:    xori $a2, $a2, 1
-; LA64-NEXT:    addi.d $a4, $a3, 1
-; LA64-NEXT:    masknez $a4, $a4, $a2
+; LA64-NEXT:    addi.d $a2, $a2, 1
+; LA64-NEXT:    sltu $a4, $a3, $a1
+; LA64-NEXT:    xori $a4, $a4, 1
+; LA64-NEXT:    masknez $a4, $a2, $a4
 ; LA64-NEXT:  .LBB3_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB3_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
@@ -180,39 +180,39 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-LABEL: atomicrmw_udec_wrap_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a4, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    ld.w $a2, $a0, 0
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a3, $a3, 24
+; LA64-NEXT:    andi $a2, $a4, 24
+; LA64-NEXT:    ori $a5, $zero, 255
+; LA64-NEXT:    ld.w $a3, $a0, 0
+; LA64-NEXT:    sll.w $a4, $a5, $a4
 ; LA64-NEXT:    nor $a4, $a4, $zero
 ; LA64-NEXT:    andi $a5, $a1, 255
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB4_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB4_3 Depth 2
-; LA64-NEXT:    srl.w $a6, $a2, $a3
-; LA64-NEXT:    andi $a7, $a6, 255
-; LA64-NEXT:    sltu $t0, $a5, $a7
+; LA64-NEXT:    srl.w $a6, $a3, $a2
+; LA64-NEXT:    addi.w $a7, $a3, 0
+; LA64-NEXT:    andi $t0, $a6, 255
 ; LA64-NEXT:    addi.d $a6, $a6, -1
+; LA64-NEXT:    sltui $t1, $t0, 1
+; LA64-NEXT:    sltu $t0, $a5, $t0
 ; LA64-NEXT:    masknez $a6, $a6, $t0
 ; LA64-NEXT:    maskeqz $t0, $a1, $t0
 ; LA64-NEXT:    or $a6, $t0, $a6
-; LA64-NEXT:    sltui $a7, $a7, 1
-; LA64-NEXT:    masknez $a6, $a6, $a7
-; LA64-NEXT:    maskeqz $a7, $a1, $a7
-; LA64-NEXT:    or $a6, $a7, $a6
+; LA64-NEXT:    masknez $a6, $a6, $t1
+; LA64-NEXT:    maskeqz $t0, $a1, $t1
+; LA64-NEXT:    or $a6, $t0, $a6
 ; LA64-NEXT:    andi $a6, $a6, 255
-; LA64-NEXT:    sll.w $a6, $a6, $a3
-; LA64-NEXT:    and $a7, $a2, $a4
-; LA64-NEXT:    or $a6, $a7, $a6
-; LA64-NEXT:    addi.w $a7, $a2, 0
+; LA64-NEXT:    sll.w $a6, $a6, $a2
+; LA64-NEXT:    and $a3, $a3, $a4
+; LA64-NEXT:    or $a6, $a3, $a6
 ; LA64-NEXT:  .LBB4_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB4_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a2, $a0, 0
-; LA64-NEXT:    bne $a2, $a7, .LBB4_5
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a7, .LBB4_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB4_3 Depth=2
 ; LA64-NEXT:    move $t0, $a6
@@ -224,9 +224,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB4_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT:    bne $a2, $a7, .LBB4_1
+; LA64-NEXT:    bne $a3, $a7, .LBB4_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
   ret i8 %result
@@ -235,40 +235,40 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-LABEL: atomicrmw_udec_wrap_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a4, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    ld.w $a2, $a0, 0
-; LA64-NEXT:    lu12i.w $a4, 15
-; LA64-NEXT:    ori $a4, $a4, 4095
-; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a3, $a3, 24
+; LA64-NEXT:    andi $a2, $a4, 24
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a5, $a3, 4095
+; LA64-NEXT:    ld.w $a3, $a0, 0
+; LA64-NEXT:    sll.w $a4, $a5, $a4
 ; LA64-NEXT:    nor $a4, $a4, $zero
 ; LA64-NEXT:    bstrpick.d $a5, $a1, 15, 0
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB5_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB5_3 Depth 2
-; LA64-NEXT:    srl.w $a6, $a2, $a3
-; LA64-NEXT:    bstrpick.d $a7, $a6, 15, 0
-; LA64-NEXT:    sltu $t0, $a5, $a7
+; LA64-NEXT:    srl.w $a6, $a3, $a2
+; LA64-NEXT:    addi.w $a7, $a3, 0
+; LA64-NEXT:    bstrpick.d $t0, $a6, 15, 0
 ; LA64-NEXT:    addi.d $a6, $a6, -1
+; LA64-NEXT:    sltui $t1, $t0, 1
+; LA64-NEXT:    sltu $t0, $a5, $t0
 ; LA64-NEXT:    masknez $a6, $a6, $t0
 ; LA64-NEXT:    maskeqz $t0, $a1, $t0
 ; LA64-NEXT:    or $a6, $t0, $a6
-; LA64-NEXT:    sltui $a7, $a7, 1
-; LA64-NEXT:    masknez $a6, $a6, $a7
-; LA64-NEXT:    maskeqz $a7, $a1, $a7
-; LA64-NEXT:    or $a6, $a7, $a6
+; LA64-NEXT:    masknez $a6, $a6, $t1
+; LA64-NEXT:    maskeqz $t0, $a1, $t1
+; LA64-NEXT:    or $a6, $t0, $a6
 ; LA64-NEXT:    bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT:    sll.w $a6, $a6, $a3
-; LA64-NEXT:    and $a7, $a2, $a4
-; LA64-NEXT:    or $a6, $a7, $a6
-; LA64-NEXT:    addi.w $a7, $a2, 0
+; LA64-NEXT:    sll.w $a6, $a6, $a2
+; LA64-NEXT:    and $a3, $a3, $a4
+; LA64-NEXT:    or $a6, $a3, $a6
 ; LA64-NEXT:  .LBB5_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB5_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
-; LA64-NEXT:    ll.w $a2, $a0, 0
-; LA64-NEXT:    bne $a2, $a7, .LBB5_5
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a7, .LBB5_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB5_3 Depth=2
 ; LA64-NEXT:    move $t0, $a6
@@ -280,9 +280,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB5_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT:    bne $a2, $a7, .LBB5_1
+; LA64-NEXT:    bne $a3, $a7, .LBB5_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
   ret i16 %result
@@ -297,24 +297,24 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:  .LBB6_1: # %atomicrmw.start
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB6_3 Depth 2
-; LA64-NEXT:    addi.w $a4, $a2, 0
-; LA64-NEXT:    sltu $a5, $a3, $a4
-; LA64-NEXT:    addi.d $a2, $a2, -1
-; LA64-NEXT:    masknez $a2, $a2, $a5
-; LA64-NEXT:    maskeqz $a5, $a1, $a5
-; LA64-NEXT:    or $a2, $a5, $a2
-; LA64-NEXT:    sltui $a5, $a4, 1
-; LA64-NEXT:    masknez $a2, $a2, $a5
-; LA64-NEXT:    maskeqz $a5, $a1, $a5
-; LA64-NEXT:    or $a5, $a5, $a2
+; LA64-NEXT:    addi.d $a4, $a2, -1
+; LA64-NEXT:    addi.w $a5, $a2, 0
+; LA64-NEXT:    sltui $a2, $a5, 1
+; LA64-NEXT:    sltu $a6, $a3, $a5
+; LA64-NEXT:    masknez $a4, $a4, $a6
+; LA64-NEXT:    maskeqz $a6, $a1, $a6
+; LA64-NEXT:    or $a4, $a6, $a4
+; LA64-NEXT:    masknez $a4, $a4, $a2
+; LA64-NEXT:    maskeqz $a2, $a1, $a2
+; LA64-NEXT:    or $a4, $a2, $a4
 ; LA64-NEXT:  .LBB6_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB6_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
 ; LA64-NEXT:    ll.w $a2, $a0, 0
-; LA64-NEXT:    bne $a2, $a4, .LBB6_5
+; LA64-NEXT:    bne $a2, $a5, .LBB6_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT:    move $a6, $a5
+; LA64-NEXT:    move $a6, $a4
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB6_3
 ; LA64-NEXT:    b .LBB6_6
@@ -323,7 +323,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB6_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT:    bne $a2, $a4, .LBB6_1
+; LA64-NEXT:    bne $a2, $a5, .LBB6_1
 ; LA64-NEXT:  # %bb.2: # %atomicrmw.end
 ; LA64-NEXT:    move $a0, $a2
 ; LA64-NEXT:    ret
@@ -340,12 +340,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB7_3 Depth 2
 ; LA64-NEXT:    move $a3, $a2
-; LA64-NEXT:    sltu $a2, $a1, $a2
-; LA64-NEXT:    addi.d $a4, $a3, -1
-; LA64-NEXT:    masknez $a4, $a4, $a2
-; LA64-NEXT:    maskeqz $a2, $a1, $a2
-; LA64-NEXT:    or $a2, $a2, $a4
+; LA64-NEXT:    addi.d $a2, $a2, -1
 ; LA64-NEXT:    sltui $a4, $a3, 1
+; LA64-NEXT:    sltu $a5, $a1, $a3
+; LA64-NEXT:    masknez $a2, $a2, $a5
+; LA64-NEXT:    maskeqz $a5, $a1, $a5
+; LA64-NEXT:    or $a2, $a5, $a2
 ; LA64-NEXT:    masknez $a2, $a2, $a4
 ; LA64-NEXT:    maskeqz $a4, $a1, $a4
 ; LA64-NEXT:    or $a4, $a4, $a2
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
index 259d8565c68420..fcf523aa3c883a 100644
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -129,30 +129,30 @@ define i48 @test_bitreverse_i48(i48 %a) nounwind {
 define i77 @test_bitreverse_i77(i77 %a) nounwind {
 ; LA32-LABEL: test_bitreverse_i77:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    ld.w $a3, $a1, 8
+; LA32-NEXT:    ld.w $a1, $a1, 0
 ; LA32-NEXT:    bitrev.w $a2, $a2
-; LA32-NEXT:    ld.w $a3, $a1, 4
+; LA32-NEXT:    slli.w $a4, $a2, 13
 ; LA32-NEXT:    bitrev.w $a3, $a3
-; LA32-NEXT:    srli.w $a4, $a3, 19
-; LA32-NEXT:    slli.w $a5, $a2, 13
-; LA32-NEXT:    or $a4, $a5, $a4
+; LA32-NEXT:    srli.w $a3, $a3, 19
+; LA32-NEXT:    or $a3, $a3, $a4
 ; LA32-NEXT:    srli.w $a2, $a2, 19
-; LA32-NEXT:    st.h $a2, $a0, 8
-; LA32-NEXT:    st.w $a4, $a0, 4
-; LA32-NEXT:    slli.w $a2, $a3, 13
-; LA32-NEXT:    ld.w $a1, $a1, 8
 ; LA32-NEXT:    bitrev.w $a1, $a1
+; LA32-NEXT:    slli.w $a4, $a1, 13
+; LA32-NEXT:    or $a2, $a4, $a2
 ; LA32-NEXT:    srli.w $a1, $a1, 19
-; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.h $a1, $a0, 8
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    st.w $a3, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bitreverse_i77:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bitrev.d $a1, $a1
-; LA64-NEXT:    srli.d $a1, $a1, 51
 ; LA64-NEXT:    bitrev.d $a2, $a0
 ; LA64-NEXT:    slli.d $a0, $a2, 13
+; LA64-NEXT:    bitrev.d $a1, $a1
+; LA64-NEXT:    srli.d $a1, $a1, 51
 ; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    srli.d $a1, $a2, 51
 ; LA64-NEXT:    ret
@@ -163,18 +163,18 @@ define i77 @test_bitreverse_i77(i77 %a) nounwind {
 define i128 @test_bitreverse_i128(i128 %a) nounwind {
 ; LA32-LABEL: test_bitreverse_i128:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    ld.w $a2, $a1, 12
+; LA32-NEXT:    ld.w $a3, $a1, 8
+; LA32-NEXT:    ld.w $a4, $a1, 4
+; LA32-NEXT:    ld.w $a1, $a1, 0
 ; LA32-NEXT:    bitrev.w $a2, $a2
-; LA32-NEXT:    st.w $a2, $a0, 12
-; LA32-NEXT:    ld.w $a2, $a1, 4
-; LA32-NEXT:    bitrev.w $a2, $a2
-; LA32-NEXT:    st.w $a2, $a0, 8
-; LA32-NEXT:    ld.w $a2, $a1, 8
-; LA32-NEXT:    bitrev.w $a2, $a2
-; LA32-NEXT:    st.w $a2, $a0, 4
-; LA32-NEXT:    ld.w $a1, $a1, 12
+; LA32-NEXT:    bitrev.w $a3, $a3
+; LA32-NEXT:    bitrev.w $a4, $a4
 ; LA32-NEXT:    bitrev.w $a1, $a1
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.w $a1, $a0, 12
+; LA32-NEXT:    st.w $a4, $a0, 8
+; LA32-NEXT:    st.w $a3, $a0, 4
+; LA32-NEXT:    st.w $a2, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bitreverse_i128:
diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll
index 7d064ddcf3105d..296f543e18d97d 100644
--- a/llvm/test/CodeGen/LoongArch/branch-relaxation.ll
+++ b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll
@@ -12,10 +12,10 @@ define i32 @relax_b18(i32 signext %a, i32 signext %b) {
 ; LA32-NEXT:    beq $a0, $a1, .LBB0_1
 ; LA32-NEXT:    b .LBB0_2
 ; LA32-NEXT:  .LBB0_1: # %iftrue
+; LA32-NEXT:    ori $a0, $zero, 1
 ; LA32-NEXT:    #APP
 ; LA32-NEXT:    .space 1048576
 ; LA32-NEXT:    #NO_APP
-; LA32-NEXT:    ori $a0, $zero, 1
 ; LA32-NEXT:    ret
 ; LA32-NEXT:  .LBB0_2: # %iffalse
 ; LA32-NEXT:    move $a0, $zero
@@ -26,10 +26,10 @@ define i32 @relax_b18(i32 signext %a, i32 signext %b) {
 ; LA64-NEXT:    beq $a0, $a1, .LBB0_1
 ; LA64-NEXT:    b .LBB0_2
 ; LA64-NEXT:  .LBB0_1: # %iftrue
+; LA64-NEXT:    ori $a0, $zero, 1
 ; LA64-NEXT:    #APP
 ; LA64-NEXT:    .space 1048576
 ; LA64-NEXT:    #NO_APP
-; LA64-NEXT:    ori $a0, $zero, 1
 ; LA64-NEXT:    ret
 ; LA64-NEXT:  .LBB0_2: # %iffalse
 ; LA64-NEXT:    move $a0, $zero
@@ -52,10 +52,10 @@ define i32 @relax_b23(i1 %a) {
 ; LA32-NEXT:    bnez $a0, .LBB1_1
 ; LA32-NEXT:    b .LBB1_2
 ; LA32-NEXT:  .LBB1_1: # %iftrue
+; LA32-NEXT:    ori $a0, $zero, 1
 ; LA32-NEXT:    #APP
 ; LA32-NEXT:    .space 16777216
 ; LA32-NEXT:    #NO_APP
-; LA32-NEXT:    ori $a0, $zero, 1
 ; LA32-NEXT:    ret
 ; LA32-NEXT:  .LBB1_2: # %iffalse
 ; LA32-NEXT:    move $a0, $zero
@@ -67,10 +67,10 @@ define i32 @relax_b23(i1 %a) {
 ; LA64-NEXT:    bnez $a0, .LBB1_1
 ; LA64-NEXT:    b .LBB1_2
 ; LA64-NEXT:  .LBB1_1: # %iftrue
+; LA64-NEXT:    ori $a0, $zero, 1
 ; LA64-NEXT:    #APP
 ; LA64-NEXT:    .space 16777216
 ; LA64-NEXT:    #NO_APP
-; LA64-NEXT:    ori $a0, $zero, 1
 ; LA64-NEXT:    ret
 ; LA64-NEXT:  .LBB1_2: # %iffalse
 ; LA64-NEXT:    move $a0, $zero
@@ -97,10 +97,10 @@ define i32 @relax_b28(i1 %a) {
 ; LA32-NEXT:    addi.w $a0, $a0, %pc_lo12(.LBB2_2)
 ; LA32-NEXT:    jr $a0
 ; LA32-NEXT:  .LBB2_1: # %iftrue
+; LA32-NEXT:    ori $a0, $zero, 1
 ; LA32-NEXT:    #APP
 ; LA32-NEXT:    .space 536870912
 ; LA32-NEXT:    #NO_APP
-; LA32-NEXT:    ori $a0, $zero, 1
 ; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ; LA32-NEXT:  .LBB2_2: # %iffalse
@@ -119,10 +119,10 @@ define i32 @relax_b28(i1 %a) {
 ; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(.LBB2_2)
 ; LA64-NEXT:    jr $a0
 ; LA64-NEXT:  .LBB2_1: # %iftrue
+; LA64-NEXT:    ori $a0, $zero, 1
 ; LA64-NEXT:    #APP
 ; LA64-NEXT:    .space 536870912
 ; LA64-NEXT:    #NO_APP
-; LA64-NEXT:    ori $a0, $zero, 1
 ; LA64-NEXT:    addi.d $sp, $sp, 16
 ; LA64-NEXT:    ret
 ; LA64-NEXT:  .LBB2_2: # %iffalse
diff --git a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
index c99adfbb0574fe..828fb933bf3c80 100644
--- a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
@@ -114,17 +114,17 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 define i32 @pr55484(i32 %0) {
 ; LA32-LABEL: pr55484:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a1, $a0, 8
-; LA32-NEXT:    srli.w $a0, $a0, 8
-; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    srli.w $a1, $a0, 8
+; LA32-NEXT:    slli.w $a0, $a0, 8
+; LA32-NEXT:    or $a0, $a1, $a0
 ; LA32-NEXT:    ext.w.h $a0, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pr55484:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a0, 8
-; LA64-NEXT:    srli.d $a0, $a0, 8
-; LA64-NEXT:    or $a0, $a0, $a1
+; LA64-NEXT:    srli.d $a1, $a0, 8
+; LA64-NEXT:    slli.d $a0, $a0, 8
+; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    ext.w.h $a0, $a0
 ; LA64-NEXT:    ret
   %2 = lshr i32 %0, 8
diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
index eb9107302ef68e..71095ab972e356 100644
--- a/llvm/test/CodeGen/LoongArch/bswap.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -83,20 +83,20 @@ define i48 @test_bswap_i48(i48 %a) nounwind {
 define i80 @test_bswap_i80(i80 %a) nounwind {
 ; LA32-LABEL: test_bswap_i80:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    ld.w $a3, $a1, 8
+; LA32-NEXT:    ld.w $a1, $a1, 0
 ; LA32-NEXT:    revb.2h $a2, $a2
 ; LA32-NEXT:    rotri.w $a2, $a2, 16
-; LA32-NEXT:    ld.w $a3, $a1, 4
 ; LA32-NEXT:    revb.2h $a3, $a3
 ; LA32-NEXT:    rotri.w $a3, $a3, 16
-; LA32-NEXT:    bytepick.w $a4, $a3, $a2, 2
-; LA32-NEXT:    st.w $a4, $a0, 4
-; LA32-NEXT:    ld.w $a1, $a1, 8
+; LA32-NEXT:    bytepick.w $a3, $a3, $a2, 2
 ; LA32-NEXT:    revb.2h $a1, $a1
 ; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    bytepick.w $a1, $a1, $a3, 2
-; LA32-NEXT:    st.w $a1, $a0, 0
-; LA32-NEXT:    srli.w $a1, $a2, 16
+; LA32-NEXT:    bytepick.w $a2, $a2, $a1, 2
+; LA32-NEXT:    srli.w $a1, $a1, 16
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    st.w $a3, $a0, 0
 ; LA32-NEXT:    st.h $a1, $a0, 8
 ; LA32-NEXT:    ret
 ;
@@ -114,22 +114,22 @@ define i80 @test_bswap_i80(i80 %a) nounwind {
 define i128 @test_bswap_i128(i128 %a) nounwind {
 ; LA32-LABEL: test_bswap_i128:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 0
-; LA32-NEXT:    revb.2h $a2, $a2
-; LA32-NEXT:    rotri.w $a2, $a2, 16
-; LA32-NEXT:    st.w $a2, $a0, 12
-; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    ld.w $a2, $a1, 12
+; LA32-NEXT:    ld.w $a3, $a1, 0
+; LA32-NEXT:    ld.w $a4, $a1, 8
+; LA32-NEXT:    ld.w $a1, $a1, 4
 ; LA32-NEXT:    revb.2h $a2, $a2
 ; LA32-NEXT:    rotri.w $a2, $a2, 16
-; LA32-NEXT:    st.w $a2, $a0, 8
-; LA32-NEXT:    ld.w $a2, $a1, 8
-; LA32-NEXT:    revb.2h $a2, $a2
-; LA32-NEXT:    rotri.w $a2, $a2, 16
-; LA32-NEXT:    st.w $a2, $a0, 4
-; LA32-NEXT:    ld.w $a1, $a1, 12
+; LA32-NEXT:    revb.2h $a4, $a4
+; LA32-NEXT:    rotri.w $a4, $a4, 16
 ; LA32-NEXT:    revb.2h $a1, $a1
 ; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    revb.2h $a3, $a3
+; LA32-NEXT:    rotri.w $a3, $a3, 16
+; LA32-NEXT:    st.w $a3, $a0, 12
+; LA32-NEXT:    st.w $a1, $a0, 8
+; LA32-NEXT:    st.w $a4, $a0, 4
+; LA32-NEXT:    st.w $a2, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bswap_i128:
diff --git a/llvm/test/CodeGen/LoongArch/bytepick.ll b/llvm/test/CodeGen/LoongArch/bytepick.ll
index 86148b374e70d8..1a2cd48448ba28 100644
--- a/llvm/test/CodeGen/LoongArch/bytepick.ll
+++ b/llvm/test/CodeGen/LoongArch/bytepick.ll
@@ -14,8 +14,8 @@ define i32 @pick_i32_1(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: pick_i32_1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 8
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 24
+; LA64-NEXT:    slli.d $a0, $a0, 8
 ; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 24
@@ -52,8 +52,8 @@ define i32 @pick_i32_2(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: pick_i32_2:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 16
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 16
+; LA64-NEXT:    slli.d $a0, $a0, 16
 ; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 16
@@ -90,8 +90,8 @@ define i32 @pick_i32_3(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: pick_i32_3:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 24
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 8
+; LA64-NEXT:    slli.d $a0, $a0, 24
 ; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 8
@@ -123,9 +123,8 @@ define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
 define i64 @pick_i64_1(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_1:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 1
 ; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 1
-; LA32-NEXT:    move $a0, $a2
+; LA32-NEXT:    bytepick.w $a0, $a3, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_1:
@@ -143,9 +142,8 @@ define i64 @pick_i64_1(i64 %a, i64 %b) {
 define i64 @pick_i64_2(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_2:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 2
 ; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 2
-; LA32-NEXT:    move $a0, $a2
+; LA32-NEXT:    bytepick.w $a0, $a3, $a0, 2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_2:
@@ -163,9 +161,8 @@ define i64 @pick_i64_2(i64 %a, i64 %b) {
 define i64 @pick_i64_3(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 3
 ; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 3
-; LA32-NEXT:    move $a0, $a2
+; LA32-NEXT:    bytepick.w $a0, $a3, $a0, 3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_3:
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
index 08fff9f8ceedd4..06dfe00d908475 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
@@ -36,21 +36,21 @@ define i64 @caller_i128_in_regs() nounwind {
 define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind {
 ; CHECK-LABEL: callee_many_scalars:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld.d $t0, $sp, 0
-; CHECK-NEXT:    xor $a5, $a5, $t0
-; CHECK-NEXT:    xor $a4, $a4, $a7
-; CHECK-NEXT:    or $a4, $a4, $a5
-; CHECK-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; CHECK-NEXT:    ld.d $t0, $sp, 8
+; CHECK-NEXT:    ld.d $t1, $sp, 0
 ; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
 ; CHECK-NEXT:    add.d $a0, $a0, $a1
-; CHECK-NEXT:    bstrpick.d $a1, $a2, 31, 0
-; CHECK-NEXT:    add.d $a0, $a0, $a1
+; CHECK-NEXT:    add.d $a0, $a0, $a2
 ; CHECK-NEXT:    add.d $a0, $a0, $a3
-; CHECK-NEXT:    sltui $a1, $a4, 1
+; CHECK-NEXT:    xor $a1, $a5, $t1
+; CHECK-NEXT:    xor $a2, $a4, $a7
+; CHECK-NEXT:    or $a1, $a2, $a1
+; CHECK-NEXT:    sltui $a1, $a1, 1
 ; CHECK-NEXT:    add.d $a0, $a1, $a0
 ; CHECK-NEXT:    add.d $a0, $a0, $a6
-; CHECK-NEXT:    ld.d $a1, $sp, 8
-; CHECK-NEXT:    add.d $a0, $a0, $a1
+; CHECK-NEXT:    add.d $a0, $a0, $t0
 ; CHECK-NEXT:    ret
   %a_ext = zext i8 %a to i64
   %b_ext = zext i16 %b to i64
@@ -73,7 +73,6 @@ define i64 @caller_many_scalars() nounwind {
 ; CHECK-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; CHECK-NEXT:    ori $a0, $zero, 8
 ; CHECK-NEXT:    st.d $a0, $sp, 8
-; CHECK-NEXT:    st.d $zero, $sp, 0
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    ori $a1, $zero, 2
 ; CHECK-NEXT:    ori $a2, $zero, 3
@@ -81,6 +80,7 @@ define i64 @caller_many_scalars() nounwind {
 ; CHECK-NEXT:    ori $a4, $zero, 5
 ; CHECK-NEXT:    ori $a6, $zero, 6
 ; CHECK-NEXT:    ori $a7, $zero, 7
+; CHECK-NEXT:    st.d $zero, $sp, 0
 ; CHECK-NEXT:    move $a5, $zero
 ; CHECK-NEXT:    bl %plt(callee_many_scalars)
 ; CHECK-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
@@ -95,21 +95,21 @@ define i64 @caller_many_scalars() nounwind {
 define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
 ; CHECK-LABEL: callee_large_scalars:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld.d $a2, $a1, 24
-; CHECK-NEXT:    ld.d $a3, $a0, 24
-; CHECK-NEXT:    xor $a2, $a3, $a2
-; CHECK-NEXT:    ld.d $a3, $a1, 8
-; CHECK-NEXT:    ld.d $a4, $a0, 8
-; CHECK-NEXT:    xor $a3, $a4, $a3
-; CHECK-NEXT:    or $a2, $a3, $a2
-; CHECK-NEXT:    ld.d $a3, $a1, 16
-; CHECK-NEXT:    ld.d $a4, $a0, 16
-; CHECK-NEXT:    xor $a3, $a4, $a3
-; CHECK-NEXT:    ld.d $a1, $a1, 0
-; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    ld.d $a2, $a1, 0
+; CHECK-NEXT:    ld.d $a3, $a0, 0
+; CHECK-NEXT:    ld.d $a4, $a1, 8
+; CHECK-NEXT:    ld.d $a5, $a1, 24
+; CHECK-NEXT:    ld.d $a6, $a0, 24
+; CHECK-NEXT:    ld.d $a7, $a0, 8
+; CHECK-NEXT:    ld.d $a1, $a1, 16
+; CHECK-NEXT:    ld.d $a0, $a0, 16
+; CHECK-NEXT:    xor $a5, $a6, $a5
+; CHECK-NEXT:    xor $a4, $a7, $a4
+; CHECK-NEXT:    or $a4, $a4, $a5
 ; CHECK-NEXT:    xor $a0, $a0, $a1
-; CHECK-NEXT:    or $a0, $a0, $a3
-; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    xor $a1, $a3, $a2
+; CHECK-NEXT:    or $a0, $a1, $a0
+; CHECK-NEXT:    or $a0, $a0, $a4
 ; CHECK-NEXT:    sltui $a0, $a0, 1
 ; CHECK-NEXT:    ret
   %1 = icmp eq i256 %a, %b
@@ -122,18 +122,18 @@ define i64 @caller_large_scalars() nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi.d $sp, $sp, -80
 ; CHECK-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
-; CHECK-NEXT:    ori $a0, $zero, 2
-; CHECK-NEXT:    st.d $a0, $sp, 0
 ; CHECK-NEXT:    st.d $zero, $sp, 24
 ; CHECK-NEXT:    st.d $zero, $sp, 16
 ; CHECK-NEXT:    st.d $zero, $sp, 8
+; CHECK-NEXT:    ori $a0, $zero, 2
+; CHECK-NEXT:    st.d $a0, $sp, 0
 ; CHECK-NEXT:    st.d $zero, $sp, 56
 ; CHECK-NEXT:    st.d $zero, $sp, 48
 ; CHECK-NEXT:    st.d $zero, $sp, 40
-; CHECK-NEXT:    ori $a0, $zero, 1
-; CHECK-NEXT:    st.d $a0, $sp, 32
+; CHECK-NEXT:    ori $a2, $zero, 1
 ; CHECK-NEXT:    addi.d $a0, $sp, 32
 ; CHECK-NEXT:    addi.d $a1, $sp, 0
+; CHECK-NEXT:    st.d $a2, $sp, 32
 ; CHECK-NEXT:    bl %plt(callee_large_scalars)
 ; CHECK-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 80
@@ -150,21 +150,21 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
 ; CHECK-LABEL: callee_large_scalars_exhausted_regs:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ld.d $a0, $sp, 8
-; CHECK-NEXT:    ld.d $a1, $a0, 24
-; CHECK-NEXT:    ld.d $a2, $a7, 24
+; CHECK-NEXT:    ld.d $a1, $a0, 0
+; CHECK-NEXT:    ld.d $a2, $a7, 0
+; CHECK-NEXT:    ld.d $a3, $a0, 8
+; CHECK-NEXT:    ld.d $a4, $a0, 24
+; CHECK-NEXT:    ld.d $a5, $a7, 24
+; CHECK-NEXT:    ld.d $a6, $a7, 8
+; CHECK-NEXT:    ld.d $a0, $a0, 16
+; CHECK-NEXT:    ld.d $a7, $a7, 16
+; CHECK-NEXT:    xor $a4, $a5, $a4
+; CHECK-NEXT:    xor $a3, $a6, $a3
+; CHECK-NEXT:    or $a3, $a3, $a4
+; CHECK-NEXT:    xor $a0, $a7, $a0
 ; CHECK-NEXT:    xor $a1, $a2, $a1
-; CHECK-NEXT:    ld.d $a2, $a0, 8
-; CHECK-NEXT:    ld.d $a3, $a7, 8
-; CHECK-NEXT:    xor $a2, $a3, $a2
-; CHECK-NEXT:    or $a1, $a2, $a1
-; CHECK-NEXT:    ld.d $a2, $a0, 16
-; CHECK-NEXT:    ld.d $a3, $a7, 16
-; CHECK-NEXT:    xor $a2, $a3, $a2
-; CHECK-NEXT:    ld.d $a0, $a0, 0
-; CHECK-NEXT:    ld.d $a3, $a7, 0
-; CHECK-NEXT:    xor $a0, $a3, $a0
-; CHECK-NEXT:    or $a0, $a0, $a2
-; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    or $a0, $a1, $a0
+; CHECK-NEXT:    or $a0, $a0, $a3
 ; CHECK-NEXT:    sltui $a0, $a0, 1
 ; CHECK-NEXT:    ret
   %1 = icmp eq i256 %h, %j
@@ -181,16 +181,15 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
 ; CHECK-NEXT:    st.d $a0, $sp, 8
 ; CHECK-NEXT:    ori $a0, $zero, 9
 ; CHECK-NEXT:    st.d $a0, $sp, 0
-; CHECK-NEXT:    ori $a0, $zero, 10
-; CHECK-NEXT:    st.d $a0, $sp, 16
 ; CHECK-NEXT:    st.d $zero, $sp, 40
 ; CHECK-NEXT:    st.d $zero, $sp, 32
 ; CHECK-NEXT:    st.d $zero, $sp, 24
+; CHECK-NEXT:    ori $a0, $zero, 10
+; CHECK-NEXT:    st.d $a0, $sp, 16
 ; CHECK-NEXT:    st.d $zero, $sp, 72
 ; CHECK-NEXT:    st.d $zero, $sp, 64
 ; CHECK-NEXT:    st.d $zero, $sp, 56
-; CHECK-NEXT:    ori $a0, $zero, 8
-; CHECK-NEXT:    st.d $a0, $sp, 48
+; CHECK-NEXT:    ori $t0, $zero, 8
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    ori $a1, $zero, 2
 ; CHECK-NEXT:    ori $a2, $zero, 3
@@ -199,6 +198,7 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
 ; CHECK-NEXT:    ori $a5, $zero, 6
 ; CHECK-NEXT:    ori $a6, $zero, 7
 ; CHECK-NEXT:    addi.d $a7, $sp, 48
+; CHECK-NEXT:    st.d $t0, $sp, 48
 ; CHECK-NEXT:    bl %plt(callee_large_scalars_exhausted_regs)
 ; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 96
@@ -216,9 +216,9 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
 define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind {
 ; CHECK-LABEL: callee_large_struct:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld.d $a1, $a0, 24
-; CHECK-NEXT:    ld.d $a0, $a0, 0
-; CHECK-NEXT:    add.d $a0, $a0, $a1
+; CHECK-NEXT:    ld.d $a1, $a0, 0
+; CHECK-NEXT:    ld.d $a0, $a0, 24
+; CHECK-NEXT:    add.d $a0, $a1, $a0
 ; CHECK-NEXT:    ret
   %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0
   %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3
@@ -235,16 +235,16 @@ define i64 @caller_large_struct() nounwind {
 ; CHECK-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    st.d $a0, $sp, 40
+; CHECK-NEXT:    ori $a1, $zero, 2
+; CHECK-NEXT:    st.d $a1, $sp, 48
+; CHECK-NEXT:    ori $a2, $zero, 3
+; CHECK-NEXT:    st.d $a2, $sp, 56
+; CHECK-NEXT:    ori $a3, $zero, 4
+; CHECK-NEXT:    st.d $a3, $sp, 64
 ; CHECK-NEXT:    st.d $a0, $sp, 8
-; CHECK-NEXT:    ori $a0, $zero, 2
-; CHECK-NEXT:    st.d $a0, $sp, 48
-; CHECK-NEXT:    st.d $a0, $sp, 16
-; CHECK-NEXT:    ori $a0, $zero, 3
-; CHECK-NEXT:    st.d $a0, $sp, 56
-; CHECK-NEXT:    st.d $a0, $sp, 24
-; CHECK-NEXT:    ori $a0, $zero, 4
-; CHECK-NEXT:    st.d $a0, $sp, 64
-; CHECK-NEXT:    st.d $a0, $sp, 32
+; CHECK-NEXT:    st.d $a1, $sp, 16
+; CHECK-NEXT:    st.d $a2, $sp, 24
+; CHECK-NEXT:    st.d $a3, $sp, 32
 ; CHECK-NEXT:    addi.d $a0, $sp, 8
 ; CHECK-NEXT:    bl %plt(callee_large_struct)
 ; CHECK-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
@@ -359,14 +359,14 @@ define void @caller_large_scalar_ret() nounwind {
 define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind {
 ; CHECK-LABEL: callee_large_struct_ret:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ori $a1, $zero, 4
-; CHECK-NEXT:    st.d $a1, $a0, 24
-; CHECK-NEXT:    ori $a1, $zero, 3
-; CHECK-NEXT:    st.d $a1, $a0, 16
-; CHECK-NEXT:    ori $a1, $zero, 2
-; CHECK-NEXT:    st.d $a1, $a0, 8
 ; CHECK-NEXT:    ori $a1, $zero, 1
 ; CHECK-NEXT:    st.d $a1, $a0, 0
+; CHECK-NEXT:    ori $a1, $zero, 2
+; CHECK-NEXT:    st.d $a1, $a0, 8
+; CHECK-NEXT:    ori $a1, $zero, 3
+; CHECK-NEXT:    st.d $a1, $a0, 16
+; CHECK-NEXT:    ori $a1, $zero, 4
+; CHECK-NEXT:    st.d $a1, $a0, 24
 ; CHECK-NEXT:    ret
   %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0
   store i64 1, ptr %a, align 4
@@ -386,9 +386,9 @@ define i64 @caller_large_struct_ret() nounwind {
 ; CHECK-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; CHECK-NEXT:    addi.d $a0, $sp, 8
 ; CHECK-NEXT:    bl %plt(callee_large_struct_ret)
-; CHECK-NEXT:    ld.d $a0, $sp, 32
-; CHECK-NEXT:    ld.d $a1, $sp, 8
-; CHECK-NEXT:    add.d $a0, $a1, $a0
+; CHECK-NEXT:    ld.d $a0, $sp, 8
+; CHECK-NEXT:    ld.d $a1, $sp, 32
+; CHECK-NEXT:    add.d $a0, $a0, $a1
 ; CHECK-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 48
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
index ceb38876c384a4..5e89fd41d6a7c7 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
@@ -11,10 +11,10 @@ define i64 @callee_float_in_fpr(i64 %a, float %b, double %c) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ftintrz.l.s $fa0, $fa0
 ; CHECK-NEXT:    movfr2gr.d $a1, $fa0
-; CHECK-NEXT:    add.d $a0, $a0, $a1
 ; CHECK-NEXT:    ftintrz.l.d $fa0, $fa1
-; CHECK-NEXT:    movfr2gr.d $a1, $fa0
+; CHECK-NEXT:    movfr2gr.d $a2, $fa0
 ; CHECK-NEXT:    add.d $a0, $a0, $a1
+; CHECK-NEXT:    add.d $a0, $a0, $a2
 ; CHECK-NEXT:    ret
   %b_fptosi = fptosi float %b to i64
   %c_fptosi = fptosi double %c to i64
@@ -45,12 +45,12 @@ define i64 @caller_float_in_fpr() nounwind {
 define i64 @callee_double_in_gpr_exhausted_fprs(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) nounwind {
 ; CHECK-LABEL: callee_double_in_gpr_exhausted_fprs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ftintrz.l.d $fa0, $fa7
-; CHECK-NEXT:    movfr2gr.d $a1, $fa0
 ; CHECK-NEXT:    movgr2fr.d $fa0, $a0
+; CHECK-NEXT:    ftintrz.l.d $fa1, $fa7
+; CHECK-NEXT:    movfr2gr.d $a0, $fa1
 ; CHECK-NEXT:    ftintrz.l.d $fa0, $fa0
-; CHECK-NEXT:    movfr2gr.d $a0, $fa0
-; CHECK-NEXT:    add.d $a0, $a1, $a0
+; CHECK-NEXT:    movfr2gr.d $a1, $fa0
+; CHECK-NEXT:    add.d $a0, $a0, $a1
 ; CHECK-NEXT:    ret
   %h_fptosi = fptosi double %h to i64
   %i_fptosi = fptosi double %i to i64
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll
index d738c066e1ad36..f577435114967b 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll
@@ -54,12 +54,12 @@ define i64 @caller_float_on_stack() nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi.d $sp, $sp, -16
 ; CHECK-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT:    lu12i.w $a0, 264704
-; CHECK-NEXT:    st.d $a0, $sp, 0
+; CHECK-NEXT:    lu12i.w $a1, 264704
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    ori $a2, $zero, 2
 ; CHECK-NEXT:    ori $a4, $zero, 3
 ; CHECK-NEXT:    ori $a6, $zero, 4
+; CHECK-NEXT:    st.d $a1, $sp, 0
 ; CHECK-NEXT:    move $a1, $zero
 ; CHECK-NEXT:    move $a3, $zero
 ; CHECK-NEXT:    move $a5, $zero
diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
index af24ae64b7c741..a3d85f6bd06d8f 100644
--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
@@ -25,29 +25,29 @@ define dso_local noundef signext i32 @main() nounwind {
 ; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI0_1)
 ; CHECK-NEXT:    xvld $xr1, $a0, 0
 ; CHECK-NEXT:    xvst $xr1, $sp, 64 # 32-byte Folded Spill
-; CHECK-NEXT:    xvst $xr1, $sp, 224
-; CHECK-NEXT:    xvst $xr0, $sp, 192
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_2)
 ; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI0_2)
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT:    xvst $xr0, $sp, 160
+; CHECK-NEXT:    xvld $xr2, $a0, 0
+; CHECK-NEXT:    xvst $xr2, $sp, 32 # 32-byte Folded Spill
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_3)
 ; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI0_3)
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvst $xr0, $sp, 0 # 32-byte Folded Spill
+; CHECK-NEXT:    xvld $xr3, $a0, 0
+; CHECK-NEXT:    xvst $xr3, $sp, 0 # 32-byte Folded Spill
 ; CHECK-NEXT:    xvst $xr0, $sp, 128
+; CHECK-NEXT:    xvst $xr1, $sp, 160
+; CHECK-NEXT:    xvst $xr2, $sp, 192
+; CHECK-NEXT:    xvst $xr3, $sp, 224
 ; CHECK-NEXT:    addi.d $fp, $sp, 128
 ; CHECK-NEXT:    move $a0, $fp
 ; CHECK-NEXT:    bl %plt(foo)
-; CHECK-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT:    xvst $xr0, $sp, 224
 ; CHECK-NEXT:    xvld $xr0, $sp, 96 # 32-byte Folded Reload
-; CHECK-NEXT:    xvst $xr0, $sp, 192
-; CHECK-NEXT:    xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT:    xvst $xr0, $sp, 128
+; CHECK-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
 ; CHECK-NEXT:    xvst $xr0, $sp, 160
+; CHECK-NEXT:    xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT:    xvst $xr0, $sp, 192
 ; CHECK-NEXT:    xvld $xr0, $sp, 0 # 32-byte Folded Reload
-; CHECK-NEXT:    xvst $xr0, $sp, 128
+; CHECK-NEXT:    xvst $xr0, $sp, 224
 ; CHECK-NEXT:    move $a0, $fp
 ; CHECK-NEXT:    bl %plt(bar)
 ; CHECK-NEXT:    move $a0, $zero
diff --git a/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
index c5a6da72389f47..b8b358eeadc1df 100644
--- a/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
+++ b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
@@ -17,8 +17,8 @@ body:             |
     ; CHECK: liveins: $fcc0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $fcc1 = PseudoCopyCFR $fcc0
-    ; CHECK-NEXT: $r4 = MOVCF2GR killed $fcc1
-    ; CHECK-NEXT: PseudoRET implicit killed $r4
+    ; CHECK-NEXT: $r4 = MOVCF2GR $fcc1
+    ; CHECK-NEXT: PseudoRET implicit $r4
     $fcc1 = COPY $fcc0
     $r4 = COPY $fcc1
     PseudoRET implicit killed $r4
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index fa4fda9b8972bf..9fa3f5076bb221 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -201,10 +201,10 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
 define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 5
-; LA32-NEXT:    ori $a1, $a1, 1365
-; LA32-NEXT:    srli.w $a2, $a0, 1
-; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    srli.w $a1, $a0, 1
+; LA32-NEXT:    lu12i.w $a2, 5
+; LA32-NEXT:    ori $a2, $a2, 1365
+; LA32-NEXT:    and $a1, $a1, $a2
 ; LA32-NEXT:    sub.w $a0, $a0, $a1
 ; LA32-NEXT:    lu12i.w $a1, 3
 ; LA32-NEXT:    ori $a1, $a1, 819
@@ -221,10 +221,10 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 5
-; LA64-NEXT:    ori $a1, $a1, 1365
-; LA64-NEXT:    srli.d $a2, $a0, 1
-; LA64-NEXT:    and $a1, $a2, $a1
+; LA64-NEXT:    srli.d $a1, $a0, 1
+; LA64-NEXT:    lu12i.w $a2, 5
+; LA64-NEXT:    ori $a2, $a2, 1365
+; LA64-NEXT:    and $a1, $a1, $a2
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    lu12i.w $a1, 3
 ; LA64-NEXT:    ori $a1, $a1, 819
@@ -245,10 +245,10 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 define i32 @test_ctpop_i32(i32 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 349525
-; LA32-NEXT:    ori $a1, $a1, 1365
-; LA32-NEXT:    srli.w $a2, $a0, 1
-; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    srli.w $a1, $a0, 1
+; LA32-NEXT:    lu12i.w $a2, 349525
+; LA32-NEXT:    ori $a2, $a2, 1365
+; LA32-NEXT:    and $a1, $a1, $a2
 ; LA32-NEXT:    sub.w $a0, $a0, $a1
 ; LA32-NEXT:    lu12i.w $a1, 209715
 ; LA32-NEXT:    ori $a1, $a1, 819
@@ -269,10 +269,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 349525
-; LA64-NEXT:    ori $a1, $a1, 1365
-; LA64-NEXT:    srli.d $a2, $a0, 1
-; LA64-NEXT:    and $a1, $a2, $a1
+; LA64-NEXT:    srli.d $a1, $a0, 1
+; LA64-NEXT:    lu12i.w $a2, 349525
+; LA64-NEXT:    ori $a2, $a2, 1365
+; LA64-NEXT:    and $a1, $a1, $a2
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    lu12i.w $a1, 209715
 ; LA64-NEXT:    ori $a1, $a1, 819
@@ -297,37 +297,37 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 349525
-; LA32-NEXT:    ori $a2, $a2, 1365
-; LA32-NEXT:    srli.w $a3, $a0, 1
-; LA32-NEXT:    and $a3, $a3, $a2
-; LA32-NEXT:    sub.w $a0, $a0, $a3
-; LA32-NEXT:    lu12i.w $a3, 209715
-; LA32-NEXT:    ori $a3, $a3, 819
-; LA32-NEXT:    and $a4, $a0, $a3
-; LA32-NEXT:    srli.w $a0, $a0, 2
-; LA32-NEXT:    and $a0, $a0, $a3
-; LA32-NEXT:    add.w $a0, $a4, $a0
-; LA32-NEXT:    srli.w $a4, $a1, 1
-; LA32-NEXT:    and $a2, $a4, $a2
+; LA32-NEXT:    srli.w $a2, $a1, 1
+; LA32-NEXT:    lu12i.w $a3, 349525
+; LA32-NEXT:    ori $a3, $a3, 1365
+; LA32-NEXT:    and $a2, $a2, $a3
 ; LA32-NEXT:    sub.w $a1, $a1, $a2
-; LA32-NEXT:    srli.w $a2, $a0, 4
-; LA32-NEXT:    add.w $a0, $a0, $a2
-; LA32-NEXT:    and $a2, $a1, $a3
+; LA32-NEXT:    lu12i.w $a2, 209715
+; LA32-NEXT:    ori $a2, $a2, 819
+; LA32-NEXT:    and $a4, $a1, $a2
 ; LA32-NEXT:    srli.w $a1, $a1, 2
-; LA32-NEXT:    and $a1, $a1, $a3
-; LA32-NEXT:    add.w $a1, $a2, $a1
-; LA32-NEXT:    srli.w $a2, $a1, 4
-; LA32-NEXT:    add.w $a1, $a1, $a2
-; LA32-NEXT:    lu12i.w $a2, 61680
-; LA32-NEXT:    ori $a2, $a2, 3855
 ; LA32-NEXT:    and $a1, $a1, $a2
-; LA32-NEXT:    and $a0, $a0, $a2
-; LA32-NEXT:    lu12i.w $a2, 4112
-; LA32-NEXT:    ori $a2, $a2, 257
-; LA32-NEXT:    mul.w $a0, $a0, $a2
-; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    add.w $a1, $a4, $a1
+; LA32-NEXT:    srli.w $a4, $a1, 4
+; LA32-NEXT:    add.w $a1, $a1, $a4
+; LA32-NEXT:    lu12i.w $a4, 61680
+; LA32-NEXT:    ori $a4, $a4, 3855
+; LA32-NEXT:    and $a1, $a1, $a4
+; LA32-NEXT:    lu12i.w $a5, 4112
+; LA32-NEXT:    ori $a5, $a5, 257
+; LA32-NEXT:    mul.w $a1, $a1, $a5
 ; LA32-NEXT:    srli.w $a1, $a1, 24
+; LA32-NEXT:    srli.w $a6, $a0, 1
+; LA32-NEXT:    and $a3, $a6, $a3
+; LA32-NEXT:    sub.w $a0, $a0, $a3
+; LA32-NEXT:    and $a3, $a0, $a2
+; LA32-NEXT:    srli.w $a0, $a0, 2
+; LA32-NEXT:    and $a0, $a0, $a2
+; LA32-NEXT:    add.w $a0, $a3, $a0
+; LA32-NEXT:    srli.w $a2, $a0, 4
+; LA32-NEXT:    add.w $a0, $a0, $a2
+; LA32-NEXT:    and $a0, $a0, $a4
+; LA32-NEXT:    mul.w $a0, $a0, $a5
 ; LA32-NEXT:    srli.w $a0, $a0, 24
 ; LA32-NEXT:    add.w $a0, $a0, $a1
 ; LA32-NEXT:    move $a1, $zero
@@ -335,12 +335,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 349525
-; LA64-NEXT:    ori $a1, $a1, 1365
-; LA64-NEXT:    lu32i.d $a1, 349525
-; LA64-NEXT:    lu52i.d $a1, $a1, 1365
-; LA64-NEXT:    srli.d $a2, $a0, 1
-; LA64-NEXT:    and $a1, $a2, $a1
+; LA64-NEXT:    srli.d $a1, $a0, 1
+; LA64-NEXT:    lu12i.w $a2, 349525
+; LA64-NEXT:    ori $a2, $a2, 1365
+; LA64-NEXT:    lu32i.d $a2, 349525
+; LA64-NEXT:    lu52i.d $a2, $a2, 1365
+; LA64-NEXT:    and $a1, $a1, $a2
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    lu12i.w $a1, 209715
 ; LA64-NEXT:    ori $a1, $a1, 819
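
For readers tracing the constants in the ctpop sequences above: the lu12i.w+ori pairs materialize the classic SWAR population-count masks, and the scheduler change only reorders where they are built relative to the first srli. A minimal C sketch of the algorithm these sequences implement (function name is mine, not from the patch):

  #include <stdint.h>

  /* SWAR popcount; the masks match the constants above:
   * 349525<<12|1365 == 0x55555555, 209715<<12|819 == 0x33333333,
   * 61680<<12|3855 == 0x0f0f0f0f,  4112<<12|257  == 0x01010101. */
  static uint32_t ctpop32(uint32_t x) {
    x = x - ((x >> 1) & 0x55555555u);                 /* 2-bit field sums */
    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); /* 4-bit field sums */
    x = (x + (x >> 4)) & 0x0f0f0f0fu;                 /* 8-bit field sums */
    return (x * 0x01010101u) >> 24;                   /* add the 4 bytes  */
  }
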
diff --git a/llvm/test/CodeGen/LoongArch/fcopysign.ll b/llvm/test/CodeGen/LoongArch/fcopysign.ll
index 181130d2c6a5f0..49e8fbca3e12ed 100644
--- a/llvm/test/CodeGen/LoongArch/fcopysign.ll
+++ b/llvm/test/CodeGen/LoongArch/fcopysign.ll
@@ -73,10 +73,10 @@ define double @fold_promote_d_s(double %a, float %b) nounwind {
 ;
 ; LA64F-LABEL: fold_promote_d_s:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    lu12i.w $a1, -524288
-; LA64F-NEXT:    lu32i.d $a1, 0
-; LA64F-NEXT:    movfr2gr.s $a2, $fa0
-; LA64F-NEXT:    and $a1, $a2, $a1
+; LA64F-NEXT:    movfr2gr.s $a1, $fa0
+; LA64F-NEXT:    lu12i.w $a2, -524288
+; LA64F-NEXT:    lu32i.d $a2, 0
+; LA64F-NEXT:    and $a1, $a1, $a2
 ; LA64F-NEXT:    slli.d $a1, $a1, 32
 ; LA64F-NEXT:    bstrins.d $a1, $a0, 62, 0
 ; LA64F-NEXT:    move $a0, $a1
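
The LA64F sequence above does copysign(double, float) entirely in integer registers: lu12i.w $a2, -524288 plus lu32i.d builds the 0x80000000 sign mask for the float, slli.d moves the sign up to bit 63, and bstrins.d merges in the double's magnitude bits 62:0. A hedged C sketch of that bit manipulation (name and helper are mine):

  #include <stdint.h>
  #include <string.h>

  /* Sign bit from the float, magnitude bits 62:0 from the double. */
  static double copysign_d_s(double mag, float sgn) {
    uint64_t m; uint32_t s;
    memcpy(&m, &mag, 8);
    memcpy(&s, &sgn, 4);
    uint64_t sign = (uint64_t)(s & 0x80000000u) << 32; /* slli.d ..., 32 */
    uint64_t r = sign | (m & 0x7fffffffffffffffull);   /* bstrins.d 62,0 */
    double out;
    memcpy(&out, &r, 8);
    return out;
  }
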
diff --git a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
index 432cedff6d8319..5e4c8418b222b2 100644
--- a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
+++ b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
@@ -5,21 +5,21 @@
 define void @getSetCCResultType(ptr %p) {
 ; CHECK-LABEL: getSetCCResultType:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.w $a1, $a0, 12
-; CHECK-NEXT:    sltui $a1, $a1, 1
-; CHECK-NEXT:    sub.d $a1, $zero, $a1
-; CHECK-NEXT:    st.w $a1, $a0, 12
-; CHECK-NEXT:    ld.w $a1, $a0, 8
-; CHECK-NEXT:    sltui $a1, $a1, 1
-; CHECK-NEXT:    sub.d $a1, $zero, $a1
-; CHECK-NEXT:    st.w $a1, $a0, 8
-; CHECK-NEXT:    ld.w $a1, $a0, 4
-; CHECK-NEXT:    sltui $a1, $a1, 1
-; CHECK-NEXT:    sub.d $a1, $zero, $a1
-; CHECK-NEXT:    st.w $a1, $a0, 4
 ; CHECK-NEXT:    ld.w $a1, $a0, 0
+; CHECK-NEXT:    ld.w $a2, $a0, 12
+; CHECK-NEXT:    ld.w $a3, $a0, 4
+; CHECK-NEXT:    ld.w $a4, $a0, 8
 ; CHECK-NEXT:    sltui $a1, $a1, 1
 ; CHECK-NEXT:    sub.d $a1, $zero, $a1
+; CHECK-NEXT:    sltui $a3, $a3, 1
+; CHECK-NEXT:    sub.d $a3, $zero, $a3
+; CHECK-NEXT:    sltui $a4, $a4, 1
+; CHECK-NEXT:    sub.d $a4, $zero, $a4
+; CHECK-NEXT:    sltui $a2, $a2, 1
+; CHECK-NEXT:    sub.d $a2, $zero, $a2
+; CHECK-NEXT:    st.w $a2, $a0, 12
+; CHECK-NEXT:    st.w $a4, $a0, 8
+; CHECK-NEXT:    st.w $a3, $a0, 4
 ; CHECK-NEXT:    st.w $a1, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
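
getSetCCResultType stores an all-ones or all-zeros word per element depending on whether the load was zero; the sltui/sub.d pairs above compute that mask branchlessly, and the new schedule merely batches the loads before the compares. In C (sketch only):

  #include <stdint.h>

  /* sltui a,a,1 yields (a == 0); subtracting from zero turns
   * that 0/1 into the 0/-1 mask the test stores back. */
  static int32_t zero_mask(uint32_t x) {
    uint32_t is_zero = (x < 1u); /* sltui */
    return -(int32_t)is_zero;    /* sub.d $a, $zero, $a */
  }
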
diff --git a/llvm/test/CodeGen/LoongArch/ghc-cc.ll b/llvm/test/CodeGen/LoongArch/ghc-cc.ll
index 0ab125e875b996..735315d323a362 100644
--- a/llvm/test/CodeGen/LoongArch/ghc-cc.ll
+++ b/llvm/test/CodeGen/LoongArch/ghc-cc.ll
@@ -26,57 +26,57 @@
 define ghccc void @foo() nounwind {
 ; LA64-LABEL: foo:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(base)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(base)
-; LA64-NEXT:    ld.d $s0, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(sp)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(sp)
-; LA64-NEXT:    ld.d $s1, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(hp)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(hp)
-; LA64-NEXT:    ld.d $s2, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r1)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r1)
-; LA64-NEXT:    ld.d $s3, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r2)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r2)
-; LA64-NEXT:    ld.d $s4, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r3)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r3)
-; LA64-NEXT:    ld.d $s5, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r4)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r4)
-; LA64-NEXT:    ld.d $s6, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r5)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r5)
-; LA64-NEXT:    ld.d $s7, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(splim)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(splim)
-; LA64-NEXT:    ld.d $s8, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f1)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f1)
-; LA64-NEXT:    fld.s $fs0, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f2)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f2)
-; LA64-NEXT:    fld.s $fs1, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f3)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f3)
-; LA64-NEXT:    fld.s $fs2, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f4)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f4)
-; LA64-NEXT:    fld.s $fs3, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d1)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d1)
-; LA64-NEXT:    fld.d $fs4, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d2)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d2)
-; LA64-NEXT:    fld.d $fs5, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d3)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d3)
-; LA64-NEXT:    fld.d $fs6, $a0, 0
 ; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d4)
 ; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d4)
 ; LA64-NEXT:    fld.d $fs7, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d3)
+; LA64-NEXT:    fld.d $fs6, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d2)
+; LA64-NEXT:    fld.d $fs5, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d1)
+; LA64-NEXT:    fld.d $fs4, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f4)
+; LA64-NEXT:    fld.s $fs3, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f3)
+; LA64-NEXT:    fld.s $fs2, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f2)
+; LA64-NEXT:    fld.s $fs1, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f1)
+; LA64-NEXT:    fld.s $fs0, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(splim)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(splim)
+; LA64-NEXT:    ld.d $s8, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r5)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r5)
+; LA64-NEXT:    ld.d $s7, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r4)
+; LA64-NEXT:    ld.d $s6, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r3)
+; LA64-NEXT:    ld.d $s5, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r2)
+; LA64-NEXT:    ld.d $s4, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r1)
+; LA64-NEXT:    ld.d $s3, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(hp)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(hp)
+; LA64-NEXT:    ld.d $s2, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(sp)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(sp)
+; LA64-NEXT:    ld.d $s1, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(base)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(base)
+; LA64-NEXT:    ld.d $s0, $a0, 0
 ; LA64-NEXT:    b %plt(bar)
 
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
index ff845cb1f3dbd9..06ef4d2f6c151d 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
@@ -13,17 +13,17 @@ define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 der
 ; CHECK-NEXT:    addi.d $a2, $sp, 0
 ; CHECK-NEXT:    add.d $a3, $a2, $a1
 ; CHECK-NEXT:    ldx.d $a1, $a1, $a2
+; CHECK-NEXT:    ld.d $a2, $a3, 40
 ; CHECK-NEXT:    st.d $a1, $a0, 0
-; CHECK-NEXT:    ld.d $a1, $a3, 40
-; CHECK-NEXT:    st.d $a1, $a0, 40
+; CHECK-NEXT:    st.d $a2, $a0, 40
 ; CHECK-NEXT:    ld.d $a1, $a3, 32
+; CHECK-NEXT:    ld.d $a2, $a3, 24
+; CHECK-NEXT:    ld.d $a4, $a3, 16
+; CHECK-NEXT:    ld.d $a3, $a3, 8
 ; CHECK-NEXT:    st.d $a1, $a0, 32
-; CHECK-NEXT:    ld.d $a1, $a3, 24
-; CHECK-NEXT:    st.d $a1, $a0, 24
-; CHECK-NEXT:    ld.d $a1, $a3, 16
-; CHECK-NEXT:    st.d $a1, $a0, 16
-; CHECK-NEXT:    ld.d $a1, $a3, 8
-; CHECK-NEXT:    st.d $a1, $a0, 8
+; CHECK-NEXT:    st.d $a2, $a0, 24
+; CHECK-NEXT:    st.d $a4, $a0, 16
+; CHECK-NEXT:    st.d $a3, $a0, 8
 ; CHECK-NEXT:    addi.d $sp, $sp, 96
 ; CHECK-NEXT:    ret
   %1 = alloca [2 x %Box], align 16
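
The box test's memcpy is expanded inline into 8-byte ld.d/st.d pairs, and the new schedule hoists independent loads above earlier stores so they can issue back to back. A rough C picture of the resulting access pattern (offsets taken from the asm above; the function is hypothetical):

  #include <stdint.h>

  /* 48-byte copy in 8-byte chunks, loads grouped ahead of the
   * stores they feed, mirroring the scheduled order above. */
  static void copy48(uint64_t *dst, const uint64_t *src) {
    uint64_t t0 = src[0], t5 = src[5];
    dst[0] = t0; dst[5] = t5;
    uint64_t t4 = src[4], t3 = src[3], t2 = src[2], t1 = src[1];
    dst[4] = t4; dst[3] = t3; dst[2] = t2; dst[1] = t1;
  }
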
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
index b43ed7859cde8f..b3e32cc5c00c64 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
@@ -285,19 +285,19 @@ define signext i32 @and_i32_0xfff0(i32 %a) {
 define signext i32 @and_i32_0xfff0_twice(i32 %a, i32 %b) {
 ; LA32-LABEL: and_i32_0xfff0_twice:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 4
-; LA32-NEXT:    slli.w $a1, $a1, 4
 ; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 4
 ; LA32-NEXT:    slli.w $a0, $a0, 4
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 4
+; LA32-NEXT:    slli.w $a1, $a1, 4
 ; LA32-NEXT:    sub.w $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: and_i32_0xfff0_twice:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 4
-; LA64-NEXT:    slli.d $a1, $a1, 4
 ; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 4
 ; LA64-NEXT:    slli.d $a0, $a0, 4
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 4
+; LA64-NEXT:    slli.d $a1, $a1, 4
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %c = and i32 %a, 65520
@@ -326,21 +326,21 @@ define i64 @and_i64_0xfff0(i64 %a) {
 define i64 @and_i64_0xfff0_twice(i64 %a, i64 %b) {
 ; LA32-LABEL: and_i64_0xfff0_twice:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a2, 15, 4
-; LA32-NEXT:    slli.w $a1, $a1, 4
 ; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 4
+; LA32-NEXT:    slli.w $a1, $a0, 4
+; LA32-NEXT:    bstrpick.w $a0, $a2, 15, 4
 ; LA32-NEXT:    slli.w $a2, $a0, 4
-; LA32-NEXT:    sub.w $a0, $a2, $a1
-; LA32-NEXT:    sltu $a1, $a2, $a1
+; LA32-NEXT:    sub.w $a0, $a1, $a2
+; LA32-NEXT:    sltu $a1, $a1, $a2
 ; LA32-NEXT:    sub.w $a1, $zero, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: and_i64_0xfff0_twice:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 4
-; LA64-NEXT:    slli.d $a1, $a1, 4
 ; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 4
 ; LA64-NEXT:    slli.d $a0, $a0, 4
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 4
+; LA64-NEXT:    slli.d $a1, $a1, 4
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %c = and i64 %a, 65520
@@ -390,14 +390,14 @@ define i64 @and_i64_0xfff0_multiple_times(i64 %a, i64 %b, i64 %c) {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    lu12i.w $a1, 15
 ; LA32-NEXT:    ori $a1, $a1, 4080
-; LA32-NEXT:    and $a3, $a0, $a1
-; LA32-NEXT:    and $a0, $a4, $a1
-; LA32-NEXT:    and $a1, $a2, $a1
-; LA32-NEXT:    mul.w $a0, $a1, $a0
-; LA32-NEXT:    sub.w $a2, $a3, $a1
-; LA32-NEXT:    xor $a0, $a2, $a0
-; LA32-NEXT:    sltu $a1, $a3, $a1
+; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    and $a2, $a2, $a1
+; LA32-NEXT:    and $a3, $a4, $a1
+; LA32-NEXT:    sltu $a1, $a0, $a2
 ; LA32-NEXT:    sub.w $a1, $zero, $a1
+; LA32-NEXT:    sub.w $a0, $a0, $a2
+; LA32-NEXT:    mul.w $a2, $a2, $a3
+; LA32-NEXT:    xor $a0, $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: and_i64_0xfff0_multiple_times:
@@ -405,11 +405,11 @@ define i64 @and_i64_0xfff0_multiple_times(i64 %a, i64 %b, i64 %c) {
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4080
 ; LA64-NEXT:    and $a0, $a0, $a3
-; LA64-NEXT:    and $a2, $a2, $a3
 ; LA64-NEXT:    and $a1, $a1, $a3
-; LA64-NEXT:    mul.d $a2, $a1, $a2
+; LA64-NEXT:    and $a2, $a2, $a3
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    xor $a0, $a0, $a2
+; LA64-NEXT:    mul.d $a1, $a1, $a2
+; LA64-NEXT:    xor $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %d = and i64 %a, 65520
   %e = and i64 %b, 65520
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll
index 0d8e7127d0df8b..d4f766d460d107 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll
@@ -77,8 +77,8 @@ define i64 @ashr_i64(i64 %x, i64 %y) {
 ; LA32-NEXT:    slli.w $a6, $a1, 1
 ; LA32-NEXT:    sll.w $a2, $a6, $a2
 ; LA32-NEXT:    or $a0, $a0, $a2
-; LA32-NEXT:    sra.w $a1, $a1, $a4
 ; LA32-NEXT:    maskeqz $a0, $a0, $a5
+; LA32-NEXT:    sra.w $a1, $a1, $a4
 ; LA32-NEXT:    masknez $a1, $a1, $a5
 ; LA32-NEXT:    or $a0, $a0, $a1
 ; LA32-NEXT:    move $a1, $a3
@@ -154,8 +154,8 @@ define i32 @ashr_i32_3(i32 %x) {
 define i64 @ashr_i64_3(i64 %x) {
 ; LA32-LABEL: ashr_i64_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a0, $a0, 3
 ; LA32-NEXT:    slli.w $a2, $a1, 29
+; LA32-NEXT:    srli.w $a0, $a0, 3
 ; LA32-NEXT:    or $a0, $a0, $a2
 ; LA32-NEXT:    srai.w $a1, $a1, 3
 ; LA32-NEXT:    ret
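
ashr_i64_3 on LA32 splits the 64-bit arithmetic shift across the register pair: the low word picks up the three bits shifted out of the high word, and only the high word uses the arithmetic form. As a C sketch (assuming the lo/hi pair convention shown above; the signed >> is arithmetic on this target):

  #include <stdint.h>

  static uint64_t ashr64_by3(uint32_t lo, int32_t hi) {
    uint32_t new_lo = (lo >> 3) | ((uint32_t)hi << 29); /* srli + slli + or */
    int32_t  new_hi = hi >> 3;                          /* srai.w           */
    return ((uint64_t)(uint32_t)new_hi << 32) | new_lo;
  }
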
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
index 31ecec6ea8051b..06ad89972b849b 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -4,17 +4,17 @@
 define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i8_acquire_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a4, $a3
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a3
@@ -36,18 +36,18 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i16_acquire_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    lu12i.w $a4, 15
-; LA64-NEXT:    ori $a4, $a4, 4095
-; LA64-NEXT:    sll.w $a3, $a4, $a3
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a3
@@ -108,17 +108,17 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i8_acquire_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a4, $a3
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a3
@@ -140,18 +140,18 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i16_acquire_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    lu12i.w $a4, 15
-; LA64-NEXT:    ori $a4, $a4, 4095
-; LA64-NEXT:    sll.w $a3, $a4, $a3
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a3
@@ -213,16 +213,16 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind
 ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a2, $a2, 255
-; LA64-NEXT:    addi.w $a4, $a4, 0
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    sll.w $a2, $a2, $a3
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a6, $a5, $a4
@@ -246,24 +246,24 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind
 define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a3, 15
-; LA64-NEXT:    ori $a3, $a3, 4095
-; LA64-NEXT:    slli.d $a4, $a0, 3
-; LA64-NEXT:    sll.w $a3, $a3, $a4
-; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    sll.w $a2, $a2, $a4
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a3
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a4
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a3
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a6, $a5, $a3
+; LA64-NEXT:    and $a6, $a5, $a4
 ; LA64-NEXT:    bne $a6, $a1, .LBB9_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
-; LA64-NEXT:    andn $a6, $a5, $a3
+; LA64-NEXT:    andn $a6, $a5, $a4
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB9_1
@@ -271,7 +271,7 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou
 ; LA64-NEXT:  .LBB9_3:
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB9_4:
-; LA64-NEXT:    srl.w $a0, $a5, $a4
+; LA64-NEXT:    srl.w $a0, $a5, $a3
 ; LA64-NEXT:    ret
   %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
   %res = extractvalue { i16, i1 } %tmp, 0
@@ -324,16 +324,16 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou
 define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
@@ -361,24 +361,24 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind
 define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a3
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    lu12i.w $a3, 15
-; LA64-NEXT:    ori $a3, $a3, 4095
-; LA64-NEXT:    slli.d $a4, $a0, 3
-; LA64-NEXT:    sll.w $a3, $a3, $a4
-; LA64-NEXT:    sll.w $a1, $a1, $a4
+; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT:    sll.w $a2, $a2, $a4
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a3
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    addi.w $a4, $a3, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a6, $a5, $a4
+; LA64-NEXT:    and $a6, $a5, $a3
 ; LA64-NEXT:    bne $a6, $a1, .LBB13_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
-; LA64-NEXT:    andn $a6, $a5, $a4
+; LA64-NEXT:    andn $a6, $a5, $a3
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB13_1
@@ -386,7 +386,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw
 ; LA64-NEXT:  .LBB13_3:
 ; LA64-NEXT:    dbar 20
 ; LA64-NEXT:  .LBB13_4:
-; LA64-NEXT:    and $a0, $a5, $a3
+; LA64-NEXT:    and $a0, $a5, $a4
 ; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    xor $a0, $a1, $a0
 ; LA64-NEXT:    sltui $a0, $a0, 1
@@ -444,17 +444,17 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw
 define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i8_monotonic_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a4, $a3
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a3
@@ -476,18 +476,18 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind
 define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i16_monotonic_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    lu12i.w $a4, 15
-; LA64-NEXT:    ori $a4, $a4, 4095
-; LA64-NEXT:    sll.w $a3, $a4, $a3
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a3
@@ -549,16 +549,16 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun
 ; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a2, $a2, 255
-; LA64-NEXT:    addi.w $a4, $a4, 0
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    sll.w $a2, $a2, $a3
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a6, $a5, $a4
@@ -582,24 +582,24 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun
 define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a3, 15
-; LA64-NEXT:    ori $a3, $a3, 4095
-; LA64-NEXT:    slli.d $a4, $a0, 3
-; LA64-NEXT:    sll.w $a3, $a3, $a4
-; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT:    addi.w $a3, $a3, 0
-; LA64-NEXT:    sll.w $a2, $a2, $a4
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a3
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a4
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a3
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a6, $a5, $a3
+; LA64-NEXT:    and $a6, $a5, $a4
 ; LA64-NEXT:    bne $a6, $a1, .LBB21_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
-; LA64-NEXT:    andn $a6, $a5, $a3
+; LA64-NEXT:    andn $a6, $a5, $a4
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB21_1
@@ -607,7 +607,7 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val)
 ; LA64-NEXT:  .LBB21_3:
 ; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB21_4:
-; LA64-NEXT:    srl.w $a0, $a5, $a4
+; LA64-NEXT:    srl.w $a0, $a5, $a3
 ; LA64-NEXT:    ret
   %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
   %res = extractvalue { i16, i1 } %tmp, 0
@@ -660,16 +660,16 @@ define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val)
 define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
 ; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
@@ -697,24 +697,24 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun
 define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a4, $a4, $a3
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    lu12i.w $a3, 15
-; LA64-NEXT:    ori $a3, $a3, 4095
-; LA64-NEXT:    slli.d $a4, $a0, 3
-; LA64-NEXT:    sll.w $a3, $a3, $a4
-; LA64-NEXT:    sll.w $a1, $a1, $a4
+; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT:    sll.w $a2, $a2, $a4
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a3
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    addi.w $a4, $a3, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
 ; LA64-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a6, $a5, $a4
+; LA64-NEXT:    and $a6, $a5, $a3
 ; LA64-NEXT:    bne $a6, $a1, .LBB25_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB25_1 Depth=1
-; LA64-NEXT:    andn $a6, $a5, $a4
+; LA64-NEXT:    andn $a6, $a5, $a3
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB25_1
@@ -722,7 +722,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n
 ; LA64-NEXT:  .LBB25_3:
 ; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB25_4:
-; LA64-NEXT:    and $a0, $a5, $a3
+; LA64-NEXT:    and $a0, $a5, $a4
 ; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    xor $a0, $a1, $a0
 ; LA64-NEXT:    sltui $a0, $a0, 1
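
All of the i8/i16 cmpxchg variants above share one expansion: align the pointer, build a shifted byte or halfword mask, and run an ll.w/sc.w loop on the containing 32-bit word (the scheduling change only reorders the mask setup). One iteration of that loop, sketched in plain C, which cannot express the ll/sc reservation, so the retry is only noted in comments:

  #include <stdint.h>

  /* byte_off is the byte's position within the aligned word
   * (the low pointer bits cleared by bstrins.d). */
  static int cmpxchg_u8_once(uint32_t *word, unsigned byte_off,
                             uint8_t cmp, uint8_t val) {
    unsigned shift = byte_off * 8;           /* slli.d $a3, $a0, 3      */
    uint32_t mask  = 0xffu << shift;         /* ori 255 + sll.w         */
    uint32_t c     = (uint32_t)cmp << shift; /* andi + sll.w            */
    uint32_t v     = (uint32_t)val << shift;
    uint32_t loaded = *word;                 /* ll.w                    */
    if ((loaded & mask) != c)                /* and + bne               */
      return 0;                              /* failure path (dbar)     */
    *word = (loaded & ~mask) | v;            /* andn + or + sc.w; the
                                                real loop retries from
                                                ll.w if sc.w fails      */
    return 1;
  }
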
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 4d8160d7080340..ba08790fb7cb04 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -313,20 +313,22 @@ define double @double_fadd_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB4_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -334,10 +336,11 @@ define double @double_fadd_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB4_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB4_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -370,9 +373,9 @@ define double @double_fadd_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB4_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -407,20 +410,22 @@ define double @double_fsub_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB5_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -428,10 +433,11 @@ define double @double_fsub_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB5_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB5_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -464,9 +470,9 @@ define double @double_fsub_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB5_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -501,20 +507,22 @@ define double @double_fmin_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB6_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -522,10 +530,11 @@ define double @double_fmin_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB6_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB6_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -558,10 +567,10 @@ define double @double_fmin_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB6_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -596,20 +605,22 @@ define double @double_fmax_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB7_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -617,10 +628,11 @@ define double @double_fmax_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB7_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB7_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -653,10 +665,10 @@ define double @double_fmax_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB7_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -991,20 +1003,22 @@ define double @double_fadd_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 3
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB12_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1012,10 +1026,11 @@ define double @double_fadd_release(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB12_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB12_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -1048,9 +1063,9 @@ define double @double_fadd_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB12_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -1085,20 +1100,22 @@ define double @double_fsub_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 3
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB13_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1106,10 +1123,11 @@ define double @double_fsub_release(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB13_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB13_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -1142,9 +1160,9 @@ define double @double_fsub_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB13_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -1179,20 +1197,22 @@ define double @double_fmin_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 3
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB14_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1200,10 +1220,11 @@ define double @double_fmin_release(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB14_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB14_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -1236,10 +1257,10 @@ define double @double_fmin_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB14_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -1274,20 +1295,22 @@ define double @double_fmax_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 3
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB15_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1295,10 +1318,11 @@ define double @double_fmax_release(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB15_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB15_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -1331,10 +1355,10 @@ define double @double_fmax_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB15_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -1661,30 +1685,32 @@ define float @float_fmax_acq_rel(ptr %p) nounwind {
 define double @double_fadd_acq_rel(ptr %p) nounwind {
 ; LA64F-LABEL: double_fadd_acq_rel:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -80
-; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -96
+; LA64F-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s6, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s6, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    ori $s4, $zero, 4
 ; LA64F-NEXT:    ori $s5, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB20_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s6
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s6, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1692,19 +1718,20 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s5
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB20_1
+; LA64F-NEXT:    ld.d $s6, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB20_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 80
+; LA64F-NEXT:    move $a0, $s6
+; LA64F-NEXT:    ld.d $s6, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s5, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s4, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 96
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fadd_acq_rel:
@@ -1731,9 +1758,9 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB20_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -1761,30 +1788,32 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
 define double @double_fsub_acq_rel(ptr %p) nounwind {
 ; LA64F-LABEL: double_fsub_acq_rel:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -80
-; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -96
+; LA64F-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s6, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s6, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    ori $s4, $zero, 4
 ; LA64F-NEXT:    ori $s5, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB21_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s6
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s6, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1792,19 +1821,20 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s5
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB21_1
+; LA64F-NEXT:    ld.d $s6, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB21_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 80
+; LA64F-NEXT:    move $a0, $s6
+; LA64F-NEXT:    ld.d $s6, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s5, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s4, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 96
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fsub_acq_rel:
@@ -1831,9 +1861,9 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB21_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -1861,30 +1891,32 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
 define double @double_fmin_acq_rel(ptr %p) nounwind {
 ; LA64F-LABEL: double_fmin_acq_rel:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -80
-; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -96
+; LA64F-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s6, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s6, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    ori $s4, $zero, 4
 ; LA64F-NEXT:    ori $s5, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB22_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s6
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s6, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1892,19 +1924,20 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s5
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB22_1
+; LA64F-NEXT:    ld.d $s6, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB22_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 80
+; LA64F-NEXT:    move $a0, $s6
+; LA64F-NEXT:    ld.d $s6, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s5, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s4, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 96
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fmin_acq_rel:
@@ -1931,10 +1964,10 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB22_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -1962,30 +1995,32 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
 define double @double_fmax_acq_rel(ptr %p) nounwind {
 ; LA64F-LABEL: double_fmax_acq_rel:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -80
-; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -96
+; LA64F-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s6, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s6, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    ori $s4, $zero, 4
 ; LA64F-NEXT:    ori $s5, $zero, 2
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB23_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s6
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s6, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -1993,19 +2028,20 @@ define double @double_fmax_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s5
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB23_1
+; LA64F-NEXT:    ld.d $s6, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB23_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 80
+; LA64F-NEXT:    move $a0, $s6
+; LA64F-NEXT:    ld.d $s6, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s5, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s4, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 96
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fmax_acq_rel:
@@ -2032,10 +2068,10 @@ define double @double_fmax_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB23_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -2371,20 +2407,22 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 5
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB28_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -2392,10 +2430,11 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB28_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB28_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -2428,9 +2467,9 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB28_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -2465,20 +2504,22 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 5
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB29_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -2486,10 +2527,11 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB29_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB29_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -2522,9 +2564,9 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB29_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -2559,20 +2601,22 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 5
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB30_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -2580,10 +2624,11 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB30_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB30_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -2616,10 +2661,10 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB30_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -2654,20 +2699,22 @@ define double @double_fmax_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s5, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 16
-; LA64F-NEXT:    addi.d $s3, $sp, 8
+; LA64F-NEXT:    addi.d $s2, $sp, 8
+; LA64F-NEXT:    addi.d $s3, $sp, 0
 ; LA64F-NEXT:    ori $s4, $zero, 5
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB31_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 16
+; LA64F-NEXT:    move $a0, $s5
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    st.d $s5, $sp, 8
+; LA64F-NEXT:    st.d $a0, $sp, 0
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -2675,10 +2722,11 @@ define double @double_fmax_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $s4
 ; LA64F-NEXT:    move $a5, $s4
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16
-; LA64F-NEXT:    beqz $a1, .LBB31_1
+; LA64F-NEXT:    ld.d $s5, $sp, 8
+; LA64F-NEXT:    beqz $a0, .LBB31_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    ld.d $s5, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
@@ -2711,10 +2759,10 @@ define double @double_fmax_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB31_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 8
+; LA64D-NEXT:    fst.d $fa1, $sp, 8
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -3041,26 +3089,28 @@ define float @float_fmax_monotonic(ptr %p) nounwind {
 define double @double_fadd_monotonic(ptr %p) nounwind {
 ; LA64F-LABEL: double_fadd_monotonic:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -64
-; LA64F-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -80
+; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s4, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB36_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s4
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s4, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -3068,17 +3118,18 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB36_1
+; LA64F-NEXT:    ld.d $s4, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB36_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s3, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 64
+; LA64F-NEXT:    move $a0, $s4
+; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 80
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fadd_monotonic:
@@ -3101,9 +3152,9 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB36_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -3129,26 +3180,28 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
 define double @double_fsub_monotonic(ptr %p) nounwind {
 ; LA64F-LABEL: double_fsub_monotonic:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -64
-; LA64F-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -80
+; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s4, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB37_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s4
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s4, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -3156,17 +3209,18 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB37_1
+; LA64F-NEXT:    ld.d $s4, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB37_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s3, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 64
+; LA64F-NEXT:    move $a0, $s4
+; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 80
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fsub_monotonic:
@@ -3189,9 +3243,9 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB37_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -3217,26 +3271,28 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
 define double @double_fmin_monotonic(ptr %p) nounwind {
 ; LA64F-LABEL: double_fmin_monotonic:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -64
-; LA64F-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -80
+; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s4, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB38_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s4
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s4, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -3244,17 +3300,18 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB38_1
+; LA64F-NEXT:    ld.d $s4, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB38_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s3, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 64
+; LA64F-NEXT:    move $a0, $s4
+; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 80
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fmin_monotonic:
@@ -3277,10 +3334,10 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB38_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
@@ -3306,26 +3363,28 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
 define double @double_fmax_monotonic(ptr %p) nounwind {
 ; LA64F-LABEL: double_fmax_monotonic:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -64
-; LA64F-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -80
+; LA64F-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    ld.d $s4, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    ori $s1, $zero, 8
-; LA64F-NEXT:    addi.d $s2, $sp, 8
-; LA64F-NEXT:    addi.d $s3, $sp, 0
+; LA64F-NEXT:    addi.d $s2, $sp, 16
+; LA64F-NEXT:    addi.d $s3, $sp, 8
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB39_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    st.d $a0, $sp, 8
+; LA64F-NEXT:    move $a0, $s4
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $s4, $sp, 16
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    move $a0, $s1
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a2, $s2
@@ -3333,17 +3392,18 @@ define double @double_fmax_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    move $a1, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 8
-; LA64F-NEXT:    beqz $a1, .LBB39_1
+; LA64F-NEXT:    ld.d $s4, $sp, 16
+; LA64F-NEXT:    beqz $a0, .LBB39_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    ld.d $s3, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 64
+; LA64F-NEXT:    move $a0, $s4
+; LA64F-NEXT:    ld.d $s4, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 80
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: double_fmax_monotonic:
@@ -3366,10 +3426,10 @@ define double @double_fmax_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB39_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
+; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 8
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
-; LA64D-NEXT:    fst.d $fa0, $sp, 0
+; LA64D-NEXT:    fst.d $fa1, $sp, 0
 ; LA64D-NEXT:    move $a0, $s0
 ; LA64D-NEXT:    move $a1, $fp
 ; LA64D-NEXT:    move $a2, $s1
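(Note on the atomicrmw-fp.ll churn above: every LA64F block has the same shape. Without native 64-bit FP registers, atomicrmw fadd/fsub/fmin/fmax on double expands to a loop that calls a soft-float helper (__adddf3, fmin, fmax) followed by the generic __atomic_compare_exchange libcall; the immediates are the C memory-order constants, i.e. 4/2 for acq_rel/acquire, 5/5 for seq_cst, 0/0 for monotonic. The scheduler change only reorders the spill of the expected value around the call. A minimal C11 model of that loop, assuming standard <stdatomic.h>; the function name and the 1.0 operand match the fadd test but are otherwise illustrative:

#include <stdatomic.h>

/* Model of the LA64F expansion of double_fadd_acq_rel: load once, then
 * retry a compare-exchange until it succeeds, returning the value seen
 * before the update. The generated code performs the addition via the
 * __adddf3 libcall and the CAS via the generic __atomic_compare_exchange
 * libcall with orders 4 (acq_rel) and 2 (acquire). */
double double_fadd_acq_rel_model(_Atomic double *p) {
  double old = atomic_load_explicit(p, memory_order_relaxed);
  double desired;
  do {
    desired = old + 1.0;            /* the __adddf3 libcall on LA64F */
  } while (!atomic_compare_exchange_weak_explicit(
               p, &old, desired,
               memory_order_acq_rel, memory_order_acquire));
  return old;
}

On the LA64D side, the fmin/fmax loops start with fmax.d $fa1, $fa0, $fa0; this self-max is, presumably, the usual idiom for quieting/canonicalizing a NaN input so the following fmin.d/fmax.d matches the IR's minnum/maxnum semantics.)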
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
index 464c9ce97c5a6e..c36734e11f0189 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -8,13 +8,13 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -37,29 +37,29 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a6, $a1, .LBB1_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB1_3: # in Loop: Header=BB1_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB1_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b acquire
   ret i16 %1
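(The sub-word reorderings in this file are easier to read against the underlying expansion: the pointer's low two bits become a bit offset (address shifted left by 3, taken modulo 32 by the w-register shifts), the pointer is aligned down with bstrins.d, a field mask is shifted into place, and the ll.w/sc.w loop merges the updated field back with an xor/and/xor sequence. A hedged C sketch of the umax i8 case; the compare-exchange builtin stands in for the ll.w/sc.w retry loop, and all names are illustrative:

#include <stdint.h>

/* Sketch of the word-sized LL/SC expansion for atomicrmw umax i8. */
uint8_t umax_i8_model(uint8_t *a, uint8_t b) {
  uintptr_t addr = (uintptr_t)a;
  unsigned shamt = (addr & 3) * 8;                     /* slli.d $a2, $a0, 3 */
  uint32_t *word = (uint32_t *)(addr & ~(uintptr_t)3); /* bstrins.d align    */
  uint32_t mask = 0xffu << shamt;                      /* ori + sll.w        */
  uint32_t val  = (uint32_t)b << shamt;                /* andi + sll.w       */
  uint32_t old  = __atomic_load_n(word, __ATOMIC_RELAXED);
  for (;;) {
    uint32_t store = old;
    if ((old & mask) < val)                  /* bgeu on the masked field */
      store = old ^ ((old ^ val) & mask);    /* xor/and/xor field merge  */
    if (__atomic_compare_exchange_n(word, &old, store, 1 /* weak */,
                                    __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE))
      return (uint8_t)(old >> shamt);        /* srl.w result extraction  */
  }
}

The scheduler now hoists bstrins.d next to the slli.d it depends on and sinks the addi.w sign-extension of the mask, which is the whole of the diff in these tests.)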
@@ -89,13 +89,13 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -118,29 +118,29 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a1, $a6, .LBB5_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB5_3: # in Loop: Header=BB5_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB5_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b acquire
   ret i16 %1
@@ -170,25 +170,25 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a7, $a1, .LBB8_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB8_3: # in Loop: Header=BB8_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
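(The signed max/min variants add one more step before the bge compare: the extracted field must be sign-extended in place. The andi/xori pair (i8) or ori/sub.d pair (i16) computes a shift amount that, taken modulo 32 by the w-register shifts, moves the field's sign bit to bit 31 for a paired sll.w/sra.w. A small model of that helper, assumed for illustration and not code from the patch:

#include <stdint.h>

/* Sign-extend the 8-bit field at bit offset shamt within a 32-bit word,
 * mirroring the sll.w/sra.w pair. (56 ^ (shamt & 24)) & 31 places the
 * field's top bit at bit 31; relies on the usual arithmetic right shift
 * of a negative int32_t. */
static int32_t sext_i8_field(uint32_t word, unsigned shamt) {
  unsigned k = ((shamt & 24) ^ 56) & 31;   /* andi + xori, mod-32 shift */
  return (int32_t)(word << k) >> k;        /* sll.w then sra.w          */
}

As above, the only change in these hunks is scheduling: the shift-amount arithmetic and the mask sign-extension move relative to the operand setup, with the loop bodies unchanged apart from register renumbering.)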
@@ -204,17 +204,17 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -260,25 +260,25 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a1, $a7, .LBB12_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB12_3: # in Loop: Header=BB12_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -294,17 +294,17 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -350,13 +350,13 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -379,29 +379,29 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a6, $a1, .LBB17_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB17_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB17_3: # in Loop: Header=BB17_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB17_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b release
   ret i16 %1
@@ -431,13 +431,13 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -460,29 +460,29 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a1, $a6, .LBB21_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB21_3: # in Loop: Header=BB21_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB21_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b release
   ret i16 %1
@@ -512,25 +512,25 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a7, $a1, .LBB24_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB24_3: # in Loop: Header=BB24_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -546,17 +546,17 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -602,25 +602,25 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a1, $a7, .LBB28_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB28_3: # in Loop: Header=BB28_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -636,17 +636,17 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -692,13 +692,13 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -721,29 +721,29 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a6, $a1, .LBB33_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB33_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB33_3: # in Loop: Header=BB33_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB33_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b acq_rel
   ret i16 %1
@@ -773,13 +773,13 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -802,29 +802,29 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a1, $a6, .LBB37_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB37_3: # in Loop: Header=BB37_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB37_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b acq_rel
   ret i16 %1
@@ -854,25 +854,25 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a7, $a1, .LBB40_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB40_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB40_3: # in Loop: Header=BB40_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -888,17 +888,17 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -944,25 +944,25 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a1, $a7, .LBB44_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB44_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB44_3: # in Loop: Header=BB44_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -978,17 +978,17 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -1034,13 +1034,13 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1063,29 +1063,29 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a6, $a1, .LBB49_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB49_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB49_3: # in Loop: Header=BB49_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB49_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b seq_cst
   ret i16 %1
@@ -1115,13 +1115,13 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1144,29 +1144,29 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a1, $a6, .LBB53_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB53_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b seq_cst
   ret i16 %1
@@ -1196,25 +1196,25 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a7, $a1, .LBB56_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1230,17 +1230,17 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -1286,25 +1286,25 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a1, $a7, .LBB60_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB60_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB60_3: # in Loop: Header=BB60_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1320,17 +1320,17 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -1376,13 +1376,13 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1405,29 +1405,29 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a6, $a1, .LBB65_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB65_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB65_3: # in Loop: Header=BB65_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB65_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b monotonic
   ret i16 %1
@@ -1457,13 +1457,13 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1486,29 +1486,29 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
-; LA64-NEXT:    and $a6, $a4, $a2
+; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    bgeu $a1, $a6, .LBB69_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB69_1 Depth=1
 ; LA64-NEXT:    xor $a5, $a4, $a1
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:  .LBB69_3: # in Loop: Header=BB69_1 Depth=1
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB69_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b monotonic
   ret i16 %1
@@ -1538,25 +1538,25 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB72_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a7, $a1, .LBB72_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB72_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB72_3: # in Loop: Header=BB72_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1572,17 +1572,17 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB73_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -1628,25 +1628,25 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    ori $a3, $zero, 255
-; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a4, $a2, 24
-; LA64-NEXT:    xori $a4, $a4, 56
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    xori $a3, $a3, 56
 ; LA64-NEXT:  .LBB76_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a3
+; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a4
-; LA64-NEXT:    sra.w $a7, $a7, $a4
+; LA64-NEXT:    sll.w $a7, $a7, $a3
+; LA64-NEXT:    sra.w $a7, $a7, $a3
 ; LA64-NEXT:    bge $a1, $a7, .LBB76_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB76_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a3
+; LA64-NEXT:    and $a6, $a6, $a4
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB76_3: # in Loop: Header=BB76_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1662,17 +1662,17 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
-; LA64-NEXT:    addi.w $a4, $a4, 0
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    ori $a5, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a5, $a3
 ; LA64-NEXT:  .LBB77_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
index d4f7ed017121da..4669065114f0cc 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -6,11 +6,11 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -26,13 +26,13 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -52,10 +52,10 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -68,10 +68,10 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -83,9 +83,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -98,9 +98,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -111,46 +111,46 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB3_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB3_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b acquire
   ret i16 %1
@@ -159,31 +159,31 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    nor $a1, $a1, $zero
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    and $a4, $a3, $a1
+; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB4_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_0_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    nor $a1, $a1, $zero
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    amand_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 0 acquire
   ret i16 %1
@@ -192,29 +192,29 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    or $a4, $a3, $a1
+; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB5_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amor_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    amor_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 -1 acquire
   ret i16 %1
@@ -265,11 +265,11 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -285,13 +285,13 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -310,46 +310,46 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB9_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_add_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB9_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b acquire
   ret i16 %1
@@ -400,11 +400,11 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -420,13 +420,13 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -445,46 +445,46 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB13_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_sub_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB13_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b acquire
   ret i16 %1
@@ -537,11 +537,11 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -558,13 +558,13 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -584,48 +584,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
 ; LA32-NEXT:    nor $a5, $a5, $zero
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB17_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_nand_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
 ; LA64-NEXT:    nor $a5, $a5, $zero
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB17_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b acquire
   ret i16 %1
@@ -689,12 +689,12 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -707,12 +707,12 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -723,35 +723,35 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
-; LA32-NEXT:    orn $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    orn $a1, $a1, $a3
 ; LA32-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    ll.w $a2, $a0, 0
-; LA32-NEXT:    and $a4, $a2, $a1
+; LA32-NEXT:    ll.w $a3, $a0, 0
+; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB21_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a2, $a3
+; LA32-NEXT:    srl.w $a0, $a3, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_and_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
-; LA64-NEXT:    orn $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a2, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    amand_db.w $a3, $a1, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw and ptr %a, i16 %b acquire
   ret i16 %1
@@ -801,10 +801,10 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -816,10 +816,10 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i8_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -830,10 +830,10 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -845,10 +845,10 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -900,10 +900,10 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -915,10 +915,10 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i8_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -929,10 +929,10 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_acquire:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -944,10 +944,10 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i16_acquire:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1000,11 +1000,11 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -1020,13 +1020,13 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -1046,10 +1046,10 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -1062,10 +1062,10 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -1077,9 +1077,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -1092,9 +1092,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -1105,46 +1105,46 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB35_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB35_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b release
   ret i16 %1
@@ -1153,31 +1153,31 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    nor $a1, $a1, $zero
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    and $a4, $a3, $a1
+; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB36_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_0_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    nor $a1, $a1, $zero
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    amand_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 0 release
   ret i16 %1
@@ -1186,29 +1186,29 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    or $a4, $a3, $a1
+; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB37_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amor_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    amor_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 -1 release
   ret i16 %1
@@ -1259,11 +1259,11 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -1279,13 +1279,13 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -1304,46 +1304,46 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB41_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_add_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB41_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b release
   ret i16 %1
@@ -1394,11 +1394,11 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -1414,13 +1414,13 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -1439,46 +1439,46 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB45_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_sub_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB45_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b release
   ret i16 %1
@@ -1531,11 +1531,11 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -1552,13 +1552,13 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -1578,48 +1578,48 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
 ; LA32-NEXT:    nor $a5, $a5, $zero
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB49_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_nand_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
 ; LA64-NEXT:    nor $a5, $a5, $zero
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB49_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b release
   ret i16 %1
@@ -1683,12 +1683,12 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -1701,12 +1701,12 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1717,35 +1717,35 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
-; LA32-NEXT:    orn $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    orn $a1, $a1, $a3
 ; LA32-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    ll.w $a2, $a0, 0
-; LA32-NEXT:    and $a4, $a2, $a1
+; LA32-NEXT:    ll.w $a3, $a0, 0
+; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB53_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a2, $a3
+; LA32-NEXT:    srl.w $a0, $a3, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_and_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
-; LA64-NEXT:    orn $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a2, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    amand_db.w $a3, $a1, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw and ptr %a, i16 %b release
   ret i16 %1
@@ -1795,10 +1795,10 @@ define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -1810,10 +1810,10 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i8_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1824,10 +1824,10 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -1839,10 +1839,10 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1894,10 +1894,10 @@ define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -1909,10 +1909,10 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i8_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1923,10 +1923,10 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_release:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -1938,10 +1938,10 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i16_release:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1994,11 +1994,11 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -2014,13 +2014,13 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -2040,10 +2040,10 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -2056,10 +2056,10 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -2071,9 +2071,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -2086,9 +2086,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -2099,46 +2099,46 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB67_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB67_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b acq_rel
   ret i16 %1
@@ -2147,31 +2147,31 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    nor $a1, $a1, $zero
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    and $a4, $a3, $a1
+; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB68_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_0_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    nor $a1, $a1, $zero
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    amand_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 0 acq_rel
   ret i16 %1
@@ -2180,29 +2180,29 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    or $a4, $a3, $a1
+; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB69_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amor_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    amor_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel
   ret i16 %1
@@ -2253,11 +2253,11 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB72_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -2273,13 +2273,13 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB72_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -2298,46 +2298,46 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB73_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB73_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_add_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB73_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB73_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b acq_rel
   ret i16 %1
@@ -2388,11 +2388,11 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB76_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -2408,13 +2408,13 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB76_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -2433,46 +2433,46 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB77_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB77_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_sub_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB77_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB77_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b acq_rel
   ret i16 %1
@@ -2525,11 +2525,11 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB80_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -2546,13 +2546,13 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB80_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -2572,48 +2572,48 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
 ; LA32-NEXT:    nor $a5, $a5, $zero
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB81_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_nand_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
 ; LA64-NEXT:    nor $a5, $a5, $zero
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB81_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b acq_rel
   ret i16 %1
@@ -2677,12 +2677,12 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB84_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -2695,12 +2695,12 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2711,35 +2711,35 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
-; LA32-NEXT:    orn $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    orn $a1, $a1, $a3
 ; LA32-NEXT:  .LBB85_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    ll.w $a2, $a0, 0
-; LA32-NEXT:    and $a4, $a2, $a1
+; LA32-NEXT:    ll.w $a3, $a0, 0
+; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB85_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a2, $a3
+; LA32-NEXT:    srl.w $a0, $a3, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_and_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
-; LA64-NEXT:    orn $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a2, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    amand_db.w $a3, $a1, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw and ptr %a, i16 %b acq_rel
   ret i16 %1
@@ -2789,10 +2789,10 @@ define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -2804,10 +2804,10 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i8_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2818,10 +2818,10 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB89_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -2833,10 +2833,10 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2888,10 +2888,10 @@ define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB92_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -2903,10 +2903,10 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i8_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2917,10 +2917,10 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_acq_rel:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB93_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -2932,10 +2932,10 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i16_acq_rel:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2988,11 +2988,11 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -3008,13 +3008,13 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -3034,10 +3034,10 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -3050,10 +3050,10 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -3065,9 +3065,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -3080,9 +3080,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -3093,46 +3093,46 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB99_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB99_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b seq_cst
   ret i16 %1
@@ -3141,31 +3141,31 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    nor $a1, $a1, $zero
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:  .LBB100_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    and $a4, $a3, $a1
+; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB100_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_0_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    nor $a1, $a1, $zero
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    amand_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 0 seq_cst
   ret i16 %1
@@ -3174,29 +3174,29 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:  .LBB101_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    or $a4, $a3, $a1
+; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB101_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amor_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    amor_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 -1 seq_cst
   ret i16 %1
@@ -3247,11 +3247,11 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB104_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -3267,13 +3267,13 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB104_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -3292,46 +3292,46 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB105_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB105_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_add_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB105_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB105_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b seq_cst
   ret i16 %1
@@ -3382,11 +3382,11 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB108_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -3402,13 +3402,13 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB108_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -3427,46 +3427,46 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB109_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB109_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_sub_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB109_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB109_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b seq_cst
   ret i16 %1
@@ -3519,11 +3519,11 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -3540,13 +3540,13 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -3566,48 +3566,48 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
 ; LA32-NEXT:    nor $a5, $a5, $zero
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB113_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_nand_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
 ; LA64-NEXT:    nor $a5, $a5, $zero
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB113_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b seq_cst
   ret i16 %1
@@ -3671,12 +3671,12 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -3689,12 +3689,12 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3705,35 +3705,35 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
-; LA32-NEXT:    orn $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    orn $a1, $a1, $a3
 ; LA32-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    ll.w $a2, $a0, 0
-; LA32-NEXT:    and $a4, $a2, $a1
+; LA32-NEXT:    ll.w $a3, $a0, 0
+; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB117_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a2, $a3
+; LA32-NEXT:    srl.w $a0, $a3, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_and_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
-; LA64-NEXT:    orn $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a2, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    amand_db.w $a3, $a1, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw and ptr %a, i16 %b seq_cst
   ret i16 %1
@@ -3783,10 +3783,10 @@ define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB120_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -3798,10 +3798,10 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i8_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3812,10 +3812,10 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -3827,10 +3827,10 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3882,10 +3882,10 @@ define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB124_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -3897,10 +3897,10 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i8_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3911,10 +3911,10 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_seq_cst:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB125_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -3926,10 +3926,10 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i16_seq_cst:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3982,11 +3982,11 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -4002,13 +4002,13 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -4028,10 +4028,10 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB129_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -4044,10 +4044,10 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -4059,9 +4059,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB130_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -4074,9 +4074,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -4087,46 +4087,46 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB131_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB131_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB131_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB131_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 %b monotonic
   ret i16 %1
@@ -4135,31 +4135,31 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    nor $a1, $a1, $zero
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:  .LBB132_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    and $a4, $a3, $a1
+; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB132_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_0_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    nor $a1, $a1, $zero
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    amand_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 0 monotonic
   ret i16 %1
@@ -4168,29 +4168,29 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a1, 15
-; LA32-NEXT:    ori $a1, $a1, 4095
-; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a1, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 4095
+; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:  .LBB133_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
-; LA32-NEXT:    or $a4, $a3, $a1
+; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB133_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a3, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, 15
-; LA64-NEXT:    ori $a1, $a1, 4095
-; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a1, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amor_db.w $a3, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a3, $a2
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 4095
+; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    amor_db.w $a3, $a2, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
   %1 = atomicrmw xchg ptr %a, i16 -1 monotonic
   ret i16 %1
@@ -4241,11 +4241,11 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB136_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -4261,13 +4261,13 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB136_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -4286,46 +4286,46 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB137_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB137_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_add_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB137_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB137_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw add ptr %a, i16 %b monotonic
   ret i16 %1
@@ -4376,11 +4376,11 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB140_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -4396,13 +4396,13 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB140_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -4421,46 +4421,46 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB141_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB141_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_sub_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB141_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB141_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw sub ptr %a, i16 %b monotonic
   ret i16 %1
@@ -4513,11 +4513,11 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB144_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -4534,13 +4534,13 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
-; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB144_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -4560,48 +4560,48 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB145_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
 ; LA32-NEXT:    nor $a5, $a5, $zero
 ; LA32-NEXT:    xor $a5, $a4, $a5
-; LA32-NEXT:    and $a5, $a5, $a2
+; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a0, 0
 ; LA32-NEXT:    beqz $a5, .LBB145_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a4, $a3
+; LA32-NEXT:    srl.w $a0, $a4, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_nand_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    addi.w $a1, $a1, 0
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
 ; LA64-NEXT:  .LBB145_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
 ; LA64-NEXT:    nor $a5, $a5, $zero
 ; LA64-NEXT:    xor $a5, $a4, $a5
-; LA64-NEXT:    and $a5, $a5, $a2
+; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB145_1
 ; LA64-NEXT:  # %bb.2:
-; LA64-NEXT:    srl.w $a0, $a4, $a3
+; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw nand ptr %a, i16 %b monotonic
   ret i16 %1
@@ -4665,12 +4665,12 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB148_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -4683,12 +4683,12 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4699,35 +4699,35 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a2, 15
-; LA32-NEXT:    ori $a2, $a2, 4095
-; LA32-NEXT:    slli.w $a3, $a0, 3
-; LA32-NEXT:    sll.w $a2, $a2, $a3
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    sll.w $a1, $a1, $a3
-; LA32-NEXT:    orn $a1, $a1, $a2
+; LA32-NEXT:    slli.w $a2, $a0, 3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    lu12i.w $a3, 15
+; LA32-NEXT:    ori $a3, $a3, 4095
+; LA32-NEXT:    sll.w $a3, $a3, $a2
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    orn $a1, $a1, $a3
 ; LA32-NEXT:  .LBB149_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    ll.w $a2, $a0, 0
-; LA32-NEXT:    and $a4, $a2, $a1
+; LA32-NEXT:    ll.w $a3, $a0, 0
+; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
 ; LA32-NEXT:    beqz $a4, .LBB149_1
 ; LA32-NEXT:  # %bb.2:
-; LA32-NEXT:    srl.w $a0, $a2, $a3
+; LA32-NEXT:    srl.w $a0, $a3, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: atomicrmw_and_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a2, 15
-; LA64-NEXT:    ori $a2, $a2, 4095
-; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    sll.w $a2, $a2, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
-; LA64-NEXT:    orn $a1, $a1, $a2
+; LA64-NEXT:    slli.d $a2, $a0, 3
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    amand_db.w $a2, $a1, $a0
-; LA64-NEXT:    srl.w $a0, $a2, $a3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    amand_db.w $a3, $a1, $a0
+; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw and ptr %a, i16 %b monotonic
   ret i16 %1
@@ -4777,10 +4777,10 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -4792,10 +4792,10 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i8_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4806,10 +4806,10 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -4821,10 +4821,10 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_or_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4876,10 +4876,10 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
 define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB156_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -4891,10 +4891,10 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i8_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4905,10 +4905,10 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_monotonic:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:  .LBB157_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -4920,10 +4920,10 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
 ;
 ; LA64-LABEL: atomicrmw_xor_i16_monotonic:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
index 03a126a736efc3..ef117f97488715 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
@@ -119,12 +119,12 @@ define i32 @convert_double_to_u32(double %a) nounwind {
 ; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
 ; LA32-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI7_0)
 ; LA32-NEXT:    fld.d $fa1, $a0, 0
-; LA32-NEXT:    fsub.d $fa2, $fa0, $fa1
-; LA32-NEXT:    ftintrz.w.d $fa2, $fa2
-; LA32-NEXT:    movfr2gr.s $a0, $fa2
+; LA32-NEXT:    fcmp.clt.d $fcc0, $fa0, $fa1
+; LA32-NEXT:    fsub.d $fa1, $fa0, $fa1
+; LA32-NEXT:    ftintrz.w.d $fa1, $fa1
+; LA32-NEXT:    movfr2gr.s $a0, $fa1
 ; LA32-NEXT:    lu12i.w $a1, -524288
 ; LA32-NEXT:    xor $a0, $a0, $a1
-; LA32-NEXT:    fcmp.clt.d $fcc0, $fa0, $fa1
 ; LA32-NEXT:    movcf2gr $a1, $fcc0
 ; LA32-NEXT:    masknez $a0, $a0, $a1
 ; LA32-NEXT:    ftintrz.w.d $fa0, $fa0
@@ -176,12 +176,12 @@ define i64 @convert_double_to_u64(double %a) nounwind {
 ; LA64-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI9_0)
 ; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI9_0)
 ; LA64-NEXT:    fld.d $fa1, $a0, 0
-; LA64-NEXT:    fsub.d $fa2, $fa0, $fa1
-; LA64-NEXT:    ftintrz.l.d $fa2, $fa2
-; LA64-NEXT:    movfr2gr.d $a0, $fa2
+; LA64-NEXT:    fcmp.clt.d $fcc0, $fa0, $fa1
+; LA64-NEXT:    fsub.d $fa1, $fa0, $fa1
+; LA64-NEXT:    ftintrz.l.d $fa1, $fa1
+; LA64-NEXT:    movfr2gr.d $a0, $fa1
 ; LA64-NEXT:    lu52i.d $a1, $zero, -2048
 ; LA64-NEXT:    xor $a0, $a0, $a1
-; LA64-NEXT:    fcmp.clt.d $fcc0, $fa0, $fa1
 ; LA64-NEXT:    movcf2gr $a1, $fcc0
 ; LA64-NEXT:    masknez $a0, $a0, $a1
 ; LA64-NEXT:    ftintrz.l.d $fa0, $fa0
@@ -232,11 +232,11 @@ define double @convert_u32_to_double(i32 %a) nounwind {
 ; LA32-NEXT:    lu12i.w $a1, 275200
 ; LA32-NEXT:    st.w $a1, $sp, 12
 ; LA32-NEXT:    st.w $a0, $sp, 8
+; LA32-NEXT:    fld.d $fa0, $sp, 8
 ; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI12_0)
 ; LA32-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI12_0)
-; LA32-NEXT:    fld.d $fa0, $a0, 0
-; LA32-NEXT:    fld.d $fa1, $sp, 8
-; LA32-NEXT:    fsub.d $fa0, $fa1, $fa0
+; LA32-NEXT:    fld.d $fa1, $a0, 0
+; LA32-NEXT:    fsub.d $fa0, $fa0, $fa1
 ; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
@@ -263,13 +263,13 @@ define double @convert_u64_to_double(i64 %a) nounwind {
 ; LA64-LABEL: convert_u64_to_double:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    srli.d $a1, $a0, 32
+; LA64-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI13_0)
+; LA64-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI13_0)
+; LA64-NEXT:    fld.d $fa0, $a2, 0
 ; LA64-NEXT:    lu52i.d $a2, $zero, 1107
 ; LA64-NEXT:    or $a1, $a1, $a2
-; LA64-NEXT:    movgr2fr.d $fa0, $a1
-; LA64-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI13_0)
-; LA64-NEXT:    addi.d $a1, $a1, %pc_lo12(.LCPI13_0)
-; LA64-NEXT:    fld.d $fa1, $a1, 0
-; LA64-NEXT:    fsub.d $fa0, $fa0, $fa1
+; LA64-NEXT:    movgr2fr.d $fa1, $a1
+; LA64-NEXT:    fsub.d $fa0, $fa1, $fa0
 ; LA64-NEXT:    lu12i.w $a1, 275200
 ; LA64-NEXT:    bstrins.d $a0, $a1, 63, 32
 ; LA64-NEXT:    movgr2fr.d $fa1, $a0
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
index b57d96aee32f49..b01b84ba385ec8 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
@@ -184,12 +184,12 @@ define i32 @convert_float_to_u32(float %a) nounwind {
 ; LA32F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
 ; LA32F-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI6_0)
 ; LA32F-NEXT:    fld.s $fa1, $a0, 0
-; LA32F-NEXT:    fsub.s $fa2, $fa0, $fa1
-; LA32F-NEXT:    ftintrz.w.s $fa2, $fa2
-; LA32F-NEXT:    movfr2gr.s $a0, $fa2
+; LA32F-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
+; LA32F-NEXT:    fsub.s $fa1, $fa0, $fa1
+; LA32F-NEXT:    ftintrz.w.s $fa1, $fa1
+; LA32F-NEXT:    movfr2gr.s $a0, $fa1
 ; LA32F-NEXT:    lu12i.w $a1, -524288
 ; LA32F-NEXT:    xor $a0, $a0, $a1
-; LA32F-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
 ; LA32F-NEXT:    movcf2gr $a1, $fcc0
 ; LA32F-NEXT:    masknez $a0, $a0, $a1
 ; LA32F-NEXT:    ftintrz.w.s $fa0, $fa0
@@ -203,12 +203,12 @@ define i32 @convert_float_to_u32(float %a) nounwind {
 ; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
 ; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI6_0)
 ; LA32D-NEXT:    fld.s $fa1, $a0, 0
-; LA32D-NEXT:    fsub.s $fa2, $fa0, $fa1
-; LA32D-NEXT:    ftintrz.w.s $fa2, $fa2
-; LA32D-NEXT:    movfr2gr.s $a0, $fa2
+; LA32D-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
+; LA32D-NEXT:    fsub.s $fa1, $fa0, $fa1
+; LA32D-NEXT:    ftintrz.w.s $fa1, $fa1
+; LA32D-NEXT:    movfr2gr.s $a0, $fa1
 ; LA32D-NEXT:    lu12i.w $a1, -524288
 ; LA32D-NEXT:    xor $a0, $a0, $a1
-; LA32D-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
 ; LA32D-NEXT:    movcf2gr $a1, $fcc0
 ; LA32D-NEXT:    masknez $a0, $a0, $a1
 ; LA32D-NEXT:    ftintrz.w.s $fa0, $fa0
@@ -222,12 +222,12 @@ define i32 @convert_float_to_u32(float %a) nounwind {
 ; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
 ; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI6_0)
 ; LA64F-NEXT:    fld.s $fa1, $a0, 0
-; LA64F-NEXT:    fsub.s $fa2, $fa0, $fa1
-; LA64F-NEXT:    ftintrz.w.s $fa2, $fa2
-; LA64F-NEXT:    movfr2gr.s $a0, $fa2
+; LA64F-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
+; LA64F-NEXT:    fsub.s $fa1, $fa0, $fa1
+; LA64F-NEXT:    ftintrz.w.s $fa1, $fa1
+; LA64F-NEXT:    movfr2gr.s $a0, $fa1
 ; LA64F-NEXT:    lu12i.w $a1, -524288
 ; LA64F-NEXT:    xor $a0, $a0, $a1
-; LA64F-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
 ; LA64F-NEXT:    movcf2gr $a1, $fcc0
 ; LA64F-NEXT:    masknez $a0, $a0, $a1
 ; LA64F-NEXT:    ftintrz.w.s $fa0, $fa0
@@ -269,12 +269,12 @@ define i64 @convert_float_to_u64(float %a) nounwind {
 ; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
 ; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI7_0)
 ; LA64F-NEXT:    fld.s $fa1, $a0, 0
-; LA64F-NEXT:    fsub.s $fa2, $fa0, $fa1
-; LA64F-NEXT:    ftintrz.w.s $fa2, $fa2
-; LA64F-NEXT:    movfr2gr.s $a0, $fa2
+; LA64F-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
+; LA64F-NEXT:    fsub.s $fa1, $fa0, $fa1
+; LA64F-NEXT:    ftintrz.w.s $fa1, $fa1
+; LA64F-NEXT:    movfr2gr.s $a0, $fa1
 ; LA64F-NEXT:    lu52i.d $a1, $zero, -2048
 ; LA64F-NEXT:    xor $a0, $a0, $a1
-; LA64F-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
 ; LA64F-NEXT:    movcf2gr $a1, $fcc0
 ; LA64F-NEXT:    masknez $a0, $a0, $a1
 ; LA64F-NEXT:    ftintrz.w.s $fa0, $fa0
@@ -288,12 +288,12 @@ define i64 @convert_float_to_u64(float %a) nounwind {
 ; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
 ; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI7_0)
 ; LA64D-NEXT:    fld.s $fa1, $a0, 0
-; LA64D-NEXT:    fsub.s $fa2, $fa0, $fa1
-; LA64D-NEXT:    ftintrz.l.s $fa2, $fa2
-; LA64D-NEXT:    movfr2gr.d $a0, $fa2
+; LA64D-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
+; LA64D-NEXT:    fsub.s $fa1, $fa0, $fa1
+; LA64D-NEXT:    ftintrz.l.s $fa1, $fa1
+; LA64D-NEXT:    movfr2gr.d $a0, $fa1
 ; LA64D-NEXT:    lu52i.d $a1, $zero, -2048
 ; LA64D-NEXT:    xor $a0, $a0, $a1
-; LA64D-NEXT:    fcmp.clt.s $fcc0, $fa0, $fa1
 ; LA64D-NEXT:    movcf2gr $a1, $fcc0
 ; LA64D-NEXT:    masknez $a0, $a0, $a1
 ; LA64D-NEXT:    ftintrz.l.s $fa0, $fa0
@@ -504,11 +504,11 @@ define float @convert_u32_to_float(i32 %a) nounwind {
 ; LA32D-NEXT:    lu12i.w $a1, 275200
 ; LA32D-NEXT:    st.w $a1, $sp, 12
 ; LA32D-NEXT:    st.w $a0, $sp, 8
+; LA32D-NEXT:    fld.d $fa0, $sp, 8
 ; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI14_0)
 ; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI14_0)
-; LA32D-NEXT:    fld.d $fa0, $a0, 0
-; LA32D-NEXT:    fld.d $fa1, $sp, 8
-; LA32D-NEXT:    fsub.d $fa0, $fa1, $fa0
+; LA32D-NEXT:    fld.d $fa1, $a0, 0
+; LA32D-NEXT:    fsub.d $fa0, $fa0, $fa1
 ; LA32D-NEXT:    fcvt.s.d $fa0, $fa0
 ; LA32D-NEXT:    addi.w $sp, $sp, 16
 ; LA32D-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
index 387a62bd6c00ff..1b3cda5547da83 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
@@ -1,34 +1,49 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA32NOPIC,LA32
-; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA32PIC,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA64NOPIC,LA64
-; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA64PIC,LA64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA32NOPIC
+; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA32PIC
+; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA64NOPIC
+; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA64PIC
 
 ;; Check load from and store to global variables.
 @G = dso_local global i32 zeroinitializer, align 4
 @arr = dso_local global [10 x i32] zeroinitializer, align 4
 
 define i32 @load_store_global() nounwind {
-; ALL-LABEL:      load_store_global:
-; ALL:            # %bb.0:
-
-; LA32NOPIC-NEXT:   pcalau12i $a0, %pc_hi20(G)
-; LA32NOPIC-NEXT:   addi.w $a1, $a0, %pc_lo12(G)
-; LA32PIC-NEXT:     pcalau12i $a0, %pc_hi20(.LG$local)
-; LA32PIC-NEXT:     addi.w $a1, $a0, %pc_lo12(.LG$local)
-; LA32-NEXT:        ld.w $a0, $a1, 0
-; LA32-NEXT:        addi.w $a0, $a0, 1
-; LA32-NEXT:        st.w $a0, $a1, 0
-
-; LA64NOPIC-NEXT:   pcalau12i $a0, %pc_hi20(G)
-; LA64NOPIC-NEXT:   addi.d $a1, $a0, %pc_lo12(G)
-; LA64PIC-NEXT:     pcalau12i $a0, %pc_hi20(.LG$local)
-; LA64PIC-NEXT:     addi.d $a1, $a0, %pc_lo12(.LG$local)
-; LA64-NEXT:        ld.w $a0, $a1, 0
-; LA64-NEXT:        addi.d $a0, $a0, 1
-; LA64-NEXT:        st.w $a0, $a1, 0
-
-; ALL-NEXT:         ret
-
+; LA32NOPIC-LABEL: load_store_global:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    pcalau12i $a0, %pc_hi20(G)
+; LA32NOPIC-NEXT:    addi.w $a1, $a0, %pc_lo12(G)
+; LA32NOPIC-NEXT:    ld.w $a0, $a1, 0
+; LA32NOPIC-NEXT:    addi.w $a0, $a0, 1
+; LA32NOPIC-NEXT:    st.w $a0, $a1, 0
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: load_store_global:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    pcalau12i $a0, %pc_hi20(.LG$local)
+; LA32PIC-NEXT:    addi.w $a1, $a0, %pc_lo12(.LG$local)
+; LA32PIC-NEXT:    ld.w $a0, $a1, 0
+; LA32PIC-NEXT:    addi.w $a0, $a0, 1
+; LA32PIC-NEXT:    st.w $a0, $a1, 0
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: load_store_global:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    pcalau12i $a0, %pc_hi20(G)
+; LA64NOPIC-NEXT:    addi.d $a1, $a0, %pc_lo12(G)
+; LA64NOPIC-NEXT:    ld.w $a0, $a1, 0
+; LA64NOPIC-NEXT:    addi.d $a0, $a0, 1
+; LA64NOPIC-NEXT:    st.w $a0, $a1, 0
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: load_store_global:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    pcalau12i $a0, %pc_hi20(.LG$local)
+; LA64PIC-NEXT:    addi.d $a1, $a0, %pc_lo12(.LG$local)
+; LA64PIC-NEXT:    ld.w $a0, $a1, 0
+; LA64PIC-NEXT:    addi.d $a0, $a0, 1
+; LA64PIC-NEXT:    st.w $a0, $a1, 0
+; LA64PIC-NEXT:    ret
   %v = load i32, ptr @G
   %sum = add i32 %v, 1
   store i32 %sum, ptr @G
@@ -36,34 +51,49 @@ define i32 @load_store_global() nounwind {
 }
 
 define i32 @load_store_global_array(i32 %a) nounwind {
-; ALL-LABEL: load_store_global_array:
-; ALL:       # %bb.0:
-
-; LA32NOPIC-NEXT:   pcalau12i $a1, %pc_hi20(arr)
-; LA32NOPIC-NEXT:   addi.w $a2, $a1, %pc_lo12(arr)
-; LA32PIC-NEXT:     pcalau12i $a1, %pc_hi20(.Larr$local)
-; LA32PIC-NEXT:     addi.w $a2, $a1, %pc_lo12(.Larr$local)
-; LA32-NEXT:        ld.w $a1, $a2, 0
-; LA32-NEXT:        st.w $a0, $a2, 0
-; LA32NOPIC-NEXT:   ld.w $a3, $a2, 36
-; LA32NOPIC-NEXT:   st.w $a0, $a2, 36
-; LA32PIC-NEXT:     ld.w $a3, $a2, 36
-; LA32PIC-NEXT:     st.w $a0, $a2, 36
-
-; LA64NOPIC-NEXT:   pcalau12i $a1, %pc_hi20(arr)
-; LA64NOPIC-NEXT:   addi.d $a2, $a1, %pc_lo12(arr)
-; LA64PIC-NEXT:     pcalau12i $a1, %pc_hi20(.Larr$local)
-; LA64PIC-NEXT:     addi.d $a2, $a1, %pc_lo12(.Larr$local)
-; LA64-NEXT:        ld.w $a1, $a2, 0
-; LA64-NEXT:        st.w $a0, $a2, 0
-; LA64NOPIC-NEXT:   ld.w $a3, $a2, 36
-; LA64NOPIC-NEXT:   st.w $a0, $a2, 36
-; LA64PIC-NEXT:     ld.w $a3, $a2, 36
-; LA64PIC-NEXT:     st.w $a0, $a2, 36
-
-; ALL-NEXT:         move $a0, $a1
-; ALL-NEXT:         ret
-
+; LA32NOPIC-LABEL: load_store_global_array:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    pcalau12i $a1, %pc_hi20(arr)
+; LA32NOPIC-NEXT:    addi.w $a2, $a1, %pc_lo12(arr)
+; LA32NOPIC-NEXT:    ld.w $a1, $a2, 0
+; LA32NOPIC-NEXT:    st.w $a0, $a2, 0
+; LA32NOPIC-NEXT:    ld.w $a3, $a2, 36
+; LA32NOPIC-NEXT:    st.w $a0, $a2, 36
+; LA32NOPIC-NEXT:    move $a0, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: load_store_global_array:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    pcalau12i $a1, %pc_hi20(.Larr$local)
+; LA32PIC-NEXT:    addi.w $a2, $a1, %pc_lo12(.Larr$local)
+; LA32PIC-NEXT:    ld.w $a1, $a2, 0
+; LA32PIC-NEXT:    st.w $a0, $a2, 0
+; LA32PIC-NEXT:    ld.w $a3, $a2, 36
+; LA32PIC-NEXT:    st.w $a0, $a2, 36
+; LA32PIC-NEXT:    move $a0, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: load_store_global_array:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    pcalau12i $a1, %pc_hi20(arr)
+; LA64NOPIC-NEXT:    addi.d $a2, $a1, %pc_lo12(arr)
+; LA64NOPIC-NEXT:    ld.w $a1, $a2, 0
+; LA64NOPIC-NEXT:    st.w $a0, $a2, 0
+; LA64NOPIC-NEXT:    ld.w $a3, $a2, 36
+; LA64NOPIC-NEXT:    st.w $a0, $a2, 36
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: load_store_global_array:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    pcalau12i $a1, %pc_hi20(.Larr$local)
+; LA64PIC-NEXT:    addi.d $a2, $a1, %pc_lo12(.Larr$local)
+; LA64PIC-NEXT:    ld.w $a1, $a2, 0
+; LA64PIC-NEXT:    st.w $a0, $a2, 0
+; LA64PIC-NEXT:    ld.w $a3, $a2, 36
+; LA64PIC-NEXT:    st.w $a0, $a2, 36
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = load volatile i32, ptr @arr, align 4
   store i32 %a, ptr @arr, align 4
   %2 = getelementptr [10 x i32], ptr @arr, i32 0, i32 9
@@ -75,18 +105,35 @@ define i32 @load_store_global_array(i32 %a) nounwind {
 ;; Check indexed and unindexed, sext, zext and anyext loads.
 
 define i64 @ld_b(ptr %a) nounwind {
-; LA32-LABEL: ld_b:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.b $a1, $a0, 0
-; LA32-NEXT:    ld.b $a0, $a0, 1
-; LA32-NEXT:    srai.w $a1, $a0, 31
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_b:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.b $a1, $a0, 0
-; LA64-NEXT:    ld.b $a0, $a0, 1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_b:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.b $a2, $a0, 1
+; LA32NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_b:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.b $a2, $a0, 1
+; LA32PIC-NEXT:    ld.b $a0, $a0, 0
+; LA32PIC-NEXT:    srai.w $a1, $a2, 31
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_b:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.b $a1, $a0, 1
+; LA64NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_b:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.b $a1, $a0, 1
+; LA64PIC-NEXT:    ld.b $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 1
   %2 = load i8, ptr %1
   %3 = sext i8 %2 to i64
@@ -95,18 +142,35 @@ define i64 @ld_b(ptr %a) nounwind {
 }
 
 define i64 @ld_h(ptr %a) nounwind {
-; LA32-LABEL: ld_h:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.h $a1, $a0, 0
-; LA32-NEXT:    ld.h $a0, $a0, 4
-; LA32-NEXT:    srai.w $a1, $a0, 31
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_h:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.h $a1, $a0, 0
-; LA64-NEXT:    ld.h $a0, $a0, 4
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_h:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.h $a2, $a0, 4
+; LA32NOPIC-NEXT:    ld.h $a0, $a0, 0
+; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_h:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.h $a2, $a0, 4
+; LA32PIC-NEXT:    ld.h $a0, $a0, 0
+; LA32PIC-NEXT:    srai.w $a1, $a2, 31
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_h:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.h $a1, $a0, 4
+; LA64NOPIC-NEXT:    ld.h $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_h:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.h $a1, $a0, 4
+; LA64PIC-NEXT:    ld.h $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 2
   %2 = load i16, ptr %1
   %3 = sext i16 %2 to i64
@@ -115,18 +179,35 @@ define i64 @ld_h(ptr %a) nounwind {
 }
 
 define i64 @ld_w(ptr %a) nounwind {
-; LA32-LABEL: ld_w:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a1, $a0, 0
-; LA32-NEXT:    ld.w $a0, $a0, 12
-; LA32-NEXT:    srai.w $a1, $a0, 31
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_w:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.w $a1, $a0, 0
-; LA64-NEXT:    ld.w $a0, $a0, 12
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_w:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.w $a2, $a0, 12
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_w:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.w $a2, $a0, 12
+; LA32PIC-NEXT:    ld.w $a0, $a0, 0
+; LA32PIC-NEXT:    srai.w $a1, $a2, 31
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_w:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.w $a1, $a0, 12
+; LA64NOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_w:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.w $a1, $a0, 12
+; LA64PIC-NEXT:    ld.w $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 3
   %2 = load i32, ptr %1
   %3 = sext i32 %2 to i64
@@ -135,19 +216,37 @@ define i64 @ld_w(ptr %a) nounwind {
 }
 
 define i64 @ld_d(ptr %a) nounwind {
-; LA32-LABEL: ld_d:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a1, $a0, 4
-; LA32-NEXT:    ld.w $a1, $a0, 0
-; LA32-NEXT:    ld.w $a1, $a0, 28
-; LA32-NEXT:    ld.w $a0, $a0, 24
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_d:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.d $a1, $a0, 0
-; LA64-NEXT:    ld.d $a0, $a0, 24
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_d:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.w $a1, $a0, 28
+; LA32NOPIC-NEXT:    ld.w $a2, $a0, 24
+; LA32NOPIC-NEXT:    ld.w $a3, $a0, 4
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_d:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.w $a1, $a0, 28
+; LA32PIC-NEXT:    ld.w $a2, $a0, 24
+; LA32PIC-NEXT:    ld.w $a3, $a0, 4
+; LA32PIC-NEXT:    ld.w $a0, $a0, 0
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_d:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.d $a1, $a0, 24
+; LA64NOPIC-NEXT:    ld.d $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_d:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.d $a1, $a0, 24
+; LA64PIC-NEXT:    ld.d $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i64, ptr %a, i64 3
   %2 = load i64, ptr %1
   %3 = load volatile i64, ptr %a
@@ -155,20 +254,35 @@ define i64 @ld_d(ptr %a) nounwind {
 }
 
 define i64 @ld_bu(ptr %a) nounwind {
-; LA32-LABEL: ld_bu:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.bu $a1, $a0, 0
-; LA32-NEXT:    ld.bu $a2, $a0, 4
-; LA32-NEXT:    add.w $a0, $a2, $a1
-; LA32-NEXT:    sltu $a1, $a0, $a2
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_bu:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.bu $a1, $a0, 0
-; LA64-NEXT:    ld.bu $a0, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_bu:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 4
+; LA32NOPIC-NEXT:    ld.bu $a0, $a0, 0
+; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_bu:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.bu $a1, $a0, 4
+; LA32PIC-NEXT:    ld.bu $a0, $a0, 0
+; LA32PIC-NEXT:    add.w $a0, $a1, $a0
+; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_bu:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 4
+; LA64NOPIC-NEXT:    ld.bu $a0, $a0, 0
+; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_bu:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.bu $a1, $a0, 4
+; LA64PIC-NEXT:    ld.bu $a0, $a0, 0
+; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 4
   %2 = load i8, ptr %1
   %3 = zext i8 %2 to i64
@@ -179,20 +293,35 @@ define i64 @ld_bu(ptr %a) nounwind {
 }
 
 define i64 @ld_hu(ptr %a) nounwind {
-; LA32-LABEL: ld_hu:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.hu $a1, $a0, 0
-; LA32-NEXT:    ld.hu $a2, $a0, 10
-; LA32-NEXT:    add.w $a0, $a2, $a1
-; LA32-NEXT:    sltu $a1, $a0, $a2
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_hu:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.hu $a1, $a0, 0
-; LA64-NEXT:    ld.hu $a0, $a0, 10
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_hu:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.hu $a1, $a0, 10
+; LA32NOPIC-NEXT:    ld.hu $a0, $a0, 0
+; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_hu:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.hu $a1, $a0, 10
+; LA32PIC-NEXT:    ld.hu $a0, $a0, 0
+; LA32PIC-NEXT:    add.w $a0, $a1, $a0
+; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_hu:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.hu $a1, $a0, 10
+; LA64NOPIC-NEXT:    ld.hu $a0, $a0, 0
+; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_hu:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.hu $a1, $a0, 10
+; LA64PIC-NEXT:    ld.hu $a0, $a0, 0
+; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 5
   %2 = load i16, ptr %1
   %3 = zext i16 %2 to i64
@@ -203,20 +332,35 @@ define i64 @ld_hu(ptr %a) nounwind {
 }
 
 define i64 @ld_wu(ptr %a) nounwind {
-; LA32-LABEL: ld_wu:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a1, $a0, 0
-; LA32-NEXT:    ld.w $a2, $a0, 20
-; LA32-NEXT:    add.w $a0, $a2, $a1
-; LA32-NEXT:    sltu $a1, $a0, $a2
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_wu:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.wu $a1, $a0, 0
-; LA64-NEXT:    ld.wu $a0, $a0, 20
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_wu:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.w $a1, $a0, 20
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_wu:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.w $a1, $a0, 20
+; LA32PIC-NEXT:    ld.w $a0, $a0, 0
+; LA32PIC-NEXT:    add.w $a0, $a1, $a0
+; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_wu:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.wu $a1, $a0, 20
+; LA64NOPIC-NEXT:    ld.wu $a0, $a0, 0
+; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_wu:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.wu $a1, $a0, 20
+; LA64PIC-NEXT:    ld.wu $a0, $a0, 0
+; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 5
   %2 = load i32, ptr %1
   %3 = zext i32 %2 to i64
@@ -227,21 +371,37 @@ define i64 @ld_wu(ptr %a) nounwind {
 }
 
 define i64 @ldx_b(ptr %a, i64 %idx) nounwind {
-; LA32-LABEL: ldx_b:
-; LA32:       # %bb.0:
-; LA32-NEXT:    add.w $a1, $a0, $a1
-; LA32-NEXT:    ld.b $a2, $a1, 0
-; LA32-NEXT:    ld.b $a0, $a0, 0
-; LA32-NEXT:    srai.w $a1, $a2, 31
-; LA32-NEXT:    move $a0, $a2
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ldx_b:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ldx.b $a1, $a0, $a1
-; LA64-NEXT:    ld.b $a0, $a0, 0
-; LA64-NEXT:    move $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ldx_b:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    add.w $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ld.b $a2, $a1, 0
+; LA32NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ldx_b:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    add.w $a1, $a0, $a1
+; LA32PIC-NEXT:    ld.b $a2, $a1, 0
+; LA32PIC-NEXT:    ld.b $a0, $a0, 0
+; LA32PIC-NEXT:    srai.w $a1, $a2, 31
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ldx_b:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ldx.b $a1, $a0, $a1
+; LA64NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ldx_b:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ldx.b $a1, $a0, $a1
+; LA64PIC-NEXT:    ld.b $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 %idx
   %2 = load i8, ptr %1
   %3 = sext i8 %2 to i64
@@ -250,22 +410,39 @@ define i64 @ldx_b(ptr %a, i64 %idx) nounwind {
 }
 
 define i64 @ldx_h(ptr %a, i64 %idx) nounwind {
-; LA32-LABEL: ldx_h:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a1, $a1, $a0, 1
-; LA32-NEXT:    ld.h $a2, $a1, 0
-; LA32-NEXT:    ld.h $a0, $a0, 0
-; LA32-NEXT:    srai.w $a1, $a2, 31
-; LA32-NEXT:    move $a0, $a2
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ldx_h:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 1
-; LA64-NEXT:    ldx.h $a1, $a0, $a1
-; LA64-NEXT:    ld.h $a0, $a0, 0
-; LA64-NEXT:    move $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ldx_h:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a1, $a1, $a0, 1
+; LA32NOPIC-NEXT:    ld.h $a2, $a1, 0
+; LA32NOPIC-NEXT:    ld.h $a0, $a0, 0
+; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ldx_h:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a1, $a1, $a0, 1
+; LA32PIC-NEXT:    ld.h $a2, $a1, 0
+; LA32PIC-NEXT:    ld.h $a0, $a0, 0
+; LA32PIC-NEXT:    srai.w $a1, $a2, 31
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ldx_h:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 1
+; LA64NOPIC-NEXT:    ldx.h $a1, $a0, $a1
+; LA64NOPIC-NEXT:    ld.h $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ldx_h:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 1
+; LA64PIC-NEXT:    ldx.h $a1, $a0, $a1
+; LA64PIC-NEXT:    ld.h $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 %idx
   %2 = load i16, ptr %1
   %3 = sext i16 %2 to i64
@@ -274,22 +451,39 @@ define i64 @ldx_h(ptr %a, i64 %idx) nounwind {
 }
 
 define i64 @ldx_w(ptr %a, i64 %idx) nounwind {
-; LA32-LABEL: ldx_w:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a1, $a1, $a0, 2
-; LA32-NEXT:    ld.w $a2, $a1, 0
-; LA32-NEXT:    ld.w $a0, $a0, 0
-; LA32-NEXT:    srai.w $a1, $a2, 31
-; LA32-NEXT:    move $a0, $a2
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ldx_w:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 2
-; LA64-NEXT:    ldx.w $a1, $a0, $a1
-; LA64-NEXT:    ld.w $a0, $a0, 0
-; LA64-NEXT:    move $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ldx_w:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a1, $a1, $a0, 2
+; LA32NOPIC-NEXT:    ld.w $a2, $a1, 0
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ldx_w:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a1, $a1, $a0, 2
+; LA32PIC-NEXT:    ld.w $a2, $a1, 0
+; LA32PIC-NEXT:    ld.w $a0, $a0, 0
+; LA32PIC-NEXT:    srai.w $a1, $a2, 31
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ldx_w:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 2
+; LA64NOPIC-NEXT:    ldx.w $a1, $a0, $a1
+; LA64NOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ldx_w:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 2
+; LA64PIC-NEXT:    ldx.w $a1, $a0, $a1
+; LA64PIC-NEXT:    ld.w $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 %idx
   %2 = load i32, ptr %1
   %3 = sext i32 %2 to i64
@@ -298,23 +492,41 @@ define i64 @ldx_w(ptr %a, i64 %idx) nounwind {
 }
 
 define i64 @ldx_d(ptr %a, i64 %idx) nounwind {
-; LA32-LABEL: ldx_d:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a1, $a1, $a0, 3
-; LA32-NEXT:    ld.w $a2, $a1, 0
-; LA32-NEXT:    ld.w $a3, $a0, 0
-; LA32-NEXT:    ld.w $a1, $a1, 4
-; LA32-NEXT:    ld.w $a0, $a0, 4
-; LA32-NEXT:    move $a0, $a2
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ldx_d:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 3
-; LA64-NEXT:    ldx.d $a1, $a0, $a1
-; LA64-NEXT:    ld.d $a0, $a0, 0
-; LA64-NEXT:    move $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ldx_d:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a1, $a1, $a0, 3
+; LA32NOPIC-NEXT:    ld.w $a2, $a1, 0
+; LA32NOPIC-NEXT:    ld.w $a1, $a1, 4
+; LA32NOPIC-NEXT:    ld.w $a3, $a0, 0
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 4
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ldx_d:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a1, $a1, $a0, 3
+; LA32PIC-NEXT:    ld.w $a2, $a1, 0
+; LA32PIC-NEXT:    ld.w $a1, $a1, 4
+; LA32PIC-NEXT:    ld.w $a3, $a0, 0
+; LA32PIC-NEXT:    ld.w $a0, $a0, 4
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ldx_d:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 3
+; LA64NOPIC-NEXT:    ldx.d $a1, $a0, $a1
+; LA64NOPIC-NEXT:    ld.d $a0, $a0, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ldx_d:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 3
+; LA64PIC-NEXT:    ldx.d $a1, $a0, $a1
+; LA64PIC-NEXT:    ld.d $a0, $a0, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i64, ptr %a, i64 %idx
   %2 = load i64, ptr %1
   %3 = load volatile i64, ptr %a
@@ -322,21 +534,37 @@ define i64 @ldx_d(ptr %a, i64 %idx) nounwind {
 }
 
 define i64 @ldx_bu(ptr %a, i64 %idx) nounwind {
-; LA32-LABEL: ldx_bu:
-; LA32:       # %bb.0:
-; LA32-NEXT:    add.w $a1, $a0, $a1
-; LA32-NEXT:    ld.bu $a1, $a1, 0
-; LA32-NEXT:    ld.bu $a0, $a0, 0
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    sltu $a1, $a0, $a1
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ldx_bu:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ldx.bu $a1, $a0, $a1
-; LA64-NEXT:    ld.bu $a0, $a0, 0
-; LA64-NEXT:    add.d $a0, $a1, $a0
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ldx_bu:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    add.w $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ld.bu $a1, $a1, 0
+; LA32NOPIC-NEXT:    ld.bu $a0, $a0, 0
+; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ldx_bu:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    add.w $a1, $a0, $a1
+; LA32PIC-NEXT:    ld.bu $a1, $a1, 0
+; LA32PIC-NEXT:    ld.bu $a0, $a0, 0
+; LA32PIC-NEXT:    add.w $a0, $a1, $a0
+; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ldx_bu:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ldx.bu $a1, $a0, $a1
+; LA64NOPIC-NEXT:    ld.bu $a0, $a0, 0
+; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ldx_bu:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ldx.bu $a1, $a0, $a1
+; LA64PIC-NEXT:    ld.bu $a0, $a0, 0
+; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 %idx
   %2 = load i8, ptr %1
   %3 = zext i8 %2 to i64
@@ -347,22 +575,39 @@ define i64 @ldx_bu(ptr %a, i64 %idx) nounwind {
 }
 
 define i64 @ldx_hu(ptr %a, i64 %idx) nounwind {
-; LA32-LABEL: ldx_hu:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a1, $a1, $a0, 1
-; LA32-NEXT:    ld.hu $a1, $a1, 0
-; LA32-NEXT:    ld.hu $a0, $a0, 0
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    sltu $a1, $a0, $a1
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ldx_hu:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 1
-; LA64-NEXT:    ldx.hu $a1, $a0, $a1
-; LA64-NEXT:    ld.hu $a0, $a0, 0
-; LA64-NEXT:    add.d $a0, $a1, $a0
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ldx_hu:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a1, $a1, $a0, 1
+; LA32NOPIC-NEXT:    ld.hu $a1, $a1, 0
+; LA32NOPIC-NEXT:    ld.hu $a0, $a0, 0
+; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ldx_hu:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a1, $a1, $a0, 1
+; LA32PIC-NEXT:    ld.hu $a1, $a1, 0
+; LA32PIC-NEXT:    ld.hu $a0, $a0, 0
+; LA32PIC-NEXT:    add.w $a0, $a1, $a0
+; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ldx_hu:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 1
+; LA64NOPIC-NEXT:    ldx.hu $a1, $a0, $a1
+; LA64NOPIC-NEXT:    ld.hu $a0, $a0, 0
+; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ldx_hu:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 1
+; LA64PIC-NEXT:    ldx.hu $a1, $a0, $a1
+; LA64PIC-NEXT:    ld.hu $a0, $a0, 0
+; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 %idx
   %2 = load i16, ptr %1
   %3 = zext i16 %2 to i64
@@ -373,22 +618,39 @@ define i64 @ldx_hu(ptr %a, i64 %idx) nounwind {
 }
 
 define i64 @ldx_wu(ptr %a, i64 %idx) nounwind {
-; LA32-LABEL: ldx_wu:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a1, $a1, $a0, 2
-; LA32-NEXT:    ld.w $a1, $a1, 0
-; LA32-NEXT:    ld.w $a0, $a0, 0
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    sltu $a1, $a0, $a1
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ldx_wu:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 2
-; LA64-NEXT:    ldx.wu $a1, $a0, $a1
-; LA64-NEXT:    ld.wu $a0, $a0, 0
-; LA64-NEXT:    add.d $a0, $a1, $a0
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ldx_wu:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a1, $a1, $a0, 2
+; LA32NOPIC-NEXT:    ld.w $a1, $a1, 0
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 0
+; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ldx_wu:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a1, $a1, $a0, 2
+; LA32PIC-NEXT:    ld.w $a1, $a1, 0
+; LA32PIC-NEXT:    ld.w $a0, $a0, 0
+; LA32PIC-NEXT:    add.w $a0, $a1, $a0
+; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ldx_wu:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 2
+; LA64NOPIC-NEXT:    ldx.wu $a1, $a0, $a1
+; LA64NOPIC-NEXT:    ld.wu $a0, $a0, 0
+; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ldx_wu:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 2
+; LA64PIC-NEXT:    ldx.wu $a1, $a0, $a1
+; LA64PIC-NEXT:    ld.wu $a0, $a0, 0
+; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 %idx
   %2 = load i32, ptr %1
   %3 = zext i32 %2 to i64
@@ -403,8 +665,8 @@ define i64 @ldx_wu(ptr %a, i64 %idx) nounwind {
 define void @st_b(ptr %a, i8 %b) nounwind {
 ; ALL-LABEL: st_b:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    st.b $a1, $a0, 6
 ; ALL-NEXT:    st.b $a1, $a0, 0
+; ALL-NEXT:    st.b $a1, $a0, 6
 ; ALL-NEXT:    ret
   store i8 %b, ptr %a
   %1 = getelementptr i8, ptr %a, i64 6
@@ -415,8 +677,8 @@ define void @st_b(ptr %a, i8 %b) nounwind {
 define void @st_h(ptr %a, i16 %b) nounwind {
 ; ALL-LABEL: st_h:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    st.h $a1, $a0, 14
 ; ALL-NEXT:    st.h $a1, $a0, 0
+; ALL-NEXT:    st.h $a1, $a0, 14
 ; ALL-NEXT:    ret
   store i16 %b, ptr %a
   %1 = getelementptr i16, ptr %a, i64 7
@@ -427,8 +689,8 @@ define void @st_h(ptr %a, i16 %b) nounwind {
 define void @st_w(ptr %a, i32 %b) nounwind {
 ; ALL-LABEL: st_w:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    st.w $a1, $a0, 28
 ; ALL-NEXT:    st.w $a1, $a0, 0
+; ALL-NEXT:    st.w $a1, $a0, 28
 ; ALL-NEXT:    ret
   store i32 %b, ptr %a
   %1 = getelementptr i32, ptr %a, i64 7
@@ -437,19 +699,33 @@ define void @st_w(ptr %a, i32 %b) nounwind {
 }
 
 define void @st_d(ptr %a, i64 %b) nounwind {
-; LA32-LABEL: st_d:
-; LA32:       # %bb.0:
-; LA32-NEXT:    st.w $a2, $a0, 68
-; LA32-NEXT:    st.w $a2, $a0, 4
-; LA32-NEXT:    st.w $a1, $a0, 64
-; LA32-NEXT:    st.w $a1, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: st_d:
-; LA64:       # %bb.0:
-; LA64-NEXT:    st.d $a1, $a0, 64
-; LA64-NEXT:    st.d $a1, $a0, 0
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: st_d:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    st.w $a2, $a0, 4
+; LA32NOPIC-NEXT:    st.w $a1, $a0, 0
+; LA32NOPIC-NEXT:    st.w $a2, $a0, 68
+; LA32NOPIC-NEXT:    st.w $a1, $a0, 64
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: st_d:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    st.w $a2, $a0, 4
+; LA32PIC-NEXT:    st.w $a1, $a0, 0
+; LA32PIC-NEXT:    st.w $a2, $a0, 68
+; LA32PIC-NEXT:    st.w $a1, $a0, 64
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: st_d:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    st.d $a1, $a0, 0
+; LA64NOPIC-NEXT:    st.d $a1, $a0, 64
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: st_d:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    st.d $a1, $a0, 0
+; LA64PIC-NEXT:    st.d $a1, $a0, 64
+; LA64PIC-NEXT:    ret
   store i64 %b, ptr %a
   %1 = getelementptr i64, ptr %a, i64 8
   store i64 %b, ptr %1
@@ -457,68 +733,116 @@ define void @st_d(ptr %a, i64 %b) nounwind {
 }
 
 define void @stx_b(ptr %dst, i64 %idx, i8 %val) nounwind {
-; LA32-LABEL: stx_b:
-; LA32:       # %bb.0:
-; LA32-NEXT:    add.w $a0, $a0, $a1
-; LA32-NEXT:    st.b $a3, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: stx_b:
-; LA64:       # %bb.0:
-; LA64-NEXT:    stx.b $a2, $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: stx_b:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    add.w $a0, $a0, $a1
+; LA32NOPIC-NEXT:    st.b $a3, $a0, 0
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: stx_b:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    add.w $a0, $a0, $a1
+; LA32PIC-NEXT:    st.b $a3, $a0, 0
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: stx_b:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    stx.b $a2, $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: stx_b:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    stx.b $a2, $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i8, ptr %dst, i64 %idx
   store i8 %val, ptr %1
   ret void
 }
 
 define void @stx_h(ptr %dst, i64 %idx, i16 %val) nounwind {
-; LA32-LABEL: stx_h:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a0, $a1, $a0, 1
-; LA32-NEXT:    st.h $a3, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: stx_h:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 1
-; LA64-NEXT:    stx.h $a2, $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: stx_h:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a0, $a1, $a0, 1
+; LA32NOPIC-NEXT:    st.h $a3, $a0, 0
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: stx_h:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a0, $a1, $a0, 1
+; LA32PIC-NEXT:    st.h $a3, $a0, 0
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: stx_h:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 1
+; LA64NOPIC-NEXT:    stx.h $a2, $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: stx_h:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 1
+; LA64PIC-NEXT:    stx.h $a2, $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i16, ptr %dst, i64 %idx
   store i16 %val, ptr %1
   ret void
 }
 
 define void @stx_w(ptr %dst, i64 %idx, i32 %val) nounwind {
-; LA32-LABEL: stx_w:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a0, $a1, $a0, 2
-; LA32-NEXT:    st.w $a3, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: stx_w:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 2
-; LA64-NEXT:    stx.w $a2, $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: stx_w:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a0, $a1, $a0, 2
+; LA32NOPIC-NEXT:    st.w $a3, $a0, 0
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: stx_w:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a0, $a1, $a0, 2
+; LA32PIC-NEXT:    st.w $a3, $a0, 0
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: stx_w:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 2
+; LA64NOPIC-NEXT:    stx.w $a2, $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: stx_w:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 2
+; LA64PIC-NEXT:    stx.w $a2, $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i32, ptr %dst, i64 %idx
   store i32 %val, ptr %1
   ret void
 }
 
 define void @stx_d(ptr %dst, i64 %idx, i64 %val) nounwind {
-; LA32-LABEL: stx_d:
-; LA32:       # %bb.0:
-; LA32-NEXT:    alsl.w $a0, $a1, $a0, 3
-; LA32-NEXT:    st.w $a4, $a0, 4
-; LA32-NEXT:    st.w $a3, $a0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: stx_d:
-; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a1, $a1, 3
-; LA64-NEXT:    stx.d $a2, $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: stx_d:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    alsl.w $a0, $a1, $a0, 3
+; LA32NOPIC-NEXT:    st.w $a4, $a0, 4
+; LA32NOPIC-NEXT:    st.w $a3, $a0, 0
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: stx_d:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    alsl.w $a0, $a1, $a0, 3
+; LA32PIC-NEXT:    st.w $a4, $a0, 4
+; LA32PIC-NEXT:    st.w $a3, $a0, 0
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: stx_d:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    slli.d $a1, $a1, 3
+; LA64NOPIC-NEXT:    stx.d $a2, $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: stx_d:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    slli.d $a1, $a1, 3
+; LA64PIC-NEXT:    stx.d $a2, $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = getelementptr i64, ptr %dst, i64 %idx
   store i64 %val, ptr %1
   ret void
@@ -526,24 +850,44 @@ define void @stx_d(ptr %dst, i64 %idx, i64 %val) nounwind {
 
 ;; Check load from and store to an i1 location.
 define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
+; LA32NOPIC-LABEL: load_sext_zext_anyext_i1:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32NOPIC-NEXT:    ld.bu $a3, $a0, 2
+; LA32NOPIC-NEXT:    sub.w $a2, $a3, $a1
+; LA32NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA32NOPIC-NEXT:    sltu $a0, $a3, $a1
+; LA32NOPIC-NEXT:    sub.w $a1, $zero, $a0
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: load_sext_zext_anyext_i1:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32PIC-NEXT:    ld.bu $a3, $a0, 2
+; LA32PIC-NEXT:    sub.w $a2, $a3, $a1
+; LA32PIC-NEXT:    ld.b $a0, $a0, 0
+; LA32PIC-NEXT:    sltu $a0, $a3, $a1
+; LA32PIC-NEXT:    sub.w $a1, $zero, $a0
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: load_sext_zext_anyext_i1:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64NOPIC-NEXT:    ld.bu $a2, $a0, 2
+; LA64NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA64NOPIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: load_sext_zext_anyext_i1:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64PIC-NEXT:    ld.bu $a2, $a0, 2
+; LA64PIC-NEXT:    ld.b $a0, $a0, 0
+; LA64PIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64PIC-NEXT:    ret
   ;; sextload i1
-; LA32-LABEL: load_sext_zext_anyext_i1:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.b $a1, $a0, 0
-; LA32-NEXT:    ld.bu $a1, $a0, 1
-; LA32-NEXT:    ld.bu $a2, $a0, 2
-; LA32-NEXT:    sub.w $a0, $a2, $a1
-; LA32-NEXT:    sltu $a1, $a2, $a1
-; LA32-NEXT:    sub.w $a1, $zero, $a1
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: load_sext_zext_anyext_i1:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.b $a1, $a0, 0
-; LA64-NEXT:    ld.bu $a1, $a0, 1
-; LA64-NEXT:    ld.bu $a0, $a0, 2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    ret
   %1 = getelementptr i1, ptr %a, i64 1
   %2 = load i1, ptr %1
   %3 = sext i1 %2 to i64
@@ -558,22 +902,38 @@ define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
 }
 
 define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
+; LA32NOPIC-LABEL: load_sext_zext_anyext_i1_i16:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32NOPIC-NEXT:    ld.bu $a2, $a0, 2
+; LA32NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA32NOPIC-NEXT:    sub.w $a0, $a2, $a1
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: load_sext_zext_anyext_i1_i16:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32PIC-NEXT:    ld.bu $a2, $a0, 2
+; LA32PIC-NEXT:    ld.b $a0, $a0, 0
+; LA32PIC-NEXT:    sub.w $a0, $a2, $a1
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: load_sext_zext_anyext_i1_i16:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64NOPIC-NEXT:    ld.bu $a2, $a0, 2
+; LA64NOPIC-NEXT:    ld.b $a0, $a0, 0
+; LA64NOPIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: load_sext_zext_anyext_i1_i16:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64PIC-NEXT:    ld.bu $a2, $a0, 2
+; LA64PIC-NEXT:    ld.b $a0, $a0, 0
+; LA64PIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64PIC-NEXT:    ret
   ;; sextload i1
-; LA32-LABEL: load_sext_zext_anyext_i1_i16:
-; LA32:       # %bb.0:
-; LA32-NEXT:    ld.b $a1, $a0, 0
-; LA32-NEXT:    ld.bu $a1, $a0, 1
-; LA32-NEXT:    ld.bu $a0, $a0, 2
-; LA32-NEXT:    sub.w $a0, $a0, $a1
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: load_sext_zext_anyext_i1_i16:
-; LA64:       # %bb.0:
-; LA64-NEXT:    ld.b $a1, $a0, 0
-; LA64-NEXT:    ld.bu $a1, $a0, 1
-; LA64-NEXT:    ld.bu $a0, $a0, 2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    ret
   %1 = getelementptr i1, ptr %a, i64 1
   %2 = load i1, ptr %1
   %3 = sext i1 %2 to i16
@@ -588,29 +948,53 @@ define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 }
 
 define i64 @ld_sd_constant(i64 %a) nounwind {
-; LA32-LABEL: ld_sd_constant:
-; LA32:       # %bb.0:
-; LA32-NEXT:    lu12i.w $a3, -136485
-; LA32-NEXT:    ori $a4, $a3, 3823
-; LA32-NEXT:    ld.w $a2, $a4, 0
-; LA32-NEXT:    st.w $a0, $a4, 0
-; LA32-NEXT:    ori $a0, $a3, 3827
-; LA32-NEXT:    ld.w $a3, $a0, 0
-; LA32-NEXT:    st.w $a1, $a0, 0
-; LA32-NEXT:    move $a0, $a2
-; LA32-NEXT:    move $a1, $a3
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: ld_sd_constant:
-; LA64:       # %bb.0:
-; LA64-NEXT:    lu12i.w $a1, -136485
-; LA64-NEXT:    ori $a1, $a1, 3823
-; LA64-NEXT:    lu32i.d $a1, -147729
-; LA64-NEXT:    lu52i.d $a2, $a1, -534
-; LA64-NEXT:    ld.d $a1, $a2, 0
-; LA64-NEXT:    st.d $a0, $a2, 0
-; LA64-NEXT:    move $a0, $a1
-; LA64-NEXT:    ret
+; LA32NOPIC-LABEL: ld_sd_constant:
+; LA32NOPIC:       # %bb.0:
+; LA32NOPIC-NEXT:    lu12i.w $a3, -136485
+; LA32NOPIC-NEXT:    ori $a4, $a3, 3823
+; LA32NOPIC-NEXT:    ld.w $a2, $a4, 0
+; LA32NOPIC-NEXT:    ori $a5, $a3, 3827
+; LA32NOPIC-NEXT:    ld.w $a3, $a5, 0
+; LA32NOPIC-NEXT:    st.w $a0, $a4, 0
+; LA32NOPIC-NEXT:    st.w $a1, $a5, 0
+; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    move $a1, $a3
+; LA32NOPIC-NEXT:    ret
+;
+; LA32PIC-LABEL: ld_sd_constant:
+; LA32PIC:       # %bb.0:
+; LA32PIC-NEXT:    lu12i.w $a3, -136485
+; LA32PIC-NEXT:    ori $a4, $a3, 3823
+; LA32PIC-NEXT:    ld.w $a2, $a4, 0
+; LA32PIC-NEXT:    ori $a5, $a3, 3827
+; LA32PIC-NEXT:    ld.w $a3, $a5, 0
+; LA32PIC-NEXT:    st.w $a0, $a4, 0
+; LA32PIC-NEXT:    st.w $a1, $a5, 0
+; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    move $a1, $a3
+; LA32PIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: ld_sd_constant:
+; LA64NOPIC:       # %bb.0:
+; LA64NOPIC-NEXT:    lu12i.w $a1, -136485
+; LA64NOPIC-NEXT:    ori $a1, $a1, 3823
+; LA64NOPIC-NEXT:    lu32i.d $a1, -147729
+; LA64NOPIC-NEXT:    lu52i.d $a2, $a1, -534
+; LA64NOPIC-NEXT:    ld.d $a1, $a2, 0
+; LA64NOPIC-NEXT:    st.d $a0, $a2, 0
+; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ret
+;
+; LA64PIC-LABEL: ld_sd_constant:
+; LA64PIC:       # %bb.0:
+; LA64PIC-NEXT:    lu12i.w $a1, -136485
+; LA64PIC-NEXT:    ori $a1, $a1, 3823
+; LA64PIC-NEXT:    lu32i.d $a1, -147729
+; LA64PIC-NEXT:    lu52i.d $a2, $a1, -534
+; LA64PIC-NEXT:    ld.d $a1, $a2, 0
+; LA64PIC-NEXT:    st.d $a0, $a2, 0
+; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ret
   %1 = inttoptr i64 16045690984833335023 to ptr
   %2 = load volatile i64, ptr %1
   store i64 %a, ptr %1
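
A recurring shape in the load-store.ll changes above: the offset load now issues first, the base load second, and the dependent add/srai after both. As a minimal standalone illustration (hypothetical IR, not part of this patch) of the two-loads-plus-add pattern that ld_bu/ldx_bu exercise, usable with the same llc RUN lines:

; Hypothetical reproducer mirroring ld_bu: two independent byte loads
; feeding one add, so the scheduler is free to issue both loads first.
define i64 @two_loads_add(ptr %a) nounwind {
  %p = getelementptr i8, ptr %a, i64 4
  %x = load i8, ptr %p
  %xz = zext i8 %x to i64
  %y = load i8, ptr %a
  %yz = zext i8 %y to i64
  %sum = add i64 %xz, %yz
  ret i64 %sum
}
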
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
index 3916298e298f45..7b28872780e824 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
@@ -65,10 +65,10 @@ define i32 @lshr_i32(i32 %x, i32 %y) {
 define i64 @lshr_i64(i64 %x, i64 %y) {
 ; LA32-LABEL: lshr_i64:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    srl.w $a0, $a0, $a2
 ; LA32-NEXT:    xori $a3, $a2, 31
 ; LA32-NEXT:    slli.w $a4, $a1, 1
 ; LA32-NEXT:    sll.w $a3, $a4, $a3
-; LA32-NEXT:    srl.w $a0, $a0, $a2
 ; LA32-NEXT:    or $a0, $a0, $a3
 ; LA32-NEXT:    addi.w $a3, $a2, -32
 ; LA32-NEXT:    slti $a4, $a3, 0
@@ -146,8 +146,8 @@ define i32 @lshr_i32_3(i32 %x) {
 define i64 @lshr_i64_3(i64 %x) {
 ; LA32-LABEL: lshr_i64_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a0, $a0, 3
 ; LA32-NEXT:    slli.w $a2, $a1, 29
+; LA32-NEXT:    srli.w $a0, $a0, 3
 ; LA32-NEXT:    or $a0, $a0, $a2
 ; LA32-NEXT:    srli.w $a1, $a1, 3
 ; LA32-NEXT:    ret
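
Both lshr.ll hunks are pure reorderings of the standard LA32 two-word right shift. For the constant case the lowering computes

  lo' = (lo >> 3) | (hi << 29)   ; srli.w + slli.w + or
  hi' =  hi >> 3

so swapping the srli.w and slli.w cannot change the result; only the or depends on both values.
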
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
index 53a3529219fefb..cfa6ceae78f954 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
@@ -87,8 +87,8 @@ entry:
 define i64 @mul_pow2(i64 %a) {
 ; LA32-LABEL: mul_pow2:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a1, $a1, 3
 ; LA32-NEXT:    srli.w $a2, $a0, 29
+; LA32-NEXT:    slli.w $a1, $a1, 3
 ; LA32-NEXT:    or $a1, $a1, $a2
 ; LA32-NEXT:    slli.w $a0, $a0, 3
 ; LA32-NEXT:    ret
@@ -160,45 +160,46 @@ define i32 @mulh_wu(i32 %a, i32 %b) {
 define i64 @mulh_d(i64 %a, i64 %b) {
 ; LA32-LABEL: mulh_d:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    srai.w $a5, $a1, 31
+; LA32-NEXT:    srai.w $a6, $a3, 31
 ; LA32-NEXT:    mulh.wu $a4, $a0, $a2
-; LA32-NEXT:    mul.w $a5, $a1, $a2
-; LA32-NEXT:    add.w $a4, $a5, $a4
-; LA32-NEXT:    sltu $a5, $a4, $a5
-; LA32-NEXT:    mulh.wu $a6, $a1, $a2
-; LA32-NEXT:    add.w $a5, $a6, $a5
-; LA32-NEXT:    mul.w $a6, $a0, $a3
-; LA32-NEXT:    add.w $a4, $a6, $a4
-; LA32-NEXT:    sltu $a4, $a4, $a6
-; LA32-NEXT:    mulh.wu $a6, $a0, $a3
-; LA32-NEXT:    add.w $a4, $a6, $a4
-; LA32-NEXT:    add.w $a4, $a5, $a4
-; LA32-NEXT:    sltu $a5, $a4, $a5
-; LA32-NEXT:    mulh.wu $a6, $a1, $a3
-; LA32-NEXT:    add.w $a5, $a6, $a5
-; LA32-NEXT:    mul.w $a6, $a1, $a3
-; LA32-NEXT:    add.w $a4, $a6, $a4
-; LA32-NEXT:    sltu $a6, $a4, $a6
-; LA32-NEXT:    add.w $a5, $a5, $a6
-; LA32-NEXT:    srai.w $a6, $a1, 31
-; LA32-NEXT:    mul.w $a7, $a2, $a6
-; LA32-NEXT:    mulh.wu $a2, $a2, $a6
-; LA32-NEXT:    add.w $a2, $a2, $a7
-; LA32-NEXT:    mul.w $a6, $a3, $a6
-; LA32-NEXT:    add.w $a2, $a2, $a6
-; LA32-NEXT:    srai.w $a3, $a3, 31
-; LA32-NEXT:    mul.w $a1, $a3, $a1
-; LA32-NEXT:    mulh.wu $a6, $a3, $a0
-; LA32-NEXT:    add.w $a1, $a6, $a1
-; LA32-NEXT:    mul.w $a0, $a3, $a0
-; LA32-NEXT:    add.w $a1, $a1, $a0
-; LA32-NEXT:    add.w $a1, $a1, $a2
-; LA32-NEXT:    add.w $a2, $a0, $a7
-; LA32-NEXT:    sltu $a0, $a2, $a0
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    add.w $a1, $a5, $a0
-; LA32-NEXT:    add.w $a0, $a4, $a2
-; LA32-NEXT:    sltu $a2, $a0, $a4
-; LA32-NEXT:    add.w $a1, $a1, $a2
+; LA32-NEXT:    mul.w $a7, $a1, $a2
+; LA32-NEXT:    add.w $a4, $a7, $a4
+; LA32-NEXT:    sltu $a7, $a4, $a7
+; LA32-NEXT:    mulh.wu $t0, $a1, $a2
+; LA32-NEXT:    add.w $a7, $t0, $a7
+; LA32-NEXT:    mul.w $t0, $a0, $a3
+; LA32-NEXT:    add.w $a4, $t0, $a4
+; LA32-NEXT:    sltu $a4, $a4, $t0
+; LA32-NEXT:    mulh.wu $t0, $a0, $a3
+; LA32-NEXT:    add.w $a4, $t0, $a4
+; LA32-NEXT:    add.w $t0, $a7, $a4
+; LA32-NEXT:    mul.w $t1, $a1, $a3
+; LA32-NEXT:    add.w $t2, $t1, $t0
+; LA32-NEXT:    mul.w $t3, $a2, $a5
+; LA32-NEXT:    mul.w $t4, $a6, $a0
+; LA32-NEXT:    add.w $t5, $t4, $t3
+; LA32-NEXT:    add.w $a4, $t2, $t5
+; LA32-NEXT:    sltu $t6, $a4, $t2
+; LA32-NEXT:    sltu $t1, $t2, $t1
+; LA32-NEXT:    sltu $a7, $t0, $a7
+; LA32-NEXT:    mulh.wu $t0, $a1, $a3
+; LA32-NEXT:    add.w $a7, $t0, $a7
+; LA32-NEXT:    add.w $a7, $a7, $t1
+; LA32-NEXT:    mulh.wu $a2, $a2, $a5
+; LA32-NEXT:    add.w $a2, $a2, $t3
+; LA32-NEXT:    mul.w $a3, $a3, $a5
+; LA32-NEXT:    add.w $a2, $a2, $a3
+; LA32-NEXT:    mul.w $a1, $a6, $a1
+; LA32-NEXT:    mulh.wu $a0, $a6, $a0
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    add.w $a0, $a0, $t4
+; LA32-NEXT:    add.w $a0, $a0, $a2
+; LA32-NEXT:    sltu $a1, $t5, $t4
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    add.w $a0, $a7, $a0
+; LA32-NEXT:    add.w $a1, $a0, $t6
+; LA32-NEXT:    move $a0, $a4
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: mulh_d:
@@ -227,14 +228,14 @@ define i64 @mulh_du(i64 %a, i64 %b) {
 ; LA32-NEXT:    sltu $a4, $a4, $a5
 ; LA32-NEXT:    mulh.wu $a0, $a0, $a3
 ; LA32-NEXT:    add.w $a0, $a0, $a4
-; LA32-NEXT:    mul.w $a4, $a1, $a3
+; LA32-NEXT:    add.w $a4, $a2, $a0
+; LA32-NEXT:    mul.w $a5, $a1, $a3
+; LA32-NEXT:    add.w $a0, $a5, $a4
+; LA32-NEXT:    sltu $a5, $a0, $a5
+; LA32-NEXT:    sltu $a2, $a4, $a2
 ; LA32-NEXT:    mulh.wu $a1, $a1, $a3
-; LA32-NEXT:    add.w $a0, $a2, $a0
-; LA32-NEXT:    sltu $a2, $a0, $a2
-; LA32-NEXT:    add.w $a1, $a1, $a2
-; LA32-NEXT:    add.w $a0, $a4, $a0
-; LA32-NEXT:    sltu $a2, $a0, $a4
 ; LA32-NEXT:    add.w $a1, $a1, $a2
+; LA32-NEXT:    add.w $a1, $a1, $a5
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: mulh_du:
@@ -1494,15 +1495,15 @@ define i64 @mul_i64_65280_twice(i64 %a, i64 %b) {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    lu12i.w $a4, 15
 ; LA32-NEXT:    ori $a4, $a4, 3840
-; LA32-NEXT:    mul.w $a3, $a3, $a4
-; LA32-NEXT:    mulh.wu $a5, $a2, $a4
-; LA32-NEXT:    add.w $a3, $a5, $a3
 ; LA32-NEXT:    mul.w $a1, $a1, $a4
 ; LA32-NEXT:    mulh.wu $a5, $a0, $a4
 ; LA32-NEXT:    add.w $a1, $a5, $a1
-; LA32-NEXT:    xor $a1, $a1, $a3
-; LA32-NEXT:    mul.w $a2, $a2, $a4
 ; LA32-NEXT:    mul.w $a0, $a0, $a4
+; LA32-NEXT:    mul.w $a3, $a3, $a4
+; LA32-NEXT:    mulh.wu $a5, $a2, $a4
+; LA32-NEXT:    add.w $a3, $a5, $a3
+; LA32-NEXT:    mul.w $a2, $a2, $a4
+; LA32-NEXT:    xor $a1, $a1, $a3
 ; LA32-NEXT:    xor $a0, $a0, $a2
 ; LA32-NEXT:    ret
 ;
@@ -1510,8 +1511,8 @@ define i64 @mul_i64_65280_twice(i64 %a, i64 %b) {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 3840
-; LA64-NEXT:    mul.d $a1, $a1, $a2
 ; LA64-NEXT:    mul.d $a0, $a0, $a2
+; LA64-NEXT:    mul.d $a1, $a1, $a2
 ; LA64-NEXT:    xor $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %c = mul i64 %a, 65280
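
Background for the mulh_d/mulh_du churn: on LA32 the high word of a 64x64 multiply is assembled from four 32-bit partial products (standard schoolbook decomposition, stated here for reference rather than taken from the patch). With a = a1*2^32 + a0 and b = b1*2^32 + b0:

  a*b = a1*b1*2^64 + (a1*b0 + a0*b1)*2^32 + a0*b0

Each 32-bit add of partial products can carry, and the sltu instructions in the checks materialize exactly those carries (sltu $d, $sum, $addend is 1 precisely when the unsigned add wrapped), so the reordered sequences stay correct as long as each sltu still reads the matching sum/addend pair.
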
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll
index 4baf18931dc57c..3f35b76b160342 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll
@@ -61,10 +61,10 @@ define i32 @shl_i32(i32 %x, i32 %y) {
 define i64 @shl_i64(i64 %x, i64 %y) {
 ; LA32-LABEL: shl_i64:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    xori $a3, $a2, 31
 ; LA32-NEXT:    srli.w $a4, $a0, 1
 ; LA32-NEXT:    srl.w $a3, $a4, $a3
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    or $a1, $a1, $a3
 ; LA32-NEXT:    addi.w $a3, $a2, -32
 ; LA32-NEXT:    slti $a4, $a3, 0
@@ -142,8 +142,8 @@ define i32 @shl_i32_3(i32 %x) {
 define i64 @shl_i64_3(i64 %x) {
 ; LA32-LABEL: shl_i64_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a1, $a1, 3
 ; LA32-NEXT:    srli.w $a2, $a0, 29
+; LA32-NEXT:    slli.w $a1, $a1, 3
 ; LA32-NEXT:    or $a1, $a1, $a2
 ; LA32-NEXT:    slli.w $a0, $a0, 3
 ; LA32-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll
index 98357744f5219f..bb236c11bb8113 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll
@@ -79,9 +79,9 @@ define signext i32 @sub_i32_sext(i32 %x, i32 %y) {
 define i64 @sub_i64(i64 %x, i64 %y) {
 ; LA32-LABEL: sub_i64:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    sltu $a4, $a0, $a2
 ; LA32-NEXT:    sub.w $a1, $a1, $a3
-; LA32-NEXT:    sltu $a3, $a0, $a2
-; LA32-NEXT:    sub.w $a1, $a1, $a3
+; LA32-NEXT:    sub.w $a1, $a1, $a4
 ; LA32-NEXT:    sub.w $a0, $a0, $a2
 ; LA32-NEXT:    ret
 ;
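
The sub_i64 change only hoists the borrow computation. For a two-word subtraction the borrow out of the low words is sltu(x_lo, y_lo), i.e. 1 exactly when x_lo < y_lo unsigned. Worked example: x = 2^32 (hi 1, lo 0) minus y = 1 gives sltu(0, 1) = 1, so hi' = 1 - 0 - 1 = 0 while lo' wraps to 0xffffffff, i.e. 2^32 - 1 as expected. The sltu reads only $a0/$a2 and the high-word sub.w reads only $a1/$a3 (with the borrow now in $a4), so either order is valid.
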
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index ae6f31aaec6434..3a0fd0758cb324 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -235,102 +235,102 @@ define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 2
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a4, 3
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a5, 4
+; CHECK-NEXT:    ld.b $a1, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a6, 5
+; CHECK-NEXT:    ld.b $a2, $sp, 8
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a7, 6
-; CHECK-NEXT:    ld.b $a1, $sp, 0
+; CHECK-NEXT:    ld.b $a3, $sp, 16
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 7
-; CHECK-NEXT:    ld.b $a1, $sp, 8
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 8
-; CHECK-NEXT:    ld.b $a1, $sp, 16
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 9
 ; CHECK-NEXT:    ld.b $a1, $sp, 24
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 8
+; CHECK-NEXT:    ld.b $a2, $sp, 32
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 9
+; CHECK-NEXT:    ld.b $a3, $sp, 40
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 10
-; CHECK-NEXT:    ld.b $a1, $sp, 32
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 11
-; CHECK-NEXT:    ld.b $a1, $sp, 40
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 12
 ; CHECK-NEXT:    ld.b $a1, $sp, 48
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 11
+; CHECK-NEXT:    ld.b $a2, $sp, 56
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 12
+; CHECK-NEXT:    ld.b $a3, $sp, 64
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 13
-; CHECK-NEXT:    ld.b $a1, $sp, 56
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 14
-; CHECK-NEXT:    ld.b $a1, $sp, 64
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 15
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 14
 ; CHECK-NEXT:    ld.b $a1, $sp, 72
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 15
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 80
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 1
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 88
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 96
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 3
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 104
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 4
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 112
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 5
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 120
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 6
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 128
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 7
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 136
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 8
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 144
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 9
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 152
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 10
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 160
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 11
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 168
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 12
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 176
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 13
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 184
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 14
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 192
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 15
@@ -377,6 +377,8 @@ entry:
 define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
 ; CHECK-LABEL: buildvector_v16i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.h $t0, $sp, 8
+; CHECK-NEXT:    ld.h $t1, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a3, 2
@@ -384,44 +386,42 @@ define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i1
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a5, 4
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a6, 5
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a7, 6
-; CHECK-NEXT:    ld.h $a1, $sp, 0
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 7
-; CHECK-NEXT:    ld.h $a1, $sp, 8
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $t1, 7
+; CHECK-NEXT:    ld.h $a1, $sp, 16
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t0, 0
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 16
+; CHECK-NEXT:    ld.h $a2, $sp, 24
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 1
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 24
+; CHECK-NEXT:    ld.h $a1, $sp, 32
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 2
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 2
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 32
+; CHECK-NEXT:    ld.h $a2, $sp, 40
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 3
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 40
+; CHECK-NEXT:    ld.h $a1, $sp, 48
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 4
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 4
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 48
+; CHECK-NEXT:    ld.h $a2, $sp, 56
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 5
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 56
+; CHECK-NEXT:    ld.h $a1, $sp, 64
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 6
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 6
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 64
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 7
@@ -452,6 +452,7 @@ entry:
 define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
 ; CHECK-LABEL: buildvector_v8i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $t0, $sp, 0
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a1, 0
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a2, 1
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a3, 2
@@ -459,8 +460,7 @@ define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a5, 4
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a6, 5
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a7, 6
-; CHECK-NEXT:    ld.w $a1, $sp, 0
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a1, 7
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $t0, 7
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
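
The buildvector tests lower long insertelement chains, and with more elements than argument registers the tail of each chain comes from stack loads; the diff above mostly interleaves those ld.b/ld.h loads from $sp with the vinsgr2vr inserts, and hoists the first stack load(s) into $t0 (and $t1 for v16i16). A smaller hypothetical variant (128-bit, not part of this patch) showing the IR shape being lowered:

; Hypothetical 128-bit analogue of the buildvector tests: an
; insertelement chain stored through the destination pointer.
define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
entry:
  %v0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %a1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %a2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %a3, i32 3
  store <4 x i32> %v3, ptr %dst
  ret void
}
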
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll
index af18c52b096c80..0f3df3d573b654 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll
@@ -9,30 +9,30 @@
 define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -48,30 +48,30 @@ entry:
 define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -87,31 +87,31 @@ entry:
 define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -129,32 +129,32 @@ entry:
 define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.d $xr1, $xr1, 63
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr1, $xr1, 63
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -173,33 +173,33 @@ entry:
 define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_xvfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.d $xr1, $xr1, 63
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_xvfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr1, $xr1, 63
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -217,31 +217,31 @@ entry:
 define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -260,30 +260,30 @@ entry:
 define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -301,31 +301,31 @@ entry:
 define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -342,28 +342,28 @@ entry:
 define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr2, $xr2, 63
-; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
+; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll
index b7b3cb3a2e665b..6fd14d93a751e6 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll
@@ -9,30 +9,30 @@
 define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfadd.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfadd.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -48,30 +48,30 @@ entry:
 define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -87,31 +87,31 @@ entry:
 define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfadd.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfadd.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -129,32 +129,32 @@ entry:
 define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.w $xr1, $xr1, 31
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr1, $xr1, 31
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -173,33 +173,33 @@ entry:
 define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_xvfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.w $xr1, $xr1, 31
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_xvfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr1, $xr1, 31
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -217,31 +217,31 @@ entry:
 define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -260,30 +260,30 @@ entry:
 define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -301,31 +301,31 @@ entry:
 define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -342,28 +342,28 @@ entry:
 define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr2, $xr2, 31
-; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
index 8e4d0dc6f1c380..136f34bafb32a3 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
@@ -4,9 +4,9 @@
 define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvadd.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvadd.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvadd.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvadd.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
index 98c87cadeeb5a0..b06d1bea4ef629 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
@@ -4,9 +4,9 @@
 define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @and_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll
index fcbf0f1400fe61..4dd2cee7a2ed58 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll
@@ -4,9 +4,9 @@
 define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsra.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsra.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsra.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsra.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsra.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsra.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsra.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsra.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll
index 365bb305fc5aaa..b3eb328e8d446a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll
@@ -4,9 +4,9 @@
 define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfadd.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfadd.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfadd.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfadd.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll
index ef67dbc100c045..4f56dd29c1b25d 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll
@@ -35,9 +35,9 @@ define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -51,9 +51,9 @@ define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -68,9 +68,9 @@ define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cueq.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cueq.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -84,9 +84,9 @@ define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cueq.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cueq.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -101,9 +101,9 @@ define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -117,9 +117,9 @@ define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -134,9 +134,9 @@ define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -150,9 +150,9 @@ define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -167,9 +167,9 @@ define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cule.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cule.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -183,9 +183,9 @@ define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cule.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cule.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -200,9 +200,9 @@ define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -216,9 +216,9 @@ define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -233,9 +233,9 @@ define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -249,9 +249,9 @@ define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -266,9 +266,9 @@ define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cult.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cult.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -282,9 +282,9 @@ define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cult.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cult.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -299,9 +299,9 @@ define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -315,9 +315,9 @@ define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -332,9 +332,9 @@ define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -348,9 +348,9 @@ define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -365,9 +365,9 @@ define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cune.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cune.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -381,9 +381,9 @@ define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cune.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cune.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -398,9 +398,9 @@ define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -414,9 +414,9 @@ define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -431,9 +431,9 @@ define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cor.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cor.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -447,9 +447,9 @@ define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cor.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cor.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -464,9 +464,9 @@ define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cun.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cun.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -480,9 +480,9 @@ define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfcmp.cun.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfcmp.cun.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
index 6004565b0b784e..63d8c222ae54f7 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
@@ -4,9 +4,9 @@
 define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfdiv.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfdiv.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfdiv.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfdiv.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
index a48dca8d284704..f777151cdb0ac3 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
@@ -4,9 +4,9 @@
 define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfmul.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfmul.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfmul.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfmul.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
index 6164aa5a55c7e4..201ba5f5df66fb 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
@@ -4,9 +4,9 @@
 define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfsub.s $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfsub.s $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvfsub.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvfsub.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
index 6693fe0f6ec7ce..d15c4133855f40 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
@@ -19,9 +19,9 @@ define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -49,9 +49,9 @@ define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -79,9 +79,9 @@ define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -109,9 +109,9 @@ define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -140,9 +140,9 @@ define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -170,9 +170,9 @@ define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -200,9 +200,9 @@ define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -230,9 +230,9 @@ define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -261,9 +261,9 @@ define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.bu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -291,9 +291,9 @@ define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.hu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -321,9 +321,9 @@ define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.wu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -351,9 +351,9 @@ define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsle.du $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsle.du $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -382,9 +382,9 @@ define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -412,9 +412,9 @@ define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -442,9 +442,9 @@ define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -472,9 +472,9 @@ define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -503,9 +503,9 @@ define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.bu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -533,9 +533,9 @@ define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.hu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -563,9 +563,9 @@ define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.wu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -593,9 +593,9 @@ define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvslt.du $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvslt.du $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -610,9 +610,9 @@ define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
@@ -627,9 +627,9 @@ define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvrepli.b $xr1, -1
 ; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
@@ -645,9 +645,9 @@ define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvrepli.b $xr1, -1
 ; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
@@ -663,9 +663,9 @@ define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvseq.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvseq.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvrepli.b $xr1, -1
 ; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
index ceaf40027ffc4a..25106b456d2f7a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
@@ -88,10 +88,10 @@ define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind {
 define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind {
 ; CHECK-LABEL: insert_8xfloat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movfr2gr.s $a2, $fa0
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a2, 1
-; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a0, 0
+; CHECK-NEXT:    movfr2gr.s $a0, $fa0
+; CHECK-NEXT:    xvinsgr2vr.w $xr1, $a0, 1
+; CHECK-NEXT:    xvst $xr1, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <8 x float>, ptr %src
   %v_new = insertelement <8 x float> %v, float %in, i32 1
@@ -102,10 +102,10 @@ define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind {
 define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
 ; CHECK-LABEL: insert_4xdouble:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movfr2gr.d $a2, $fa0
-; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a2, 1
-; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a0, 0
+; CHECK-NEXT:    movfr2gr.d $a0, $fa0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 1
+; CHECK-NEXT:    xvst $xr1, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <4 x double>, ptr %src
   %v_new = insertelement <4 x double> %v, double %in, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
index 24be69d8032a82..5b992b5e38de5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
@@ -4,9 +4,9 @@
 define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsrl.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsrl.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsrl.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsrl.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsrl.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsrl.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsrl.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsrl.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
index dcb893caa2555a..4745e7003cb1cd 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
@@ -4,9 +4,9 @@
 define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmul.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmul.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1)  nounwind {
 ; CHECK-LABEL: mul_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmul.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmul.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmul.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmul.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmul.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmul.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
index f37cbf1cefedc4..f32b8897bebce0 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
@@ -4,9 +4,9 @@
 define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
index e3635a5f14a2ba..879caa5a6700d2 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
@@ -4,9 +4,9 @@
 define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
index 8a02c7e3ac975a..56c69171c9d443 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
@@ -4,9 +4,9 @@
 define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsll.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsll.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsll.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsll.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @shl_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsll.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsll.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsll.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsll.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
index bcfff16514770f..5102abac83d80f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
@@ -4,9 +4,9 @@
 define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsub.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsub.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsub.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsub.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsub.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsub.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvsub.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvsub.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll
index e78084c7186d33..43f558f3cdf372 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll
@@ -4,9 +4,9 @@
 define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.bu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.hu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.wu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvdiv.du $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvdiv.du $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
index c2fb1462b7a250..e062e10b21d9f8 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
@@ -4,9 +4,9 @@
 define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll
index aac711a4a371ca..db3cc7f38774d0 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll
@@ -4,9 +4,9 @@
 define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -24,9 +24,9 @@ entry:
 define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.bu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.bu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -44,9 +44,9 @@ entry:
 define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -64,9 +64,9 @@ entry:
 define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.hu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.hu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -84,9 +84,9 @@ entry:
 define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -104,9 +104,9 @@ entry:
 define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.wu $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.wu $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -124,9 +124,9 @@ entry:
 define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.d $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -144,9 +144,9 @@ entry:
 define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a2, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvmuh.du $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    xvmuh.du $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
index a9a54257917abe..7b4c7ced4b5f36 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
@@ -34,11 +34,11 @@ define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lu12i.w $a3, -16
-; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a3
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvld $xr2, $a2, 0
-; CHECK-NEXT:    xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    lu12i.w $a1, -16
+; CHECK-NEXT:    xvreplgr2vr.w $xr2, $a1
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -69,12 +69,12 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:    addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:    xvld $xr0, $a3, 0
-; CHECK-NEXT:    xvld $xr1, $a1, 0
-; CHECK-NEXT:    xvld $xr2, $a2, 0
-; CHECK-NEXT:    xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    addi.d $a1, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    xvld $xr2, $a1, 0
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index ed1f610a5fa615..c04d7ca889f7ef 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -230,6 +230,15 @@ entry:
 define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
 ; CHECK-LABEL: buildvector_v16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.b $t0, $sp, 64
+; CHECK-NEXT:    ld.b $t1, $sp, 56
+; CHECK-NEXT:    ld.b $t2, $sp, 48
+; CHECK-NEXT:    ld.b $t3, $sp, 40
+; CHECK-NEXT:    ld.b $t4, $sp, 32
+; CHECK-NEXT:    ld.b $t5, $sp, 24
+; CHECK-NEXT:    ld.b $t6, $sp, 16
+; CHECK-NEXT:    ld.b $t7, $sp, 8
+; CHECK-NEXT:    ld.b $t8, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 2
@@ -237,24 +246,15 @@ define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a5, 4
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a6, 5
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a7, 6
-; CHECK-NEXT:    ld.b $a1, $sp, 0
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 7
-; CHECK-NEXT:    ld.b $a1, $sp, 8
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 8
-; CHECK-NEXT:    ld.b $a1, $sp, 16
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 9
-; CHECK-NEXT:    ld.b $a1, $sp, 24
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 10
-; CHECK-NEXT:    ld.b $a1, $sp, 32
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 11
-; CHECK-NEXT:    ld.b $a1, $sp, 40
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 12
-; CHECK-NEXT:    ld.b $a1, $sp, 48
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 13
-; CHECK-NEXT:    ld.b $a1, $sp, 56
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 14
-; CHECK-NEXT:    ld.b $a1, $sp, 64
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 15
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t8, 7
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t7, 8
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t6, 9
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t5, 10
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t4, 11
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t3, 12
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t2, 13
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t1, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t0, 15
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -281,6 +281,7 @@ entry:
 define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
 ; CHECK-LABEL: buildvector_v8i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.h $t0, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a3, 2
@@ -288,8 +289,7 @@ define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a5, 4
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a6, 5
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a7, 6
-; CHECK-NEXT:    ld.h $a1, $sp, 0
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 7
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $t0, 7
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll
index 8e0459b4afabef..c83c563952d4f3 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll
@@ -9,30 +9,30 @@
 define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfadd.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfadd.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -48,30 +48,30 @@ entry:
 define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -87,31 +87,31 @@ entry:
 define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfadd.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfadd.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -129,32 +129,32 @@ entry:
 define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.d $vr1, $vr1, 63
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.d $vr1, $vr1, 63
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -173,33 +173,33 @@ entry:
 define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_vfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.d $vr1, $vr1, 63
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_vfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.d $vr1, $vr1, 63
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -217,31 +217,31 @@ entry:
 define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -260,30 +260,30 @@ entry:
 define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -301,31 +301,31 @@ entry:
 define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -342,28 +342,28 @@ entry:
 define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.d $vr2, $vr2, 63
-; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
+; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll
index 7efbd61c0c4f7b..1f316d5b1c8a4f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll
@@ -9,30 +9,30 @@
 define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfadd.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfadd.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -48,30 +48,30 @@ entry:
 define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -87,31 +87,31 @@ entry:
 define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfadd.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfadd.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -129,32 +129,32 @@ entry:
 define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.w $vr1, $vr1, 31
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.w $vr1, $vr1, 31
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -173,33 +173,33 @@ entry:
 define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_vfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.w $vr1, $vr1, 31
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_vfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.w $vr1, $vr1, 31
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -217,31 +217,31 @@ entry:
 define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -260,30 +260,30 @@ entry:
 define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -301,31 +301,31 @@ entry:
 define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-ON-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a3, 0
-; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -342,28 +342,28 @@ entry:
 define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.w $vr2, $vr2, 31
-; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
+; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
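
For readers skimming the fma-v4f32.ll hunks above: every test in that file reduces to the same shape. The three vld instructions now issue in IR source order, so the product operands land in $vr0/$vr1 and the addend in $vr2, which is why the operand list of the fused instruction flips. A hand-written reduction of vfmadd_s_contract, with illustrative value names rather than the exact test body:

  define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
  entry:
    %v0 = load <4 x float>, ptr %a0              ; vld $vr0, $a1, 0
    %v1 = load <4 x float>, ptr %a1              ; vld $vr1, $a2, 0
    %v2 = load <4 x float>, ptr %a2              ; vld $vr2, $a3, 0
    %mul = fmul contract <4 x float> %v0, %v1
    %add = fadd contract <4 x float> %mul, %v2   ; folds to vfmadd.s $vr0, $vr0, $vr1, $vr2
    store <4 x float> %add, ptr %res
    ret void
  }

The contract flags on both the fmul and the fadd are what let all three RUN configurations emit the fused vfmadd.s in these functions, independent of the --fp-contract setting.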
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
index 2a7c37c2ae346e..485bd1df8d66e2 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
@@ -4,9 +4,9 @@
 define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vadd.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vadd.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vadd.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vadd.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
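
The same two-load reordering repeats across the remaining LSX binary-op files below (and.ll, ashr.ll, fadd.ll, fcmp.ll): the operand loaded from %a0 now arrives first, so the check lines swap both the vld order and the instruction operands without changing the computed value. The underlying IR shape, sketched by hand with illustrative names:

  define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
  entry:
    %v0 = load <16 x i8>, ptr %a0   ; now the first vld, into $vr0
    %v1 = load <16 x i8>, ptr %a1   ; second vld, into $vr1
    %sum = add <16 x i8> %v0, %v1   ; vadd.b $vr0, $vr0, $vr1
    store <16 x i8> %sum, ptr %res
    ret void
  }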
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
index 523255159a8115..d3e4efb1b1c27f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
@@ -4,9 +4,9 @@
 define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
index fbc570d77ba803..2a310744709832 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
@@ -4,9 +4,9 @@
 define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsra.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsra.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsra.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsra.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsra.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsra.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsra.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsra.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index b8798c97861e2c..f3b8e02ac28f74 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -82,9 +82,9 @@ define void @extract_2xdouble(ptr %src, ptr %dst) nounwind {
 define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_16xi8_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vreplve.b $vr0, $vr0, $a2
+; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT:    vreplve.b $vr0, $vr0, $a0
 ; CHECK-NEXT:    movfr2gr.s $a0, $fa0
 ; CHECK-NEXT:    srai.w $a0, $a0, 24
 ; CHECK-NEXT:    st.b $a0, $a1, 0
@@ -98,9 +98,9 @@ define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_8xi16_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vreplve.h $vr0, $vr0, $a2
+; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT:    vreplve.h $vr0, $vr0, $a0
 ; CHECK-NEXT:    movfr2gr.s $a0, $fa0
 ; CHECK-NEXT:    srai.w $a0, $a0, 16
 ; CHECK-NEXT:    st.h $a0, $a1, 0
@@ -114,9 +114,9 @@ define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_4xi32_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vreplve.w $vr0, $vr0, $a2
+; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT:    vreplve.w $vr0, $vr0, $a0
 ; CHECK-NEXT:    movfr2gr.s $a0, $fa0
 ; CHECK-NEXT:    st.w $a0, $a1, 0
 ; CHECK-NEXT:    ret
@@ -129,9 +129,9 @@ define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_2xi64_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vreplve.d $vr0, $vr0, $a2
+; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT:    vreplve.d $vr0, $vr0, $a0
 ; CHECK-NEXT:    movfr2gr.d $a0, $fa0
 ; CHECK-NEXT:    st.d $a0, $a1, 0
 ; CHECK-NEXT:    ret
@@ -144,9 +144,9 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_4xfloat_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vreplve.w $vr0, $vr0, $a2
+; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT:    vreplve.w $vr0, $vr0, $a0
 ; CHECK-NEXT:    fst.s $fa0, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <4 x float>, ptr %src
@@ -158,9 +158,9 @@ define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_2xdouble_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vreplve.d $vr0, $vr0, $a2
+; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
+; CHECK-NEXT:    vreplve.d $vr0, $vr0, $a0
 ; CHECK-NEXT:    fst.d $fa0, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <2 x double>, ptr %src
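
extractelement.ll exercises a slightly different pattern: there is only one vector load, and the dynamic index must be zero-extended from i32 before vreplve. With the machine scheduler the vld issues first, so the bstrpick result can reuse $a0 instead of rewriting $a2 in place. A hand-reduced equivalent of extract_16xi8_idx (the volatile load matches the float/double variants visible above; the rest is reconstructed, not copied from the test file):

  define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
    %v = load volatile <16 x i8>, ptr %src       ; vld $vr0, $a0, 0
    %e = extractelement <16 x i8> %v, i32 %idx   ; bstrpick.d + vreplve.b + movfr2gr.s/srai.w
    store i8 %e, ptr %dst                        ; st.b $a0, $a1, 0
    ret void
  }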
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll
index 1fa1f611c4a36c..989ad10a44ffc6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll
@@ -4,9 +4,9 @@
 define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfadd.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfadd.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfadd.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfadd.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
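
fcmp.ll below follows the same load reordering, with one extra step in the IR: the <4 x i1> compare result is sign-extended to a full-width lane mask before the store, which is exactly the all-ones/all-zeros pattern vfcmp produces. A hand-written sketch (value names illustrative):

  define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
    %v0 = load <4 x float>, ptr %a0
    %v1 = load <4 x float>, ptr %a1
    %cmp = fcmp oeq <4 x float> %v0, %v1     ; vfcmp.ceq.s $vr0, $vr0, $vr1
    %msk = sext <4 x i1> %cmp to <4 x i32>   ; each lane becomes all-ones or all-zeros
    store <4 x i32> %msk, ptr %res
    ret void
  }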
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll
index 53fbf0b2f86fe4..95e46a4e71dab7 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll
@@ -35,9 +35,9 @@ define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -51,9 +51,9 @@ define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -68,9 +68,9 @@ define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cueq.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cueq.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -84,9 +84,9 @@ define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cueq.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cueq.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -101,9 +101,9 @@ define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -117,9 +117,9 @@ define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -134,9 +134,9 @@ define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -150,9 +150,9 @@ define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -167,9 +167,9 @@ define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cule.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cule.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -183,9 +183,9 @@ define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cule.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cule.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -200,9 +200,9 @@ define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -216,9 +216,9 @@ define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -233,9 +233,9 @@ define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -249,9 +249,9 @@ define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -266,9 +266,9 @@ define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cult.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cult.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -282,9 +282,9 @@ define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cult.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cult.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -299,9 +299,9 @@ define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -315,9 +315,9 @@ define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -332,9 +332,9 @@ define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -348,9 +348,9 @@ define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -365,9 +365,9 @@ define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cune.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cune.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -381,9 +381,9 @@ define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cune.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cune.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -398,9 +398,9 @@ define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -414,9 +414,9 @@ define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -431,9 +431,9 @@ define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cor.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cor.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -447,9 +447,9 @@ define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cor.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cor.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -464,9 +464,9 @@ define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cun.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cun.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -480,9 +480,9 @@ define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfcmp.cun.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfcmp.cun.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
index 5f1ee9e4d212eb..3b9642e31b02d4 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
@@ -4,9 +4,9 @@
 define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfdiv.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfdiv.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfdiv.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfdiv.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
index e7fb527f7805e8..f604a8962958dc 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
@@ -4,9 +4,9 @@
 define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfmul.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfmul.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfmul.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll
index df98182321dab9..02350c0763baee 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll
@@ -4,9 +4,9 @@
 define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfsub.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfsub.s $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vfsub.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vfsub.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
index 448f3fa6c6e0e4..04b4831f1188c2 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
@@ -19,9 +19,9 @@ define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -49,9 +49,9 @@ define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -79,9 +79,9 @@ define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -109,9 +109,9 @@ define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -140,9 +140,9 @@ define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -170,9 +170,9 @@ define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -200,9 +200,9 @@ define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -230,9 +230,9 @@ define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -261,9 +261,9 @@ define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.bu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.bu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -291,9 +291,9 @@ define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.hu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.hu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -321,9 +321,9 @@ define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.wu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -351,9 +351,9 @@ define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsle.du $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsle.du $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -382,9 +382,9 @@ define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -412,9 +412,9 @@ define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -442,9 +442,9 @@ define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -472,9 +472,9 @@ define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -503,9 +503,9 @@ define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.bu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.bu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -533,9 +533,9 @@ define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.hu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.hu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -563,9 +563,9 @@ define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.wu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -593,9 +593,9 @@ define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vslt.du $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vslt.du $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -610,9 +610,9 @@ define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
@@ -627,9 +627,9 @@ define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vrepli.b $vr1, -1
 ; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
@@ -645,9 +645,9 @@ define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vrepli.b $vr1, -1
 ; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
@@ -663,9 +663,9 @@ define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vseq.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vseq.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vrepli.b $vr1, -1
 ; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
index a9834591aa0e85..7f232073ae129c 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
@@ -56,10 +56,10 @@ define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind {
 define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind {
 ; CHECK-LABEL: insert_4xfloat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movfr2gr.s $a2, $fa0
-; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vinsgr2vr.w $vr0, $a2, 1
-; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a0, 0
+; CHECK-NEXT:    movfr2gr.s $a0, $fa0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT:    vst $vr1, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <4 x float>, ptr %src
   %v_new = insertelement <4 x float> %v, float %ins, i32 1
@@ -70,10 +70,10 @@ define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind {
 define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind {
 ; CHECK-LABEL: insert_2xdouble:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movfr2gr.d $a2, $fa0
-; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vinsgr2vr.d $vr0, $a2, 1
-; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a0, 0
+; CHECK-NEXT:    movfr2gr.d $a0, $fa0
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; CHECK-NEXT:    vst $vr1, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <2 x double>, ptr %src
   %v_new = insertelement <2 x double> %v, double %ins, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll
index dada52f93060e1..2693310b4f5089 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll
@@ -4,9 +4,9 @@
 define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsrl.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsrl.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsrl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsrl.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsrl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsrl.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsrl.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsrl.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
index d0be9cb7e3c8bd..f66cae6a180274 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
@@ -4,9 +4,9 @@
 define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmul.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmul.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1)  nounwind {
 ; CHECK-LABEL: mul_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmul.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmul.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmul.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmul.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @mul_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmul.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
index f124512acce73d..89702e60c01f5b 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
@@ -4,9 +4,9 @@
 define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
index b68f73a749135d..cdff58defdaeaa 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
@@ -4,9 +4,9 @@
 define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
index fa0aebaf28b3c5..4b34c04f3374bb 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
@@ -4,9 +4,9 @@
 define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsll.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsll.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsll.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsll.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsll.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsll.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsll.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsll.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
index 25b4623a47d1fc..2813d9c97e680a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
@@ -4,9 +4,9 @@
 define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsub.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsub.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsub.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsub.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsub.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsub.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vsub.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vsub.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
index abb60b91dd488f..32dac67d36a814 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
@@ -4,9 +4,9 @@
 define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.bu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.bu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.hu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.hu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.wu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vdiv.du $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vdiv.du $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
index ce3e49c990ffb0..482cecb1d7522f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
@@ -4,9 +4,9 @@
 define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
index e1388f00e355fb..b0ca556eeff36d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
@@ -4,9 +4,9 @@
 define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -24,9 +24,9 @@ entry:
 define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.bu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.bu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -44,9 +44,9 @@ entry:
 define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -64,9 +64,9 @@ entry:
 define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.hu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.hu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -84,9 +84,9 @@ entry:
 define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -104,9 +104,9 @@ entry:
 define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.wu $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.wu $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -124,9 +124,9 @@ entry:
 define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.d $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -144,9 +144,9 @@ entry:
 define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a2, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vmuh.du $vr0, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    vmuh.du $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
index 97a55532907b83..48ef3c14a4bf51 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -34,11 +34,11 @@ define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lu12i.w $a3, -16
-; CHECK-NEXT:    vreplgr2vr.w $vr0, $a3
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vld $vr2, $a2, 0
-; CHECK-NEXT:    vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    lu12i.w $a1, -16
+; CHECK-NEXT:    vreplgr2vr.w $vr2, $a1
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -69,12 +69,12 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:    addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:    vld $vr0, $a3, 0
-; CHECK-NEXT:    vld $vr1, $a1, 0
-; CHECK-NEXT:    vld $vr2, $a2, 0
-; CHECK-NEXT:    vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    addi.d $a1, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    vld $vr2, $a1, 0
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
index 6dac525874ec3a..2b6a109228a612 100644
--- a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
+++ b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
@@ -8,21 +8,21 @@ define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef s
 ; LA464-NEXT:    ori $a2, $zero, 1
 ; LA464-NEXT:    blt $a1, $a2, .LBB0_4
 ; LA464-NEXT:  # %bb.1:
-; LA464-NEXT:    bstrpick.d $a2, $a1, 31, 0
-; LA464-NEXT:    move $a1, $zero
+; LA464-NEXT:    move $a2, $zero
+; LA464-NEXT:    bstrpick.d $a1, $a1, 31, 0
 ; LA464-NEXT:    .p2align 4, , 16
 ; LA464-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
 ; LA464-NEXT:    ld.w $a3, $a0, 0
-; LA464-NEXT:    add.d $a1, $a3, $a1
+; LA464-NEXT:    add.d $a2, $a3, $a2
+; LA464-NEXT:    addi.d $a1, $a1, -1
 ; LA464-NEXT:    addi.d $a0, $a0, 4
-; LA464-NEXT:    addi.d $a2, $a2, -1
-; LA464-NEXT:    bnez $a2, .LBB0_2
+; LA464-NEXT:    bnez $a1, .LBB0_2
 ; LA464-NEXT:  # %bb.3:
-; LA464-NEXT:    addi.w $a0, $a1, 0
+; LA464-NEXT:    addi.w $a0, $a2, 0
 ; LA464-NEXT:    ret
 ; LA464-NEXT:  .LBB0_4:
-; LA464-NEXT:    move $a1, $zero
-; LA464-NEXT:    addi.w $a0, $a1, 0
+; LA464-NEXT:    move $a2, $zero
+; LA464-NEXT:    addi.w $a0, $a2, 0
 ; LA464-NEXT:    ret
   %3 = icmp sgt i32 %1, 0
   br i1 %3, label %4, label %6
diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
index b9a6ebdcdd22cc..b067eb9cfa924a 100644
--- a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
@@ -14,10 +14,10 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
 ;
 ; LA64-LABEL: rotl_32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    sub.d $a2, $zero, $a1
-; LA64-NEXT:    sll.w $a1, $a0, $a1
-; LA64-NEXT:    srl.w $a0, $a0, $a2
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    sll.w $a2, $a0, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
+; LA64-NEXT:    srl.w $a0, $a0, $a1
+; LA64-NEXT:    or $a0, $a2, $a0
 ; LA64-NEXT:    ret
   %z = sub i32 32, %y
   %b = shl i32 %x, %y
@@ -46,39 +46,39 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
 define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotl_64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    xori $a3, $a2, 31
-; LA32-NEXT:    srli.w $a4, $a0, 1
-; LA32-NEXT:    srl.w $a3, $a4, $a3
-; LA32-NEXT:    sll.w $a4, $a1, $a2
-; LA32-NEXT:    or $a3, $a4, $a3
+; LA32-NEXT:    sll.w $a3, $a1, $a2
+; LA32-NEXT:    xori $a4, $a2, 31
+; LA32-NEXT:    srli.w $a5, $a0, 1
+; LA32-NEXT:    srl.w $a4, $a5, $a4
+; LA32-NEXT:    or $a3, $a3, $a4
 ; LA32-NEXT:    addi.w $a4, $a2, -32
 ; LA32-NEXT:    slti $a5, $a4, 0
 ; LA32-NEXT:    maskeqz $a3, $a3, $a5
 ; LA32-NEXT:    sll.w $a6, $a0, $a4
 ; LA32-NEXT:    masknez $a5, $a6, $a5
 ; LA32-NEXT:    or $a3, $a3, $a5
+; LA32-NEXT:    sll.w $a5, $a0, $a2
+; LA32-NEXT:    srai.w $a4, $a4, 31
+; LA32-NEXT:    and $a4, $a4, $a5
+; LA32-NEXT:    sub.w $a5, $zero, $a2
+; LA32-NEXT:    srl.w $a6, $a1, $a5
+; LA32-NEXT:    ori $a7, $zero, 32
+; LA32-NEXT:    sub.w $a7, $a7, $a2
+; LA32-NEXT:    slti $t0, $a7, 0
+; LA32-NEXT:    masknez $t1, $a6, $t0
+; LA32-NEXT:    srl.w $a0, $a0, $a5
 ; LA32-NEXT:    ori $a5, $zero, 64
-; LA32-NEXT:    sub.w $a5, $a5, $a2
-; LA32-NEXT:    xori $a5, $a5, 31
-; LA32-NEXT:    slli.w $a6, $a1, 1
-; LA32-NEXT:    sll.w $a5, $a6, $a5
-; LA32-NEXT:    sub.w $a6, $zero, $a2
-; LA32-NEXT:    srl.w $a7, $a1, $a6
-; LA32-NEXT:    ori $a1, $zero, 32
-; LA32-NEXT:    sub.w $t0, $a1, $a2
-; LA32-NEXT:    srai.w $a1, $t0, 31
-; LA32-NEXT:    and $a1, $a1, $a7
+; LA32-NEXT:    sub.w $a2, $a5, $a2
+; LA32-NEXT:    xori $a2, $a2, 31
+; LA32-NEXT:    slli.w $a1, $a1, 1
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    maskeqz $a0, $a0, $t0
+; LA32-NEXT:    or $a0, $a0, $t1
+; LA32-NEXT:    srai.w $a1, $a7, 31
+; LA32-NEXT:    and $a1, $a1, $a6
 ; LA32-NEXT:    or $a1, $a3, $a1
-; LA32-NEXT:    srl.w $a3, $a0, $a6
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    slti $a5, $t0, 0
-; LA32-NEXT:    masknez $a6, $a7, $a5
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    or $a3, $a3, $a6
-; LA32-NEXT:    sll.w $a0, $a0, $a2
-; LA32-NEXT:    srai.w $a2, $a4, 31
-; LA32-NEXT:    and $a0, $a2, $a0
-; LA32-NEXT:    or $a0, $a0, $a3
+; LA32-NEXT:    or $a0, $a4, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotl_64:
@@ -97,39 +97,39 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 define i64 @rotr_64(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotr_64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    xori $a3, $a2, 31
-; LA32-NEXT:    slli.w $a4, $a1, 1
-; LA32-NEXT:    sll.w $a3, $a4, $a3
-; LA32-NEXT:    srl.w $a4, $a0, $a2
-; LA32-NEXT:    or $a3, $a4, $a3
+; LA32-NEXT:    srl.w $a3, $a0, $a2
+; LA32-NEXT:    xori $a4, $a2, 31
+; LA32-NEXT:    slli.w $a5, $a1, 1
+; LA32-NEXT:    sll.w $a4, $a5, $a4
+; LA32-NEXT:    or $a3, $a3, $a4
 ; LA32-NEXT:    addi.w $a4, $a2, -32
 ; LA32-NEXT:    slti $a5, $a4, 0
 ; LA32-NEXT:    maskeqz $a3, $a3, $a5
 ; LA32-NEXT:    srl.w $a6, $a1, $a4
 ; LA32-NEXT:    masknez $a5, $a6, $a5
 ; LA32-NEXT:    or $a3, $a3, $a5
+; LA32-NEXT:    srl.w $a5, $a1, $a2
+; LA32-NEXT:    srai.w $a4, $a4, 31
+; LA32-NEXT:    and $a4, $a4, $a5
+; LA32-NEXT:    sub.w $a5, $zero, $a2
+; LA32-NEXT:    sll.w $a6, $a0, $a5
+; LA32-NEXT:    ori $a7, $zero, 32
+; LA32-NEXT:    sub.w $a7, $a7, $a2
+; LA32-NEXT:    slti $t0, $a7, 0
+; LA32-NEXT:    masknez $t1, $a6, $t0
+; LA32-NEXT:    sll.w $a1, $a1, $a5
 ; LA32-NEXT:    ori $a5, $zero, 64
-; LA32-NEXT:    sub.w $a5, $a5, $a2
-; LA32-NEXT:    xori $a5, $a5, 31
-; LA32-NEXT:    srli.w $a6, $a0, 1
-; LA32-NEXT:    srl.w $a5, $a6, $a5
-; LA32-NEXT:    sub.w $a6, $zero, $a2
-; LA32-NEXT:    sll.w $a7, $a0, $a6
-; LA32-NEXT:    ori $a0, $zero, 32
-; LA32-NEXT:    sub.w $t0, $a0, $a2
-; LA32-NEXT:    srai.w $a0, $t0, 31
-; LA32-NEXT:    and $a0, $a0, $a7
+; LA32-NEXT:    sub.w $a2, $a5, $a2
+; LA32-NEXT:    xori $a2, $a2, 31
+; LA32-NEXT:    srli.w $a0, $a0, 1
+; LA32-NEXT:    srl.w $a0, $a0, $a2
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    maskeqz $a0, $a0, $t0
+; LA32-NEXT:    or $a1, $a0, $t1
+; LA32-NEXT:    srai.w $a0, $a7, 31
+; LA32-NEXT:    and $a0, $a0, $a6
 ; LA32-NEXT:    or $a0, $a3, $a0
-; LA32-NEXT:    sll.w $a3, $a1, $a6
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    slti $a5, $t0, 0
-; LA32-NEXT:    masknez $a6, $a7, $a5
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    or $a3, $a3, $a6
-; LA32-NEXT:    srl.w $a1, $a1, $a2
-; LA32-NEXT:    srai.w $a2, $a4, 31
-; LA32-NEXT:    and $a1, $a2, $a1
-; LA32-NEXT:    or $a1, $a1, $a3
+; LA32-NEXT:    or $a1, $a4, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotr_64:
@@ -152,10 +152,10 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
 ;
 ; LA64-LABEL: rotl_32_mask:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    sub.d $a2, $zero, $a1
-; LA64-NEXT:    sll.w $a1, $a0, $a1
-; LA64-NEXT:    srl.w $a0, $a0, $a2
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    sll.w $a2, $a0, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
+; LA64-NEXT:    srl.w $a0, $a0, $a1
+; LA64-NEXT:    or $a0, $a2, $a0
 ; LA64-NEXT:    ret
   %z = sub i32 0, %y
   %and = and i32 %z, 31
@@ -174,10 +174,10 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
 ;
 ; LA64-LABEL: rotl_32_mask_and_63_and_31:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    sub.d $a2, $zero, $a1
-; LA64-NEXT:    sll.w $a1, $a0, $a1
-; LA64-NEXT:    srl.w $a0, $a0, $a2
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    sll.w $a2, $a0, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
+; LA64-NEXT:    srl.w $a0, $a0, $a1
+; LA64-NEXT:    or $a0, $a2, $a0
 ; LA64-NEXT:    ret
   %a = and i32 %y, 63
   %b = shl i32 %x, %a
@@ -197,10 +197,10 @@ define i32 @rotl_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
 ;
 ; LA64-LABEL: rotl_32_mask_or_64_or_32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    sub.d $a2, $zero, $a1
-; LA64-NEXT:    sll.w $a1, $a0, $a1
-; LA64-NEXT:    srl.w $a0, $a0, $a2
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    sll.w $a2, $a0, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
+; LA64-NEXT:    srl.w $a0, $a0, $a1
+; LA64-NEXT:    or $a0, $a2, $a0
 ; LA64-NEXT:    ret
   %a = or i32 %y, 64
   %b = shl i32 %x, %a
@@ -270,39 +270,38 @@ define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
 define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotl_64_mask:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    xori $a3, $a2, 31
-; LA32-NEXT:    srli.w $a4, $a0, 1
-; LA32-NEXT:    srl.w $a3, $a4, $a3
-; LA32-NEXT:    sll.w $a4, $a1, $a2
-; LA32-NEXT:    or $a3, $a4, $a3
-; LA32-NEXT:    sub.w $a4, $zero, $a2
-; LA32-NEXT:    srl.w $a5, $a1, $a4
-; LA32-NEXT:    andi $a6, $a4, 63
-; LA32-NEXT:    addi.w $a7, $a6, -32
-; LA32-NEXT:    srai.w $t0, $a7, 31
-; LA32-NEXT:    and $a5, $t0, $a5
-; LA32-NEXT:    addi.w $t0, $a2, -32
-; LA32-NEXT:    slti $t1, $t0, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $t1
-; LA32-NEXT:    sll.w $t2, $a0, $t0
-; LA32-NEXT:    masknez $t1, $t2, $t1
-; LA32-NEXT:    or $a3, $a3, $t1
-; LA32-NEXT:    xori $a6, $a6, 31
-; LA32-NEXT:    slli.w $t1, $a1, 1
-; LA32-NEXT:    sll.w $a6, $t1, $a6
+; LA32-NEXT:    sll.w $a3, $a1, $a2
+; LA32-NEXT:    xori $a4, $a2, 31
+; LA32-NEXT:    srli.w $a5, $a0, 1
+; LA32-NEXT:    srl.w $a4, $a5, $a4
+; LA32-NEXT:    or $a3, $a3, $a4
+; LA32-NEXT:    addi.w $a4, $a2, -32
+; LA32-NEXT:    slti $a5, $a4, 0
+; LA32-NEXT:    maskeqz $a3, $a3, $a5
+; LA32-NEXT:    sll.w $a6, $a0, $a4
+; LA32-NEXT:    masknez $a5, $a6, $a5
 ; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a4, $a0, $a4
-; LA32-NEXT:    or $a4, $a4, $a6
-; LA32-NEXT:    srl.w $a1, $a1, $a7
-; LA32-NEXT:    slti $a5, $a7, 0
-; LA32-NEXT:    masknez $a1, $a1, $a5
-; LA32-NEXT:    maskeqz $a4, $a4, $a5
-; LA32-NEXT:    or $a1, $a4, $a1
-; LA32-NEXT:    sll.w $a0, $a0, $a2
-; LA32-NEXT:    srai.w $a2, $t0, 31
-; LA32-NEXT:    and $a0, $a2, $a0
-; LA32-NEXT:    or $a0, $a0, $a1
-; LA32-NEXT:    move $a1, $a3
+; LA32-NEXT:    sll.w $a5, $a0, $a2
+; LA32-NEXT:    srai.w $a4, $a4, 31
+; LA32-NEXT:    and $a4, $a4, $a5
+; LA32-NEXT:    sub.w $a2, $zero, $a2
+; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    srl.w $a7, $a1, $a6
+; LA32-NEXT:    slti $t0, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $t0
+; LA32-NEXT:    srl.w $a0, $a0, $a2
+; LA32-NEXT:    xori $a5, $a5, 31
+; LA32-NEXT:    slli.w $t1, $a1, 1
+; LA32-NEXT:    sll.w $a5, $t1, $a5
+; LA32-NEXT:    or $a0, $a0, $a5
+; LA32-NEXT:    maskeqz $a0, $a0, $t0
+; LA32-NEXT:    or $a0, $a0, $a7
+; LA32-NEXT:    srl.w $a1, $a1, $a2
+; LA32-NEXT:    srai.w $a2, $a6, 31
+; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    or $a1, $a3, $a1
+; LA32-NEXT:    or $a0, $a4, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotl_64_mask:
@@ -321,40 +320,39 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
 define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotl_64_mask_and_127_and_63:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a3, $a0, 1
-; LA32-NEXT:    andi $a4, $a2, 127
-; LA32-NEXT:    xori $a5, $a4, 31
-; LA32-NEXT:    srl.w $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a5, $a1, $a2
-; LA32-NEXT:    or $a3, $a5, $a3
-; LA32-NEXT:    sub.w $a5, $zero, $a2
-; LA32-NEXT:    srl.w $a6, $a1, $a5
-; LA32-NEXT:    andi $a7, $a5, 63
-; LA32-NEXT:    addi.w $t0, $a7, -32
-; LA32-NEXT:    srai.w $t1, $t0, 31
-; LA32-NEXT:    and $a6, $t1, $a6
-; LA32-NEXT:    addi.w $a4, $a4, -32
-; LA32-NEXT:    slti $t1, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $t1
-; LA32-NEXT:    sll.w $t2, $a0, $a4
-; LA32-NEXT:    masknez $t1, $t2, $t1
-; LA32-NEXT:    or $a3, $a3, $t1
-; LA32-NEXT:    xori $a7, $a7, 31
+; LA32-NEXT:    sll.w $a3, $a1, $a2
+; LA32-NEXT:    srli.w $a4, $a0, 1
+; LA32-NEXT:    andi $a5, $a2, 127
+; LA32-NEXT:    xori $a6, $a5, 31
+; LA32-NEXT:    srl.w $a4, $a4, $a6
+; LA32-NEXT:    or $a3, $a3, $a4
+; LA32-NEXT:    addi.w $a4, $a5, -32
+; LA32-NEXT:    slti $a5, $a4, 0
+; LA32-NEXT:    maskeqz $a3, $a3, $a5
+; LA32-NEXT:    sll.w $a6, $a0, $a4
+; LA32-NEXT:    masknez $a5, $a6, $a5
+; LA32-NEXT:    or $a3, $a3, $a5
+; LA32-NEXT:    sll.w $a5, $a0, $a2
+; LA32-NEXT:    srai.w $a4, $a4, 31
+; LA32-NEXT:    and $a4, $a4, $a5
+; LA32-NEXT:    sub.w $a2, $zero, $a2
+; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    srl.w $a7, $a1, $a6
+; LA32-NEXT:    slti $t0, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $t0
+; LA32-NEXT:    srl.w $a0, $a0, $a2
+; LA32-NEXT:    xori $a5, $a5, 31
 ; LA32-NEXT:    slli.w $t1, $a1, 1
-; LA32-NEXT:    sll.w $a7, $t1, $a7
-; LA32-NEXT:    or $a3, $a3, $a6
-; LA32-NEXT:    srl.w $a5, $a0, $a5
-; LA32-NEXT:    or $a5, $a5, $a7
-; LA32-NEXT:    srl.w $a1, $a1, $t0
-; LA32-NEXT:    slti $a6, $t0, 0
-; LA32-NEXT:    masknez $a1, $a1, $a6
-; LA32-NEXT:    maskeqz $a5, $a5, $a6
-; LA32-NEXT:    or $a1, $a5, $a1
-; LA32-NEXT:    sll.w $a0, $a0, $a2
-; LA32-NEXT:    srai.w $a2, $a4, 31
-; LA32-NEXT:    and $a0, $a2, $a0
-; LA32-NEXT:    or $a0, $a0, $a1
-; LA32-NEXT:    move $a1, $a3
+; LA32-NEXT:    sll.w $a5, $t1, $a5
+; LA32-NEXT:    or $a0, $a0, $a5
+; LA32-NEXT:    maskeqz $a0, $a0, $t0
+; LA32-NEXT:    or $a0, $a0, $a7
+; LA32-NEXT:    srl.w $a1, $a1, $a2
+; LA32-NEXT:    srai.w $a2, $a6, 31
+; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    or $a1, $a3, $a1
+; LA32-NEXT:    or $a0, $a4, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotl_64_mask_and_127_and_63:
@@ -395,39 +393,38 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
 define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotr_64_mask:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    xori $a3, $a2, 31
-; LA32-NEXT:    slli.w $a4, $a1, 1
-; LA32-NEXT:    sll.w $a3, $a4, $a3
-; LA32-NEXT:    srl.w $a4, $a0, $a2
-; LA32-NEXT:    or $a3, $a4, $a3
-; LA32-NEXT:    sub.w $a4, $zero, $a2
-; LA32-NEXT:    sll.w $a5, $a0, $a4
-; LA32-NEXT:    andi $a6, $a4, 63
-; LA32-NEXT:    addi.w $a7, $a6, -32
-; LA32-NEXT:    srai.w $t0, $a7, 31
-; LA32-NEXT:    and $a5, $t0, $a5
-; LA32-NEXT:    addi.w $t0, $a2, -32
-; LA32-NEXT:    slti $t1, $t0, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $t1
-; LA32-NEXT:    srl.w $t2, $a1, $t0
-; LA32-NEXT:    masknez $t1, $t2, $t1
-; LA32-NEXT:    or $a3, $a3, $t1
-; LA32-NEXT:    xori $a6, $a6, 31
-; LA32-NEXT:    srli.w $t1, $a0, 1
-; LA32-NEXT:    srl.w $a6, $t1, $a6
+; LA32-NEXT:    srl.w $a3, $a0, $a2
+; LA32-NEXT:    xori $a4, $a2, 31
+; LA32-NEXT:    slli.w $a5, $a1, 1
+; LA32-NEXT:    sll.w $a4, $a5, $a4
+; LA32-NEXT:    or $a3, $a3, $a4
+; LA32-NEXT:    addi.w $a4, $a2, -32
+; LA32-NEXT:    slti $a5, $a4, 0
+; LA32-NEXT:    maskeqz $a3, $a3, $a5
+; LA32-NEXT:    srl.w $a6, $a1, $a4
+; LA32-NEXT:    masknez $a5, $a6, $a5
 ; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a4, $a1, $a4
-; LA32-NEXT:    or $a4, $a4, $a6
-; LA32-NEXT:    sll.w $a0, $a0, $a7
-; LA32-NEXT:    slti $a5, $a7, 0
-; LA32-NEXT:    masknez $a0, $a0, $a5
-; LA32-NEXT:    maskeqz $a4, $a4, $a5
-; LA32-NEXT:    or $a0, $a4, $a0
-; LA32-NEXT:    srl.w $a1, $a1, $a2
-; LA32-NEXT:    srai.w $a2, $t0, 31
-; LA32-NEXT:    and $a1, $a2, $a1
-; LA32-NEXT:    or $a1, $a1, $a0
-; LA32-NEXT:    move $a0, $a3
+; LA32-NEXT:    srl.w $a5, $a1, $a2
+; LA32-NEXT:    srai.w $a4, $a4, 31
+; LA32-NEXT:    and $a4, $a4, $a5
+; LA32-NEXT:    sub.w $a2, $zero, $a2
+; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    sll.w $a7, $a0, $a6
+; LA32-NEXT:    slti $t0, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $t0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    xori $a5, $a5, 31
+; LA32-NEXT:    srli.w $t1, $a0, 1
+; LA32-NEXT:    srl.w $a5, $t1, $a5
+; LA32-NEXT:    or $a1, $a1, $a5
+; LA32-NEXT:    maskeqz $a1, $a1, $t0
+; LA32-NEXT:    or $a1, $a1, $a7
+; LA32-NEXT:    sll.w $a0, $a0, $a2
+; LA32-NEXT:    srai.w $a2, $a6, 31
+; LA32-NEXT:    and $a0, $a2, $a0
+; LA32-NEXT:    or $a0, $a3, $a0
+; LA32-NEXT:    or $a1, $a4, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotr_64_mask:
@@ -445,40 +442,39 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
 define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotr_64_mask_and_127_and_63:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a3, $a1, 1
-; LA32-NEXT:    andi $a4, $a2, 127
-; LA32-NEXT:    xori $a5, $a4, 31
-; LA32-NEXT:    sll.w $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a5, $a0, $a2
-; LA32-NEXT:    or $a3, $a5, $a3
-; LA32-NEXT:    sub.w $a5, $zero, $a2
-; LA32-NEXT:    sll.w $a6, $a0, $a5
-; LA32-NEXT:    andi $a7, $a5, 63
-; LA32-NEXT:    addi.w $t0, $a7, -32
-; LA32-NEXT:    srai.w $t1, $t0, 31
-; LA32-NEXT:    and $a6, $t1, $a6
-; LA32-NEXT:    addi.w $a4, $a4, -32
-; LA32-NEXT:    slti $t1, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $t1
-; LA32-NEXT:    srl.w $t2, $a1, $a4
-; LA32-NEXT:    masknez $t1, $t2, $t1
-; LA32-NEXT:    or $a3, $a3, $t1
-; LA32-NEXT:    xori $a7, $a7, 31
+; LA32-NEXT:    srl.w $a3, $a0, $a2
+; LA32-NEXT:    slli.w $a4, $a1, 1
+; LA32-NEXT:    andi $a5, $a2, 127
+; LA32-NEXT:    xori $a6, $a5, 31
+; LA32-NEXT:    sll.w $a4, $a4, $a6
+; LA32-NEXT:    or $a3, $a3, $a4
+; LA32-NEXT:    addi.w $a4, $a5, -32
+; LA32-NEXT:    slti $a5, $a4, 0
+; LA32-NEXT:    maskeqz $a3, $a3, $a5
+; LA32-NEXT:    srl.w $a6, $a1, $a4
+; LA32-NEXT:    masknez $a5, $a6, $a5
+; LA32-NEXT:    or $a3, $a3, $a5
+; LA32-NEXT:    srl.w $a5, $a1, $a2
+; LA32-NEXT:    srai.w $a4, $a4, 31
+; LA32-NEXT:    and $a4, $a4, $a5
+; LA32-NEXT:    sub.w $a2, $zero, $a2
+; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    sll.w $a7, $a0, $a6
+; LA32-NEXT:    slti $t0, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $t0
+; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    xori $a5, $a5, 31
 ; LA32-NEXT:    srli.w $t1, $a0, 1
-; LA32-NEXT:    srl.w $a7, $t1, $a7
-; LA32-NEXT:    or $a3, $a3, $a6
-; LA32-NEXT:    sll.w $a5, $a1, $a5
-; LA32-NEXT:    or $a5, $a5, $a7
-; LA32-NEXT:    sll.w $a0, $a0, $t0
-; LA32-NEXT:    slti $a6, $t0, 0
-; LA32-NEXT:    masknez $a0, $a0, $a6
-; LA32-NEXT:    maskeqz $a5, $a5, $a6
-; LA32-NEXT:    or $a0, $a5, $a0
-; LA32-NEXT:    srl.w $a1, $a1, $a2
-; LA32-NEXT:    srai.w $a2, $a4, 31
-; LA32-NEXT:    and $a1, $a2, $a1
-; LA32-NEXT:    or $a1, $a1, $a0
-; LA32-NEXT:    move $a0, $a3
+; LA32-NEXT:    srl.w $a5, $t1, $a5
+; LA32-NEXT:    or $a1, $a1, $a5
+; LA32-NEXT:    maskeqz $a1, $a1, $t0
+; LA32-NEXT:    or $a1, $a1, $a7
+; LA32-NEXT:    sll.w $a0, $a0, $a2
+; LA32-NEXT:    srai.w $a2, $a6, 31
+; LA32-NEXT:    and $a0, $a2, $a0
+; LA32-NEXT:    or $a0, $a3, $a0
+; LA32-NEXT:    or $a1, $a4, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotr_64_mask_and_127_and_63:
diff --git a/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll b/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll
index 61fe123ee6aac5..fa8879ea69dda2 100644
--- a/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll
+++ b/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll
@@ -212,9 +212,9 @@ define i32 @sub_clamp_zero_i32(i32 signext %x, i32 signext %y) {
 define i64 @sub_clamp_zero_i64(i64 signext %x, i64 signext %y) {
 ; LA32-LABEL: sub_clamp_zero_i64:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    sltu $a4, $a0, $a2
 ; LA32-NEXT:    sub.w $a1, $a1, $a3
-; LA32-NEXT:    sltu $a3, $a0, $a2
-; LA32-NEXT:    sub.w $a1, $a1, $a3
+; LA32-NEXT:    sub.w $a1, $a1, $a4
 ; LA32-NEXT:    sub.w $a0, $a0, $a2
 ; LA32-NEXT:    srai.w $a2, $a1, 31
 ; LA32-NEXT:    andn $a1, $a1, $a2
diff --git a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll b/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll
index e151624d908c2c..3494329e3e7c6f 100644
--- a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll
+++ b/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll
@@ -160,11 +160,11 @@ define i64 @sll_redundant_mask_zeros_i64(i64 %a, i64 %b) {
 ; LA32-LABEL: sll_redundant_mask_zeros_i64:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a2, 2
+; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    srli.w $a3, $a0, 1
 ; LA32-NEXT:    andi $a4, $a2, 60
 ; LA32-NEXT:    xori $a5, $a4, 31
 ; LA32-NEXT:    srl.w $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    or $a1, $a1, $a3
 ; LA32-NEXT:    addi.w $a3, $a4, -32
 ; LA32-NEXT:    slti $a4, $a3, 0
@@ -192,11 +192,11 @@ define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) {
 ; LA32-LABEL: srl_redundant_mask_zeros_i64:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a2, 3
+; LA32-NEXT:    srl.w $a0, $a0, $a2
 ; LA32-NEXT:    slli.w $a3, $a1, 1
 ; LA32-NEXT:    andi $a4, $a2, 56
 ; LA32-NEXT:    xori $a5, $a4, 31
 ; LA32-NEXT:    sll.w $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a0, $a0, $a2
 ; LA32-NEXT:    or $a0, $a0, $a3
 ; LA32-NEXT:    addi.w $a3, $a4, -32
 ; LA32-NEXT:    slti $a4, $a3, 0
@@ -223,23 +223,23 @@ define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) {
 define i64 @sra_redundant_mask_zeros_i64(i64 %a, i64 %b) {
 ; LA32-LABEL: sra_redundant_mask_zeros_i64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a3, $a2, 4
-; LA32-NEXT:    srai.w $a2, $a1, 31
-; LA32-NEXT:    andi $a4, $a3, 48
-; LA32-NEXT:    addi.w $a5, $a4, -32
-; LA32-NEXT:    slti $a6, $a5, 0
-; LA32-NEXT:    masknez $a2, $a2, $a6
-; LA32-NEXT:    sra.w $a7, $a1, $a3
-; LA32-NEXT:    maskeqz $a7, $a7, $a6
-; LA32-NEXT:    or $a2, $a7, $a2
-; LA32-NEXT:    srl.w $a0, $a0, $a3
-; LA32-NEXT:    xori $a3, $a4, 31
-; LA32-NEXT:    slli.w $a4, $a1, 1
-; LA32-NEXT:    sll.w $a3, $a4, $a3
+; LA32-NEXT:    srai.w $a3, $a1, 31
+; LA32-NEXT:    slli.w $a4, $a2, 4
+; LA32-NEXT:    andi $a5, $a4, 48
+; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    slti $a7, $a6, 0
+; LA32-NEXT:    masknez $a2, $a3, $a7
+; LA32-NEXT:    sra.w $a3, $a1, $a4
+; LA32-NEXT:    maskeqz $a3, $a3, $a7
+; LA32-NEXT:    or $a2, $a3, $a2
+; LA32-NEXT:    srl.w $a0, $a0, $a4
+; LA32-NEXT:    slli.w $a3, $a1, 1
+; LA32-NEXT:    xori $a4, $a5, 31
+; LA32-NEXT:    sll.w $a3, $a3, $a4
 ; LA32-NEXT:    or $a0, $a0, $a3
-; LA32-NEXT:    sra.w $a1, $a1, $a5
-; LA32-NEXT:    maskeqz $a0, $a0, $a6
-; LA32-NEXT:    masknez $a1, $a1, $a6
+; LA32-NEXT:    maskeqz $a0, $a0, $a7
+; LA32-NEXT:    sra.w $a1, $a1, $a6
+; LA32-NEXT:    masknez $a1, $a1, $a7
 ; LA32-NEXT:    or $a0, $a0, $a1
 ; LA32-NEXT:    move $a1, $a2
 ; LA32-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/shrinkwrap.ll b/llvm/test/CodeGen/LoongArch/shrinkwrap.ll
index 5f15dd2e7eafa8..0323b56080f83e 100644
--- a/llvm/test/CodeGen/LoongArch/shrinkwrap.ll
+++ b/llvm/test/CodeGen/LoongArch/shrinkwrap.ll
@@ -9,7 +9,6 @@ define void @eliminate_restore(i32 %n) nounwind {
 ; NOSHRINKW:       # %bb.0:
 ; NOSHRINKW-NEXT:    addi.d $sp, $sp, -16
 ; NOSHRINKW-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; NOSHRINKW-NEXT:    # kill: def $r5 killed $r4
 ; NOSHRINKW-NEXT:    addi.w $a1, $a0, 0
 ; NOSHRINKW-NEXT:    ori $a0, $zero, 32
 ; NOSHRINKW-NEXT:    bltu $a0, $a1, .LBB0_2
@@ -52,9 +51,8 @@ define void @conditional_alloca(i32 %n) nounwind {
 ; NOSHRINKW-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; NOSHRINKW-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; NOSHRINKW-NEXT:    addi.d $fp, $sp, 32
-; NOSHRINKW-NEXT:    move $a1, $a0
-; NOSHRINKW-NEXT:    st.d $a1, $fp, -24 # 8-byte Folded Spill
 ; NOSHRINKW-NEXT:    addi.w $a1, $a0, 0
+; NOSHRINKW-NEXT:    st.d $a0, $fp, -24 # 8-byte Folded Spill
 ; NOSHRINKW-NEXT:    ori $a0, $zero, 32
 ; NOSHRINKW-NEXT:    bltu $a0, $a1, .LBB1_2
 ; NOSHRINKW-NEXT:    b .LBB1_1
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
index 6cba4108d63c6f..0efb5fd4e640bf 100644
--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
@@ -5,63 +5,64 @@
 define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
 ; LA32-LABEL: smuloi64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srai.w $a5, $a1, 31
-; LA32-NEXT:    mul.w $a6, $a2, $a5
-; LA32-NEXT:    mulh.wu $a7, $a2, $a5
-; LA32-NEXT:    add.w $a7, $a7, $a6
-; LA32-NEXT:    mul.w $a5, $a3, $a5
+; LA32-NEXT:    mulh.wu $a5, $a0, $a2
+; LA32-NEXT:    mul.w $a6, $a1, $a2
+; LA32-NEXT:    add.w $a5, $a6, $a5
+; LA32-NEXT:    sltu $a6, $a5, $a6
+; LA32-NEXT:    mulh.wu $a7, $a1, $a2
+; LA32-NEXT:    add.w $a6, $a7, $a6
+; LA32-NEXT:    mul.w $a7, $a0, $a3
 ; LA32-NEXT:    add.w $a5, $a7, $a5
-; LA32-NEXT:    srai.w $a7, $a3, 31
-; LA32-NEXT:    mul.w $t0, $a7, $a1
-; LA32-NEXT:    mulh.wu $t1, $a7, $a0
-; LA32-NEXT:    add.w $t0, $t1, $t0
-; LA32-NEXT:    mul.w $a7, $a7, $a0
-; LA32-NEXT:    add.w $t0, $t0, $a7
-; LA32-NEXT:    add.w $a5, $t0, $a5
-; LA32-NEXT:    mulh.wu $t0, $a0, $a2
-; LA32-NEXT:    mul.w $t1, $a1, $a2
-; LA32-NEXT:    add.w $t0, $t1, $t0
-; LA32-NEXT:    sltu $t1, $t0, $t1
-; LA32-NEXT:    mulh.wu $t2, $a1, $a2
-; LA32-NEXT:    add.w $t1, $t2, $t1
-; LA32-NEXT:    mul.w $t2, $a0, $a3
-; LA32-NEXT:    add.w $t0, $t2, $t0
-; LA32-NEXT:    sltu $t2, $t0, $t2
-; LA32-NEXT:    mulh.wu $t3, $a0, $a3
-; LA32-NEXT:    add.w $t2, $t3, $t2
+; LA32-NEXT:    sltu $a7, $a5, $a7
+; LA32-NEXT:    mulh.wu $t0, $a0, $a3
+; LA32-NEXT:    add.w $a7, $t0, $a7
+; LA32-NEXT:    add.w $a7, $a6, $a7
+; LA32-NEXT:    mul.w $t0, $a1, $a3
+; LA32-NEXT:    add.w $t1, $t0, $a7
+; LA32-NEXT:    srai.w $t2, $a1, 31
+; LA32-NEXT:    mul.w $t3, $a2, $t2
+; LA32-NEXT:    srai.w $t4, $a3, 31
+; LA32-NEXT:    mul.w $t5, $t4, $a0
+; LA32-NEXT:    add.w $t6, $t5, $t3
+; LA32-NEXT:    add.w $t7, $t1, $t6
+; LA32-NEXT:    sltu $t8, $t7, $t1
+; LA32-NEXT:    sltu $t0, $t1, $t0
+; LA32-NEXT:    sltu $a6, $a7, $a6
+; LA32-NEXT:    mulh.wu $a7, $a1, $a3
 ; LA32-NEXT:    add.w $a6, $a7, $a6
-; LA32-NEXT:    sltu $a7, $a6, $a7
-; LA32-NEXT:    add.w $a5, $a5, $a7
+; LA32-NEXT:    add.w $a6, $a6, $t0
+; LA32-NEXT:    mulh.wu $a7, $a2, $t2
+; LA32-NEXT:    add.w $a7, $a7, $t3
+; LA32-NEXT:    mul.w $a3, $a3, $t2
+; LA32-NEXT:    add.w $a3, $a7, $a3
+; LA32-NEXT:    mul.w $a1, $t4, $a1
+; LA32-NEXT:    mulh.wu $a7, $t4, $a0
+; LA32-NEXT:    add.w $a1, $a7, $a1
+; LA32-NEXT:    add.w $a1, $a1, $t5
+; LA32-NEXT:    add.w $a1, $a1, $a3
+; LA32-NEXT:    sltu $a3, $t6, $t5
+; LA32-NEXT:    add.w $a1, $a1, $a3
+; LA32-NEXT:    add.w $a1, $a6, $a1
+; LA32-NEXT:    add.w $a1, $a1, $t8
+; LA32-NEXT:    srai.w $a3, $a5, 31
+; LA32-NEXT:    xor $a1, $a1, $a3
+; LA32-NEXT:    xor $a3, $t7, $a3
+; LA32-NEXT:    or $a1, $a3, $a1
+; LA32-NEXT:    sltu $a1, $zero, $a1
 ; LA32-NEXT:    mul.w $a0, $a0, $a2
-; LA32-NEXT:    mul.w $a2, $a1, $a3
-; LA32-NEXT:    mulh.wu $a1, $a1, $a3
-; LA32-NEXT:    add.w $a3, $t1, $t2
-; LA32-NEXT:    sltu $a7, $a3, $t1
-; LA32-NEXT:    add.w $a1, $a1, $a7
 ; LA32-NEXT:    st.w $a0, $a4, 0
-; LA32-NEXT:    add.w $a0, $a2, $a3
-; LA32-NEXT:    sltu $a2, $a0, $a2
-; LA32-NEXT:    add.w $a1, $a1, $a2
-; LA32-NEXT:    st.w $t0, $a4, 4
-; LA32-NEXT:    add.w $a1, $a1, $a5
-; LA32-NEXT:    add.w $a2, $a0, $a6
-; LA32-NEXT:    sltu $a0, $a2, $a0
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    srai.w $a1, $t0, 31
-; LA32-NEXT:    xor $a0, $a0, $a1
-; LA32-NEXT:    xor $a1, $a2, $a1
-; LA32-NEXT:    or $a0, $a1, $a0
-; LA32-NEXT:    sltu $a0, $zero, $a0
+; LA32-NEXT:    st.w $a5, $a4, 4
+; LA32-NEXT:    move $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: smuloi64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    mul.d $a3, $a0, $a1
-; LA64-NEXT:    st.d $a3, $a2, 0
-; LA64-NEXT:    mulh.d $a0, $a0, $a1
-; LA64-NEXT:    srai.d $a1, $a3, 63
-; LA64-NEXT:    xor $a0, $a0, $a1
+; LA64-NEXT:    mulh.d $a3, $a0, $a1
+; LA64-NEXT:    mul.d $a1, $a0, $a1
+; LA64-NEXT:    srai.d $a0, $a1, 63
+; LA64-NEXT:    xor $a0, $a3, $a0
 ; LA64-NEXT:    sltu $a0, $zero, $a0
+; LA64-NEXT:    st.d $a1, $a2, 0
 ; LA64-NEXT:    ret
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
@@ -97,273 +98,283 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ; LA32-NEXT:    .cfi_offset 29, -36
 ; LA32-NEXT:    .cfi_offset 30, -40
 ; LA32-NEXT:    .cfi_offset 31, -44
-; LA32-NEXT:    st.w $a2, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    ld.w $a6, $a1, 0
-; LA32-NEXT:    ld.w $a7, $a0, 0
-; LA32-NEXT:    mulh.wu $a3, $a7, $a6
-; LA32-NEXT:    ld.w $a5, $a0, 4
-; LA32-NEXT:    mul.w $a4, $a5, $a6
-; LA32-NEXT:    add.w $a3, $a4, $a3
-; LA32-NEXT:    sltu $a4, $a3, $a4
-; LA32-NEXT:    mulh.wu $t0, $a5, $a6
-; LA32-NEXT:    add.w $a4, $t0, $a4
-; LA32-NEXT:    ld.w $t0, $a1, 4
-; LA32-NEXT:    mul.w $t1, $a7, $t0
-; LA32-NEXT:    add.w $a3, $t1, $a3
-; LA32-NEXT:    st.w $a3, $sp, 44 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $t1, $a3, $t1
-; LA32-NEXT:    mulh.wu $t2, $a7, $t0
-; LA32-NEXT:    add.w $t1, $t2, $t1
-; LA32-NEXT:    ld.w $t4, $a0, 12
-; LA32-NEXT:    ld.w $t2, $a0, 8
-; LA32-NEXT:    ld.w $t3, $a1, 8
-; LA32-NEXT:    mulh.wu $a0, $t2, $t3
-; LA32-NEXT:    mul.w $t5, $t4, $t3
-; LA32-NEXT:    add.w $a0, $t5, $a0
-; LA32-NEXT:    sltu $t5, $a0, $t5
-; LA32-NEXT:    mulh.wu $t6, $t4, $t3
-; LA32-NEXT:    add.w $t5, $t6, $t5
-; LA32-NEXT:    ld.w $t7, $a1, 12
-; LA32-NEXT:    mul.w $a1, $t2, $t7
+; LA32-NEXT:    st.w $a2, $sp, 48 # 4-byte Folded Spill
+; LA32-NEXT:    ld.w $t0, $a1, 12
+; LA32-NEXT:    ld.w $t1, $a1, 8
+; LA32-NEXT:    ld.w $a5, $a0, 12
+; LA32-NEXT:    ld.w $a7, $a1, 0
+; LA32-NEXT:    ld.w $a3, $a0, 0
+; LA32-NEXT:    ld.w $a6, $a0, 4
+; LA32-NEXT:    ld.w $a4, $a0, 8
+; LA32-NEXT:    ld.w $t3, $a1, 4
+; LA32-NEXT:    mulh.wu $a0, $a3, $a7
+; LA32-NEXT:    mul.w $a1, $a6, $a7
 ; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    st.w $a0, $sp, 48 # 4-byte Folded Spill
 ; LA32-NEXT:    sltu $a1, $a0, $a1
-; LA32-NEXT:    mulh.wu $t6, $t2, $t7
-; LA32-NEXT:    add.w $t6, $t6, $a1
-; LA32-NEXT:    srai.w $s7, $t4, 31
-; LA32-NEXT:    mul.w $a1, $s7, $t7
-; LA32-NEXT:    mulh.wu $t8, $s7, $t3
-; LA32-NEXT:    add.w $t8, $t8, $a1
-; LA32-NEXT:    mulh.wu $fp, $a6, $s7
-; LA32-NEXT:    mul.w $s6, $t0, $s7
-; LA32-NEXT:    add.w $s8, $s6, $fp
-; LA32-NEXT:    mul.w $a1, $a6, $s7
-; LA32-NEXT:    add.w $ra, $a1, $s8
-; LA32-NEXT:    sltu $s0, $ra, $a1
-; LA32-NEXT:    add.w $a0, $fp, $s0
-; LA32-NEXT:    add.w $a3, $a4, $t1
+; LA32-NEXT:    mulh.wu $t2, $a6, $a7
+; LA32-NEXT:    add.w $a1, $t2, $a1
+; LA32-NEXT:    mul.w $t2, $a3, $t3
+; LA32-NEXT:    add.w $a0, $t2, $a0
+; LA32-NEXT:    st.w $a0, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $t2, $a0, $t2
+; LA32-NEXT:    mulh.wu $t4, $a3, $t3
+; LA32-NEXT:    add.w $t2, $t4, $t2
+; LA32-NEXT:    add.w $t2, $a1, $t2
+; LA32-NEXT:    mul.w $t4, $a6, $t3
+; LA32-NEXT:    add.w $t5, $t4, $t2
+; LA32-NEXT:    sltu $t4, $t5, $t4
+; LA32-NEXT:    sltu $a1, $t2, $a1
+; LA32-NEXT:    mulh.wu $t2, $a6, $t3
+; LA32-NEXT:    add.w $a1, $t2, $a1
+; LA32-NEXT:    add.w $a1, $a1, $t4
+; LA32-NEXT:    mulh.wu $t2, $a4, $a7
+; LA32-NEXT:    mul.w $t4, $a5, $a7
+; LA32-NEXT:    add.w $t2, $t4, $t2
+; LA32-NEXT:    mul.w $t6, $a4, $t3
+; LA32-NEXT:    add.w $t7, $t6, $t2
+; LA32-NEXT:    add.w $a1, $t7, $a1
+; LA32-NEXT:    mul.w $t8, $a4, $a7
+; LA32-NEXT:    add.w $t5, $t8, $t5
+; LA32-NEXT:    sltu $t8, $t5, $t8
+; LA32-NEXT:    add.w $a1, $a1, $t8
+; LA32-NEXT:    sltu $fp, $a1, $t7
+; LA32-NEXT:    xor $s0, $a1, $t7
+; LA32-NEXT:    sltui $s0, $s0, 1
+; LA32-NEXT:    masknez $fp, $fp, $s0
+; LA32-NEXT:    maskeqz $t8, $t8, $s0
+; LA32-NEXT:    or $t8, $t8, $fp
+; LA32-NEXT:    sltu $t2, $t2, $t4
+; LA32-NEXT:    mulh.wu $t4, $a5, $a7
+; LA32-NEXT:    add.w $t4, $t4, $t2
+; LA32-NEXT:    sltu $t2, $t7, $t6
+; LA32-NEXT:    mulh.wu $t6, $a4, $t3
+; LA32-NEXT:    add.w $t2, $t6, $t2
+; LA32-NEXT:    add.w $fp, $t4, $t2
+; LA32-NEXT:    mul.w $t6, $a5, $t3
+; LA32-NEXT:    add.w $s0, $t6, $fp
+; LA32-NEXT:    add.w $s1, $s0, $t8
+; LA32-NEXT:    mulh.wu $t2, $a3, $t1
+; LA32-NEXT:    mul.w $t7, $a6, $t1
+; LA32-NEXT:    add.w $t8, $t7, $t2
+; LA32-NEXT:    mul.w $s2, $a3, $t0
+; LA32-NEXT:    add.w $s3, $s2, $t8
+; LA32-NEXT:    add.w $t2, $s3, $a1
+; LA32-NEXT:    mul.w $s4, $a3, $t1
+; LA32-NEXT:    add.w $a0, $s4, $t5
+; LA32-NEXT:    st.w $a0, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $t5, $a0, $s4
+; LA32-NEXT:    add.w $a0, $t2, $t5
+; LA32-NEXT:    st.w $a0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $s4, $a0, $s3
+; LA32-NEXT:    xor $s5, $a0, $s3
+; LA32-NEXT:    sltui $s5, $s5, 1
+; LA32-NEXT:    masknez $s4, $s4, $s5
+; LA32-NEXT:    maskeqz $t5, $t5, $s5
+; LA32-NEXT:    or $t5, $t5, $s4
+; LA32-NEXT:    sltu $t7, $t8, $t7
+; LA32-NEXT:    mulh.wu $t8, $a6, $t1
+; LA32-NEXT:    add.w $s4, $t8, $t7
+; LA32-NEXT:    sltu $t7, $s3, $s2
+; LA32-NEXT:    mulh.wu $t8, $a3, $t0
+; LA32-NEXT:    add.w $t7, $t8, $t7
+; LA32-NEXT:    add.w $s2, $s4, $t7
+; LA32-NEXT:    mul.w $s3, $a6, $t0
+; LA32-NEXT:    add.w $s6, $s3, $s2
+; LA32-NEXT:    add.w $s7, $s6, $t5
+; LA32-NEXT:    add.w $s5, $s1, $s7
+; LA32-NEXT:    mul.w $s8, $a4, $t1
+; LA32-NEXT:    add.w $ra, $s8, $s5
+; LA32-NEXT:    srai.w $t8, $a5, 31
+; LA32-NEXT:    mul.w $t7, $a7, $t8
+; LA32-NEXT:    st.w $a7, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    srai.w $t5, $t0, 31
+; LA32-NEXT:    sltu $s5, $s5, $s1
+; LA32-NEXT:    sltu $s1, $s1, $s0
+; LA32-NEXT:    sltu $s0, $s0, $t6
+; LA32-NEXT:    mul.w $t2, $a3, $t5
+; LA32-NEXT:    st.w $a3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $t4, $fp, $t4
+; LA32-NEXT:    mulh.wu $fp, $a5, $t3
+; LA32-NEXT:    st.w $a5, $sp, 0 # 4-byte Folded Spill
+; LA32-NEXT:    add.w $t4, $fp, $t4
+; LA32-NEXT:    add.w $fp, $t2, $t7
+; LA32-NEXT:    add.w $s0, $t4, $s0
+; LA32-NEXT:    add.w $a0, $ra, $fp
+; LA32-NEXT:    st.w $a0, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT:    add.w $a2, $s0, $s1
+; LA32-NEXT:    sltu $s0, $a0, $ra
+; LA32-NEXT:    sltu $s1, $s7, $s6
+; LA32-NEXT:    sltu $s3, $s6, $s3
+; LA32-NEXT:    sltu $s2, $s2, $s4
+; LA32-NEXT:    move $s6, $a6
+; LA32-NEXT:    st.w $a6, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT:    mulh.wu $s4, $a6, $t0
+; LA32-NEXT:    add.w $s2, $s4, $s2
+; LA32-NEXT:    add.w $s2, $s2, $s3
+; LA32-NEXT:    add.w $s1, $s2, $s1
+; LA32-NEXT:    add.w $s1, $a2, $s1
+; LA32-NEXT:    add.w $s7, $s1, $s5
+; LA32-NEXT:    move $a0, $a4
+; LA32-NEXT:    st.w $a4, $sp, 4 # 4-byte Folded Spill
+; LA32-NEXT:    mulh.wu $s1, $a4, $t1
+; LA32-NEXT:    mul.w $a5, $a5, $t1
+; LA32-NEXT:    add.w $a4, $a5, $s1
+; LA32-NEXT:    mul.w $a6, $a0, $t0
+; LA32-NEXT:    add.w $a1, $a6, $a4
+; LA32-NEXT:    sltu $ra, $ra, $s8
+; LA32-NEXT:    add.w $s1, $a1, $s7
+; LA32-NEXT:    add.w $s8, $s1, $ra
+; LA32-NEXT:    move $a0, $t2
+; LA32-NEXT:    st.w $t2, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $t6, $fp, $t2
+; LA32-NEXT:    mulh.wu $t2, $a7, $t8
+; LA32-NEXT:    mul.w $s4, $t3, $t8
+; LA32-NEXT:    add.w $a7, $s4, $t2
+; LA32-NEXT:    st.w $a7, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    add.w $s3, $t7, $a7
+; LA32-NEXT:    mulh.wu $a7, $a3, $t5
+; LA32-NEXT:    add.w $t4, $a7, $a0
+; LA32-NEXT:    mul.w $s2, $s6, $t5
+; LA32-NEXT:    add.w $s1, $t4, $s2
+; LA32-NEXT:    add.w $fp, $s1, $s3
+; LA32-NEXT:    add.w $a0, $fp, $t6
+; LA32-NEXT:    add.w $fp, $s8, $a0
+; LA32-NEXT:    add.w $a3, $fp, $s0
 ; LA32-NEXT:    st.w $a3, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT:    xor $fp, $a3, $s8
+; LA32-NEXT:    sltui $fp, $fp, 1
+; LA32-NEXT:    sltu $s6, $a3, $s8
+; LA32-NEXT:    masknez $s6, $s6, $fp
+; LA32-NEXT:    maskeqz $fp, $s0, $fp
+; LA32-NEXT:    or $s6, $fp, $s6
+; LA32-NEXT:    sltu $fp, $s7, $a2
+; LA32-NEXT:    xor $a2, $s7, $a2
+; LA32-NEXT:    sltui $a2, $a2, 1
+; LA32-NEXT:    masknez $fp, $fp, $a2
+; LA32-NEXT:    maskeqz $a2, $s5, $a2
+; LA32-NEXT:    or $s0, $a2, $fp
+; LA32-NEXT:    sltu $a2, $a4, $a5
+; LA32-NEXT:    ld.w $a5, $sp, 0 # 4-byte Folded Reload
+; LA32-NEXT:    mulh.wu $a3, $a5, $t1
+; LA32-NEXT:    add.w $a2, $a3, $a2
+; LA32-NEXT:    sltu $a3, $a1, $a6
+; LA32-NEXT:    ld.w $fp, $sp, 4 # 4-byte Folded Reload
+; LA32-NEXT:    mulh.wu $a4, $fp, $t0
+; LA32-NEXT:    add.w $a3, $a4, $a3
+; LA32-NEXT:    sltu $a4, $s8, $a1
+; LA32-NEXT:    xor $a1, $s8, $a1
+; LA32-NEXT:    sltui $a1, $a1, 1
+; LA32-NEXT:    masknez $a4, $a4, $a1
+; LA32-NEXT:    maskeqz $a1, $ra, $a1
+; LA32-NEXT:    or $a1, $a1, $a4
+; LA32-NEXT:    sltu $a4, $a0, $s1
+; LA32-NEXT:    xor $a0, $a0, $s1
+; LA32-NEXT:    sltui $a0, $a0, 1
+; LA32-NEXT:    masknez $a4, $a4, $a0
+; LA32-NEXT:    maskeqz $a0, $t6, $a0
+; LA32-NEXT:    or $s5, $a0, $a4
+; LA32-NEXT:    sltu $a0, $s3, $t7
+; LA32-NEXT:    add.w $a0, $t2, $a0
+; LA32-NEXT:    ld.w $t2, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    sltu $a4, $t4, $t2
+; LA32-NEXT:    add.w $s7, $a7, $a4
+; LA32-NEXT:    add.w $a3, $a2, $a3
+; LA32-NEXT:    sltu $a2, $a3, $a2
+; LA32-NEXT:    mulh.wu $a4, $a5, $t0
+; LA32-NEXT:    add.w $a2, $a4, $a2
+; LA32-NEXT:    mul.w $a4, $a5, $t0
+; LA32-NEXT:    move $a6, $a5
+; LA32-NEXT:    add.w $a3, $a4, $a3
 ; LA32-NEXT:    sltu $a4, $a3, $a4
-; LA32-NEXT:    mulh.wu $t1, $a5, $t0
-; LA32-NEXT:    add.w $a3, $t1, $a4
-; LA32-NEXT:    st.w $a3, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    srai.w $s4, $t7, 31
-; LA32-NEXT:    mul.w $fp, $a7, $s4
-; LA32-NEXT:    mulh.wu $a4, $a7, $s4
-; LA32-NEXT:    add.w $s1, $a4, $fp
-; LA32-NEXT:    sltu $s0, $s1, $fp
-; LA32-NEXT:    add.w $s5, $a4, $s0
-; LA32-NEXT:    mul.w $a4, $s7, $t3
-; LA32-NEXT:    add.w $t8, $t8, $a4
-; LA32-NEXT:    add.w $s0, $ra, $t8
-; LA32-NEXT:    add.w $a3, $a1, $a4
-; LA32-NEXT:    st.w $a3, $sp, 32 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $a4, $a3, $a1
-; LA32-NEXT:    add.w $a3, $s0, $a4
-; LA32-NEXT:    st.w $a3, $sp, 24 # 4-byte Folded Spill
-; LA32-NEXT:    add.w $s3, $t5, $t6
-; LA32-NEXT:    sltu $a4, $s3, $t5
-; LA32-NEXT:    mulh.wu $t5, $t4, $t7
-; LA32-NEXT:    add.w $a3, $t5, $a4
-; LA32-NEXT:    st.w $a3, $sp, 16 # 4-byte Folded Spill
-; LA32-NEXT:    mul.w $a4, $a7, $a6
-; LA32-NEXT:    st.w $a4, $a2, 0
-; LA32-NEXT:    sltu $a4, $s8, $s6
-; LA32-NEXT:    mulh.wu $t5, $t0, $s7
-; LA32-NEXT:    add.w $a4, $t5, $a4
-; LA32-NEXT:    add.w $t1, $a4, $a0
-; LA32-NEXT:    sltu $a4, $t1, $a4
-; LA32-NEXT:    add.w $s2, $t5, $a4
-; LA32-NEXT:    mulh.wu $a4, $a7, $t3
-; LA32-NEXT:    mul.w $t5, $a5, $t3
-; LA32-NEXT:    add.w $a4, $t5, $a4
-; LA32-NEXT:    sltu $t5, $a4, $t5
-; LA32-NEXT:    mulh.wu $t6, $a5, $t3
-; LA32-NEXT:    add.w $a3, $t6, $t5
-; LA32-NEXT:    mul.w $t6, $a7, $t7
-; LA32-NEXT:    add.w $t5, $t6, $a4
-; LA32-NEXT:    sltu $a4, $t5, $t6
-; LA32-NEXT:    mulh.wu $t6, $a7, $t7
-; LA32-NEXT:    add.w $a4, $t6, $a4
-; LA32-NEXT:    mulh.wu $t6, $t2, $a6
-; LA32-NEXT:    mul.w $s7, $t4, $a6
-; LA32-NEXT:    add.w $t6, $s7, $t6
-; LA32-NEXT:    sltu $s7, $t6, $s7
-; LA32-NEXT:    mulh.wu $s8, $t4, $a6
-; LA32-NEXT:    add.w $a0, $s8, $s7
-; LA32-NEXT:    mul.w $s7, $t2, $t0
-; LA32-NEXT:    add.w $t6, $s7, $t6
-; LA32-NEXT:    sltu $s7, $t6, $s7
-; LA32-NEXT:    mulh.wu $s8, $t2, $t0
-; LA32-NEXT:    add.w $a2, $s8, $s7
-; LA32-NEXT:    mul.w $s8, $a5, $s4
-; LA32-NEXT:    add.w $s7, $s1, $s8
-; LA32-NEXT:    add.w $s1, $s7, $ra
-; LA32-NEXT:    add.w $a1, $fp, $a1
-; LA32-NEXT:    st.w $a1, $sp, 40 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $ra, $a1, $fp
-; LA32-NEXT:    add.w $a1, $s1, $ra
-; LA32-NEXT:    st.w $a1, $sp, 36 # 4-byte Folded Spill
-; LA32-NEXT:    xor $s0, $a1, $s7
-; LA32-NEXT:    sltui $s0, $s0, 1
-; LA32-NEXT:    sltu $a1, $a1, $s7
-; LA32-NEXT:    masknez $s1, $a1, $s0
-; LA32-NEXT:    maskeqz $s0, $ra, $s0
-; LA32-NEXT:    add.w $t1, $s6, $t1
-; LA32-NEXT:    sltu $s6, $t1, $s6
-; LA32-NEXT:    add.w $s2, $s2, $s6
+; LA32-NEXT:    add.w $a2, $a2, $a4
+; LA32-NEXT:    add.w $a4, $a3, $s0
+; LA32-NEXT:    sltu $a3, $a4, $a3
+; LA32-NEXT:    add.w $a2, $a2, $a3
+; LA32-NEXT:    add.w $s8, $a4, $a1
+; LA32-NEXT:    sltu $a1, $s8, $a4
+; LA32-NEXT:    add.w $ra, $a2, $a1
+; LA32-NEXT:    ld.w $a1, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    sltu $a1, $a1, $s4
+; LA32-NEXT:    mulh.wu $a2, $t3, $t8
+; LA32-NEXT:    add.w $a1, $a2, $a1
+; LA32-NEXT:    add.w $a0, $a1, $a0
+; LA32-NEXT:    sltu $a1, $a0, $a1
+; LA32-NEXT:    add.w $a1, $a2, $a1
+; LA32-NEXT:    add.w $a0, $s4, $a0
+; LA32-NEXT:    sltu $a2, $a0, $s4
+; LA32-NEXT:    add.w $a1, $a1, $a2
+; LA32-NEXT:    mul.w $a2, $t8, $t1
+; LA32-NEXT:    mul.w $a3, $t8, $t0
+; LA32-NEXT:    mulh.wu $a4, $t8, $t1
+; LA32-NEXT:    add.w $a3, $a4, $a3
+; LA32-NEXT:    add.w $a3, $a3, $a2
+; LA32-NEXT:    add.w $a3, $s3, $a3
+; LA32-NEXT:    add.w $a2, $t7, $a2
+; LA32-NEXT:    sltu $a4, $a2, $t7
+; LA32-NEXT:    add.w $a3, $a3, $a4
+; LA32-NEXT:    add.w $a1, $a1, $a3
 ; LA32-NEXT:    add.w $a2, $a0, $a2
 ; LA32-NEXT:    sltu $a0, $a2, $a0
-; LA32-NEXT:    mulh.wu $s6, $t4, $t0
-; LA32-NEXT:    add.w $t8, $s6, $a0
-; LA32-NEXT:    add.w $a4, $a3, $a4
-; LA32-NEXT:    sltu $a3, $a4, $a3
-; LA32-NEXT:    mulh.wu $s6, $a5, $t7
-; LA32-NEXT:    add.w $a3, $s6, $a3
-; LA32-NEXT:    mul.w $s6, $t4, $t7
-; LA32-NEXT:    mul.w $t7, $a5, $t7
-; LA32-NEXT:    mul.w $ra, $t4, $t0
-; LA32-NEXT:    mul.w $t0, $a5, $t0
-; LA32-NEXT:    mul.w $t4, $t4, $s4
-; LA32-NEXT:    mul.w $a7, $a7, $t3
-; LA32-NEXT:    mul.w $a6, $t2, $a6
-; LA32-NEXT:    mul.w $t3, $t2, $t3
-; LA32-NEXT:    mul.w $a0, $t2, $s4
-; LA32-NEXT:    mulh.wu $t2, $t2, $s4
-; LA32-NEXT:    mulh.wu $a5, $s4, $a5
-; LA32-NEXT:    sltu $s4, $s7, $s8
-; LA32-NEXT:    add.w $s4, $a5, $s4
-; LA32-NEXT:    add.w $s4, $s5, $s4
-; LA32-NEXT:    sltu $s5, $s4, $s5
-; LA32-NEXT:    add.w $s5, $a5, $s5
-; LA32-NEXT:    ld.w $a1, $sp, 20 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $a1, $t0, $a1
-; LA32-NEXT:    sltu $a5, $a1, $t0
-; LA32-NEXT:    ld.w $t0, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $t0, $t0, $a5
-; LA32-NEXT:    or $s0, $s0, $s1
-; LA32-NEXT:    add.w $a4, $t7, $a4
-; LA32-NEXT:    sltu $a5, $a4, $t7
-; LA32-NEXT:    add.w $t7, $a3, $a5
-; LA32-NEXT:    add.w $s1, $ra, $a2
-; LA32-NEXT:    sltu $a2, $s1, $ra
-; LA32-NEXT:    add.w $t8, $t8, $a2
-; LA32-NEXT:    add.w $a5, $s6, $s3
-; LA32-NEXT:    sltu $a2, $a5, $s6
+; LA32-NEXT:    add.w $a0, $a1, $a0
+; LA32-NEXT:    sltu $a1, $s1, $s2
 ; LA32-NEXT:    ld.w $a3, $sp, 16 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $a2, $a3, $a2
-; LA32-NEXT:    ld.w $s6, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    ld.w $a3, $sp, 44 # 4-byte Folded Reload
-; LA32-NEXT:    st.w $a3, $s6, 4
-; LA32-NEXT:    ld.w $a3, $sp, 24 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $a3, $s2, $a3
-; LA32-NEXT:    ld.w $s2, $sp, 32 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $s2, $t1, $s2
-; LA32-NEXT:    sltu $t1, $s2, $t1
-; LA32-NEXT:    add.w $a3, $a3, $t1
-; LA32-NEXT:    add.w $t1, $s8, $s4
-; LA32-NEXT:    sltu $s3, $t1, $s8
-; LA32-NEXT:    add.w $s3, $s5, $s3
-; LA32-NEXT:    add.w $t2, $t2, $a0
-; LA32-NEXT:    add.w $t2, $t2, $t4
-; LA32-NEXT:    add.w $t2, $t2, $s7
-; LA32-NEXT:    add.w $t4, $a0, $fp
-; LA32-NEXT:    sltu $a0, $t4, $a0
-; LA32-NEXT:    add.w $a0, $t2, $a0
-; LA32-NEXT:    add.w $a0, $s3, $a0
-; LA32-NEXT:    add.w $t2, $t1, $t4
-; LA32-NEXT:    sltu $t1, $t2, $t1
-; LA32-NEXT:    add.w $a0, $a0, $t1
-; LA32-NEXT:    add.w $a0, $a0, $a3
-; LA32-NEXT:    add.w $t1, $t2, $s2
-; LA32-NEXT:    sltu $a3, $t1, $t2
-; LA32-NEXT:    add.w $a0, $a0, $a3
-; LA32-NEXT:    add.w $a3, $t6, $t0
-; LA32-NEXT:    add.w $a1, $a6, $a1
-; LA32-NEXT:    sltu $a6, $a1, $a6
-; LA32-NEXT:    add.w $t0, $a3, $a6
-; LA32-NEXT:    add.w $a1, $a7, $a1
-; LA32-NEXT:    sltu $a7, $a1, $a7
-; LA32-NEXT:    add.w $a3, $t5, $t0
-; LA32-NEXT:    add.w $a3, $a3, $a7
-; LA32-NEXT:    sltu $t2, $a3, $t5
-; LA32-NEXT:    xor $t4, $a3, $t5
-; LA32-NEXT:    sltui $t4, $t4, 1
-; LA32-NEXT:    masknez $t2, $t2, $t4
-; LA32-NEXT:    maskeqz $a7, $a7, $t4
-; LA32-NEXT:    st.w $a1, $s6, 8
-; LA32-NEXT:    or $a1, $a7, $t2
-; LA32-NEXT:    sltu $a7, $t0, $t6
-; LA32-NEXT:    xor $t0, $t0, $t6
-; LA32-NEXT:    sltui $t0, $t0, 1
-; LA32-NEXT:    masknez $a7, $a7, $t0
-; LA32-NEXT:    maskeqz $a6, $a6, $t0
-; LA32-NEXT:    or $a6, $a6, $a7
-; LA32-NEXT:    add.w $a6, $s1, $a6
-; LA32-NEXT:    sltu $a7, $a6, $s1
-; LA32-NEXT:    add.w $a7, $t8, $a7
-; LA32-NEXT:    add.w $a1, $a4, $a1
-; LA32-NEXT:    sltu $a4, $a1, $a4
-; LA32-NEXT:    add.w $a4, $t7, $a4
-; LA32-NEXT:    add.w $t0, $t1, $s0
-; LA32-NEXT:    sltu $t1, $t0, $t1
-; LA32-NEXT:    add.w $a0, $a0, $t1
-; LA32-NEXT:    st.w $a3, $s6, 12
-; LA32-NEXT:    add.w $a1, $a6, $a1
-; LA32-NEXT:    sltu $a6, $a1, $a6
-; LA32-NEXT:    add.w $a4, $a7, $a4
-; LA32-NEXT:    add.w $a4, $a4, $a6
-; LA32-NEXT:    sltu $t1, $a4, $a7
-; LA32-NEXT:    xor $a7, $a4, $a7
-; LA32-NEXT:    sltui $a7, $a7, 1
-; LA32-NEXT:    masknez $t1, $t1, $a7
-; LA32-NEXT:    maskeqz $a6, $a6, $a7
-; LA32-NEXT:    or $a6, $a6, $t1
-; LA32-NEXT:    add.w $a6, $a5, $a6
-; LA32-NEXT:    sltu $a5, $a6, $a5
-; LA32-NEXT:    add.w $a2, $a2, $a5
-; LA32-NEXT:    ld.w $t1, $sp, 48 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $a4, $t1, $a4
-; LA32-NEXT:    add.w $a1, $t3, $a1
-; LA32-NEXT:    sltu $a5, $a1, $t3
-; LA32-NEXT:    add.w $a4, $a4, $a5
-; LA32-NEXT:    sltu $a7, $a4, $t1
-; LA32-NEXT:    xor $t1, $a4, $t1
-; LA32-NEXT:    sltui $t1, $t1, 1
-; LA32-NEXT:    masknez $a7, $a7, $t1
-; LA32-NEXT:    maskeqz $a5, $a5, $t1
-; LA32-NEXT:    or $a5, $a5, $a7
-; LA32-NEXT:    add.w $a5, $a6, $a5
-; LA32-NEXT:    sltu $a6, $a5, $a6
-; LA32-NEXT:    add.w $a2, $a2, $a6
-; LA32-NEXT:    add.w $a0, $a2, $a0
-; LA32-NEXT:    add.w $a2, $a5, $t0
-; LA32-NEXT:    sltu $a5, $a2, $a5
-; LA32-NEXT:    add.w $a0, $a0, $a5
-; LA32-NEXT:    ld.w $a5, $sp, 40 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $a5, $a1, $a5
-; LA32-NEXT:    sltu $a1, $a5, $a1
-; LA32-NEXT:    ld.w $a6, $sp, 36 # 4-byte Folded Reload
-; LA32-NEXT:    add.w $a6, $a4, $a6
-; LA32-NEXT:    add.w $a6, $a6, $a1
-; LA32-NEXT:    sltu $a7, $a6, $a4
-; LA32-NEXT:    xor $a4, $a6, $a4
-; LA32-NEXT:    sltui $a4, $a4, 1
-; LA32-NEXT:    masknez $a7, $a7, $a4
-; LA32-NEXT:    maskeqz $a1, $a1, $a4
-; LA32-NEXT:    or $a1, $a1, $a7
-; LA32-NEXT:    add.w $a1, $a2, $a1
-; LA32-NEXT:    sltu $a2, $a1, $a2
+; LA32-NEXT:    mulh.wu $a3, $t5, $a3
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    add.w $a1, $s7, $a1
+; LA32-NEXT:    sltu $a4, $a1, $s7
+; LA32-NEXT:    add.w $a3, $a3, $a4
+; LA32-NEXT:    add.w $a1, $s2, $a1
+; LA32-NEXT:    sltu $a4, $a1, $s2
+; LA32-NEXT:    add.w $a3, $a3, $a4
+; LA32-NEXT:    mul.w $a4, $fp, $t5
+; LA32-NEXT:    mulh.wu $a5, $fp, $t5
+; LA32-NEXT:    mul.w $a6, $a6, $t5
+; LA32-NEXT:    add.w $a5, $a5, $a4
+; LA32-NEXT:    add.w $a5, $a5, $a6
+; LA32-NEXT:    add.w $a5, $a5, $s1
+; LA32-NEXT:    add.w $a6, $a4, $t2
+; LA32-NEXT:    sltu $a4, $a6, $a4
+; LA32-NEXT:    add.w $a4, $a5, $a4
+; LA32-NEXT:    add.w $a3, $a3, $a4
+; LA32-NEXT:    add.w $a4, $a1, $a6
+; LA32-NEXT:    sltu $a1, $a4, $a1
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    add.w $a0, $a1, $a0
+; LA32-NEXT:    add.w $a1, $a4, $a2
+; LA32-NEXT:    sltu $a2, $a1, $a4
 ; LA32-NEXT:    add.w $a0, $a0, $a2
-; LA32-NEXT:    srai.w $a2, $a3, 31
-; LA32-NEXT:    xor $a3, $a6, $a2
-; LA32-NEXT:    xor $a0, $a0, $a2
+; LA32-NEXT:    add.w $a2, $a1, $s5
+; LA32-NEXT:    sltu $a1, $a2, $a1
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    add.w $a0, $ra, $a0
+; LA32-NEXT:    add.w $a1, $s8, $a2
+; LA32-NEXT:    sltu $a2, $a1, $s8
+; LA32-NEXT:    add.w $a0, $a0, $a2
+; LA32-NEXT:    add.w $a2, $a1, $s6
+; LA32-NEXT:    sltu $a1, $a2, $a1
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    ld.w $a4, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT:    srai.w $a1, $a4, 31
+; LA32-NEXT:    xor $a0, $a0, $a1
+; LA32-NEXT:    ld.w $a3, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT:    xor $a3, $a3, $a1
 ; LA32-NEXT:    or $a0, $a3, $a0
-; LA32-NEXT:    xor $a3, $a5, $a2
-; LA32-NEXT:    xor $a1, $a1, $a2
-; LA32-NEXT:    or $a1, $a3, $a1
+; LA32-NEXT:    xor $a2, $a2, $a1
+; LA32-NEXT:    ld.w $a3, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT:    xor $a1, $a3, $a1
+; LA32-NEXT:    or $a1, $a1, $a2
 ; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    ld.w $a1, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $a2, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    mul.w $a1, $a2, $a1
+; LA32-NEXT:    ld.w $a2, $sp, 48 # 4-byte Folded Reload
+; LA32-NEXT:    st.w $a1, $a2, 0
+; LA32-NEXT:    ld.w $a1, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT:    st.w $a1, $a2, 4
+; LA32-NEXT:    ld.w $a1, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT:    st.w $a1, $a2, 8
 ; LA32-NEXT:    sltu $a0, $zero, $a0
+; LA32-NEXT:    st.w $a4, $a2, 12
 ; LA32-NEXT:    ld.w $s8, $sp, 52 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s7, $sp, 56 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s6, $sp, 60 # 4-byte Folded Reload
@@ -380,53 +391,54 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ;
 ; LA64-LABEL: smuloi128:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srai.d $a5, $a1, 63
-; LA64-NEXT:    mul.d $a6, $a2, $a5
-; LA64-NEXT:    mulh.du $a7, $a2, $a5
-; LA64-NEXT:    add.d $a7, $a7, $a6
-; LA64-NEXT:    mul.d $a5, $a3, $a5
+; LA64-NEXT:    mulh.du $a5, $a0, $a2
+; LA64-NEXT:    mul.d $a6, $a1, $a2
+; LA64-NEXT:    add.d $a5, $a6, $a5
+; LA64-NEXT:    sltu $a6, $a5, $a6
+; LA64-NEXT:    mulh.du $a7, $a1, $a2
+; LA64-NEXT:    add.d $a6, $a7, $a6
+; LA64-NEXT:    mul.d $a7, $a0, $a3
 ; LA64-NEXT:    add.d $a5, $a7, $a5
-; LA64-NEXT:    srai.d $a7, $a3, 63
-; LA64-NEXT:    mul.d $t0, $a7, $a1
-; LA64-NEXT:    mulh.du $t1, $a7, $a0
-; LA64-NEXT:    add.d $t0, $t1, $t0
-; LA64-NEXT:    mul.d $a7, $a7, $a0
-; LA64-NEXT:    add.d $t0, $t0, $a7
-; LA64-NEXT:    add.d $a5, $t0, $a5
-; LA64-NEXT:    mulh.du $t0, $a0, $a2
-; LA64-NEXT:    mul.d $t1, $a1, $a2
-; LA64-NEXT:    add.d $t0, $t1, $t0
-; LA64-NEXT:    sltu $t1, $t0, $t1
-; LA64-NEXT:    mulh.du $t2, $a1, $a2
-; LA64-NEXT:    add.d $t1, $t2, $t1
-; LA64-NEXT:    mul.d $t2, $a0, $a3
-; LA64-NEXT:    add.d $t0, $t2, $t0
-; LA64-NEXT:    sltu $t2, $t0, $t2
-; LA64-NEXT:    mulh.du $t3, $a0, $a3
-; LA64-NEXT:    add.d $t2, $t3, $t2
+; LA64-NEXT:    sltu $a7, $a5, $a7
+; LA64-NEXT:    mulh.du $t0, $a0, $a3
+; LA64-NEXT:    add.d $a7, $t0, $a7
+; LA64-NEXT:    add.d $a7, $a6, $a7
+; LA64-NEXT:    mul.d $t0, $a1, $a3
+; LA64-NEXT:    add.d $t1, $t0, $a7
+; LA64-NEXT:    srai.d $t2, $a1, 63
+; LA64-NEXT:    mul.d $t3, $a2, $t2
+; LA64-NEXT:    srai.d $t4, $a3, 63
+; LA64-NEXT:    mul.d $t5, $t4, $a0
+; LA64-NEXT:    add.d $t6, $t5, $t3
+; LA64-NEXT:    add.d $t7, $t1, $t6
+; LA64-NEXT:    sltu $t8, $t7, $t1
+; LA64-NEXT:    sltu $t0, $t1, $t0
+; LA64-NEXT:    sltu $a6, $a7, $a6
+; LA64-NEXT:    mulh.du $a7, $a1, $a3
 ; LA64-NEXT:    add.d $a6, $a7, $a6
-; LA64-NEXT:    sltu $a7, $a6, $a7
-; LA64-NEXT:    add.d $a5, $a5, $a7
+; LA64-NEXT:    add.d $a6, $a6, $t0
+; LA64-NEXT:    mulh.du $a7, $a2, $t2
+; LA64-NEXT:    add.d $a7, $a7, $t3
+; LA64-NEXT:    mul.d $a3, $a3, $t2
+; LA64-NEXT:    add.d $a3, $a7, $a3
+; LA64-NEXT:    mul.d $a1, $t4, $a1
+; LA64-NEXT:    mulh.du $a7, $t4, $a0
+; LA64-NEXT:    add.d $a1, $a7, $a1
+; LA64-NEXT:    add.d $a1, $a1, $t5
+; LA64-NEXT:    add.d $a1, $a1, $a3
+; LA64-NEXT:    sltu $a3, $t6, $t5
+; LA64-NEXT:    add.d $a1, $a1, $a3
+; LA64-NEXT:    add.d $a1, $a6, $a1
+; LA64-NEXT:    add.d $a1, $a1, $t8
+; LA64-NEXT:    srai.d $a3, $a5, 63
+; LA64-NEXT:    xor $a1, $a1, $a3
+; LA64-NEXT:    xor $a3, $t7, $a3
+; LA64-NEXT:    or $a1, $a3, $a1
+; LA64-NEXT:    sltu $a1, $zero, $a1
 ; LA64-NEXT:    mul.d $a0, $a0, $a2
-; LA64-NEXT:    mul.d $a2, $a1, $a3
-; LA64-NEXT:    mulh.du $a1, $a1, $a3
-; LA64-NEXT:    add.d $a3, $t1, $t2
-; LA64-NEXT:    sltu $a7, $a3, $t1
-; LA64-NEXT:    add.d $a1, $a1, $a7
 ; LA64-NEXT:    st.d $a0, $a4, 0
-; LA64-NEXT:    add.d $a0, $a2, $a3
-; LA64-NEXT:    sltu $a2, $a0, $a2
-; LA64-NEXT:    add.d $a1, $a1, $a2
-; LA64-NEXT:    st.d $t0, $a4, 8
-; LA64-NEXT:    add.d $a1, $a1, $a5
-; LA64-NEXT:    add.d $a2, $a0, $a6
-; LA64-NEXT:    sltu $a0, $a2, $a0
-; LA64-NEXT:    add.d $a0, $a1, $a0
-; LA64-NEXT:    srai.d $a1, $t0, 63
-; LA64-NEXT:    xor $a0, $a0, $a1
-; LA64-NEXT:    xor $a1, $a2, $a1
-; LA64-NEXT:    or $a0, $a1, $a0
-; LA64-NEXT:    sltu $a0, $zero, $a0
+; LA64-NEXT:    st.d $a5, $a4, 8
+; LA64-NEXT:    move $a0, $a1
 ; LA64-NEXT:    ret
   %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
   %val = extractvalue {i128, i1} %t, 0
diff --git a/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll
index d12cbaaabb9863..e6df401f0fef50 100644
--- a/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll
+++ b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll
@@ -7,15 +7,15 @@ define i32 @fptosi_i32_fp128(fp128 %X) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $sp, $sp, -32
 ; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    ld.w $a1, $a0, 12
-; LA32-NEXT:    st.w $a1, $sp, 20
-; LA32-NEXT:    ld.w $a1, $a0, 8
-; LA32-NEXT:    st.w $a1, $sp, 16
-; LA32-NEXT:    ld.w $a1, $a0, 4
-; LA32-NEXT:    st.w $a1, $sp, 12
-; LA32-NEXT:    ld.w $a0, $a0, 0
-; LA32-NEXT:    st.w $a0, $sp, 8
+; LA32-NEXT:    ld.w $a1, $a0, 0
+; LA32-NEXT:    ld.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a3, $a0, 8
+; LA32-NEXT:    ld.w $a0, $a0, 12
+; LA32-NEXT:    st.w $a0, $sp, 20
+; LA32-NEXT:    st.w $a3, $sp, 16
+; LA32-NEXT:    st.w $a2, $sp, 12
 ; LA32-NEXT:    addi.w $a0, $sp, 8
+; LA32-NEXT:    st.w $a1, $sp, 8
 ; LA32-NEXT:    bl %plt(__fixtfsi)
 ; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
 ; LA32-NEXT:    addi.w $sp, $sp, 32
@@ -83,15 +83,15 @@ define i64 @fptosi_i64_fp128(fp128 %X) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $sp, $sp, -32
 ; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    ld.w $a1, $a0, 12
-; LA32-NEXT:    st.w $a1, $sp, 12
-; LA32-NEXT:    ld.w $a1, $a0, 8
-; LA32-NEXT:    st.w $a1, $sp, 8
-; LA32-NEXT:    ld.w $a1, $a0, 4
-; LA32-NEXT:    st.w $a1, $sp, 4
-; LA32-NEXT:    ld.w $a0, $a0, 0
-; LA32-NEXT:    st.w $a0, $sp, 0
+; LA32-NEXT:    ld.w $a1, $a0, 0
+; LA32-NEXT:    ld.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a3, $a0, 8
+; LA32-NEXT:    ld.w $a0, $a0, 12
+; LA32-NEXT:    st.w $a0, $sp, 12
+; LA32-NEXT:    st.w $a3, $sp, 8
+; LA32-NEXT:    st.w $a2, $sp, 4
 ; LA32-NEXT:    addi.w $a0, $sp, 0
+; LA32-NEXT:    st.w $a1, $sp, 0
 ; LA32-NEXT:    bl %plt(__fixtfdi)
 ; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
 ; LA32-NEXT:    addi.w $sp, $sp, 32
diff --git a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
index 092da5aba2d937..7a52697d152974 100644
--- a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
+++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
@@ -19,13 +19,13 @@ define dso_local ptr @f(i32 noundef signext %i) "frame-pointer"="all" {
 ; CHECK-NEXT:    addi.d $fp, $sp, 48
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
 ; CHECK-NEXT:    st.d $ra, $fp, -40 # 8-byte Folded Spill
-; CHECK-NEXT:    move $a1, $a0
+; CHECK-NEXT:    # kill: def $r5 killed $r4
+; CHECK-NEXT:    st.w $a0, $fp, -28
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(calls)
-; CHECK-NEXT:    addi.d $a3, $a0, %pc_lo12(calls)
-; CHECK-NEXT:    ld.w $a0, $a3, 0
-; CHECK-NEXT:    addi.d $a2, $a0, 1
-; CHECK-NEXT:    st.w $a2, $a3, 0
-; CHECK-NEXT:    st.w $a1, $fp, -28
+; CHECK-NEXT:    addi.d $a2, $a0, %pc_lo12(calls)
+; CHECK-NEXT:    ld.w $a0, $a2, 0
+; CHECK-NEXT:    addi.d $a1, $a0, 1
+; CHECK-NEXT:    st.w $a1, $a2, 0
 ; CHECK-NEXT:    bnez $a0, .LBB0_2
 ; CHECK-NEXT:    b .LBB0_1
 ; CHECK-NEXT:  .LBB0_1: # %if.then
diff --git a/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
index 63407ad003f694..8dd95befb82786 100644
--- a/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
+++ b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
@@ -8,74 +8,60 @@ declare void @foo()
 define i1 @load_store_fcc_reg(float %a, i1 %c) {
 ; LA32-LABEL: load_store_fcc_reg:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -32
-; LA32-NEXT:    .cfi_def_cfa_offset 32
-; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
-; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
 ; LA32-NEXT:    .cfi_offset 1, -4
 ; LA32-NEXT:    .cfi_offset 22, -8
 ; LA32-NEXT:    .cfi_offset 56, -16
-; LA32-NEXT:    .cfi_offset 57, -24
 ; LA32-NEXT:    move $fp, $a0
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    movgr2fr.w $fs1, $zero
-; LA32-NEXT:    fcmp.cult.s $fcc0, $fs1, $fa0
-; LA32-NEXT:    movcf2gr $a0, $fcc0
-; LA32-NEXT:    st.w $a0, $sp, 4
 ; LA32-NEXT:    bl %plt(foo)
-; LA32-NEXT:    ld.w $a0, $sp, 4
-; LA32-NEXT:    movgr2cf $fcc0, $a0
+; LA32-NEXT:    movgr2fr.w $fa0, $zero
+; LA32-NEXT:    fcmp.cult.s $fcc0, $fa0, $fs0
 ; LA32-NEXT:    bcnez $fcc0, .LBB0_2
 ; LA32-NEXT:  # %bb.1: # %if.then
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    b .LBB0_3
 ; LA32-NEXT:  .LBB0_2: # %if.else
-; LA32-NEXT:    fcmp.cle.s $fcc0, $fs0, $fs1
+; LA32-NEXT:    fcmp.cle.s $fcc0, $fs0, $fa0
 ; LA32-NEXT:    movcf2gr $a0, $fcc0
 ; LA32-NEXT:  .LBB0_3: # %if.then
-; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_store_fcc_reg:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -48
-; LA64-NEXT:    .cfi_def_cfa_offset 48
-; LA64-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs0, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    addi.d $sp, $sp, -32
+; LA64-NEXT:    .cfi_def_cfa_offset 32
+; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT:    .cfi_offset 1, -8
 ; LA64-NEXT:    .cfi_offset 22, -16
 ; LA64-NEXT:    .cfi_offset 56, -24
-; LA64-NEXT:    .cfi_offset 57, -32
 ; LA64-NEXT:    move $fp, $a0
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    movgr2fr.w $fs1, $zero
-; LA64-NEXT:    fcmp.cult.s $fcc0, $fs1, $fa0
-; LA64-NEXT:    movcf2gr $a0, $fcc0
-; LA64-NEXT:    st.d $a0, $sp, 8
 ; LA64-NEXT:    bl %plt(foo)
-; LA64-NEXT:    ld.d $a0, $sp, 8
-; LA64-NEXT:    movgr2cf $fcc0, $a0
+; LA64-NEXT:    movgr2fr.w $fa0, $zero
+; LA64-NEXT:    fcmp.cult.s $fcc0, $fa0, $fs0
 ; LA64-NEXT:    bcnez $fcc0, .LBB0_2
 ; LA64-NEXT:  # %bb.1: # %if.then
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    b .LBB0_3
 ; LA64-NEXT:  .LBB0_2: # %if.else
-; LA64-NEXT:    fcmp.cle.s $fcc0, $fs0, $fs1
+; LA64-NEXT:    fcmp.cle.s $fcc0, $fs0, $fa0
 ; LA64-NEXT:    movcf2gr $a0, $fcc0
 ; LA64-NEXT:  .LBB0_3: # %if.then
-; LA64-NEXT:    fld.d $fs1, $sp, 16 # 8-byte Folded Reload
-; LA64-NEXT:    fld.d $fs0, $sp, 24 # 8-byte Folded Reload
-; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 48
+; LA64-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 32
 ; LA64-NEXT:    ret
   %cmp = fcmp ole float %a, 0.000000e+00
   call void @foo()
diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll
index 52bdd230816f5f..c22a65c77e702f 100644
--- a/llvm/test/CodeGen/LoongArch/tail-calls.ll
+++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll
@@ -19,10 +19,12 @@ declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
 define void @caller_extern(ptr %src) optsize {
 ; CHECK-LABEL: caller_extern:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    move $a1, $a0
-; CHECK-NEXT:    pcalau12i $a0, %got_pc_hi20(dest)
-; CHECK-NEXT:    ld.d $a0, $a0, %got_pc_lo12(dest)
+; CHECK-NEXT:    pcalau12i $a1, %got_pc_hi20(dest)
+; CHECK-NEXT:    ld.d $a1, $a1, %got_pc_lo12(dest)
 ; CHECK-NEXT:    ori $a2, $zero, 33
+; CHECK-NEXT:    move $a3, $a0
+; CHECK-NEXT:    move $a0, $a1
+; CHECK-NEXT:    move $a1, $a3
 ; CHECK-NEXT:    b %plt(memcpy)
 entry:
   tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 33, i1 false)
@@ -35,13 +37,13 @@ declare void @callee_indirect2()
 define void @caller_indirect_tail(i32 %a) nounwind {
 ; CHECK-LABEL: caller_indirect_tail:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi.w $a0, $a0, 0
-; CHECK-NEXT:    sltui $a0, $a0, 1
 ; CHECK-NEXT:    pcalau12i $a1, %got_pc_hi20(callee_indirect2)
 ; CHECK-NEXT:    ld.d $a1, $a1, %got_pc_lo12(callee_indirect2)
-; CHECK-NEXT:    masknez $a1, $a1, $a0
 ; CHECK-NEXT:    pcalau12i $a2, %got_pc_hi20(callee_indirect1)
 ; CHECK-NEXT:    ld.d $a2, $a2, %got_pc_lo12(callee_indirect1)
+; CHECK-NEXT:    addi.w $a0, $a0, 0
+; CHECK-NEXT:    sltui $a0, $a0, 1
+; CHECK-NEXT:    masknez $a1, $a1, $a0
 ; CHECK-NEXT:    maskeqz $a0, $a2, $a0
 ; CHECK-NEXT:    or $a0, $a0, $a1
 ; CHECK-NEXT:    jr $a0
@@ -103,9 +105,9 @@ define void @caller_indirect_args() nounwind {
 ; CHECK-NEXT:    st.d $zero, $sp, 24
 ; CHECK-NEXT:    st.d $zero, $sp, 16
 ; CHECK-NEXT:    st.d $zero, $sp, 8
-; CHECK-NEXT:    ori $a0, $zero, 1
-; CHECK-NEXT:    st.d $a0, $sp, 0
+; CHECK-NEXT:    ori $a1, $zero, 1
 ; CHECK-NEXT:    addi.d $a0, $sp, 0
+; CHECK-NEXT:    st.d $a1, $sp, 0
 ; CHECK-NEXT:    bl %plt(callee_indirect_args)
 ; CHECK-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 48
diff --git a/llvm/test/CodeGen/LoongArch/unaligned-access.ll b/llvm/test/CodeGen/LoongArch/unaligned-access.ll
index 871c17f06e968d..dd5b585fcca500 100644
--- a/llvm/test/CodeGen/LoongArch/unaligned-access.ll
+++ b/llvm/test/CodeGen/LoongArch/unaligned-access.ll
@@ -13,10 +13,10 @@
 define i32 @f0(ptr %p) nounwind {
 ; LA32-ALIGNED-LABEL: f0:
 ; LA32-ALIGNED:       # %bb.0:
-; LA32-ALIGNED-NEXT:    ld.hu $a1, $a0, 0
-; LA32-ALIGNED-NEXT:    ld.hu $a0, $a0, 2
-; LA32-ALIGNED-NEXT:    slli.w $a0, $a0, 16
-; LA32-ALIGNED-NEXT:    or $a0, $a0, $a1
+; LA32-ALIGNED-NEXT:    ld.hu $a1, $a0, 2
+; LA32-ALIGNED-NEXT:    ld.hu $a0, $a0, 0
+; LA32-ALIGNED-NEXT:    slli.w $a1, $a1, 16
+; LA32-ALIGNED-NEXT:    or $a0, $a1, $a0
 ; LA32-ALIGNED-NEXT:    ret
 ;
 ; LA32-UNALIGNED-LABEL: f0:
@@ -31,10 +31,10 @@ define i32 @f0(ptr %p) nounwind {
 ;
 ; LA64-ALIGNED-LABEL: f0:
 ; LA64-ALIGNED:       # %bb.0:
-; LA64-ALIGNED-NEXT:    ld.hu $a1, $a0, 0
-; LA64-ALIGNED-NEXT:    ld.h $a0, $a0, 2
-; LA64-ALIGNED-NEXT:    slli.d $a0, $a0, 16
-; LA64-ALIGNED-NEXT:    or $a0, $a0, $a1
+; LA64-ALIGNED-NEXT:    ld.h $a1, $a0, 2
+; LA64-ALIGNED-NEXT:    ld.hu $a0, $a0, 0
+; LA64-ALIGNED-NEXT:    slli.d $a1, $a1, 16
+; LA64-ALIGNED-NEXT:    or $a0, $a1, $a0
 ; LA64-ALIGNED-NEXT:    ret
   %tmp = load i32, ptr %p, align 2
   ret i32 %tmp
@@ -62,10 +62,10 @@ define i64 @f1(ptr %p) nounwind {
 ;
 ; LA64-ALIGNED-LABEL: f1:
 ; LA64-ALIGNED:       # %bb.0:
-; LA64-ALIGNED-NEXT:    ld.wu $a1, $a0, 0
-; LA64-ALIGNED-NEXT:    ld.wu $a0, $a0, 4
-; LA64-ALIGNED-NEXT:    slli.d $a0, $a0, 32
-; LA64-ALIGNED-NEXT:    or $a0, $a0, $a1
+; LA64-ALIGNED-NEXT:    ld.wu $a1, $a0, 4
+; LA64-ALIGNED-NEXT:    ld.wu $a0, $a0, 0
+; LA64-ALIGNED-NEXT:    slli.d $a1, $a1, 32
+; LA64-ALIGNED-NEXT:    or $a0, $a1, $a0
 ; LA64-ALIGNED-NEXT:    ret
   %tmp = load i64, ptr %p, align 4
   ret i64 %tmp
diff --git a/llvm/test/CodeGen/LoongArch/vararg.ll b/llvm/test/CodeGen/LoongArch/vararg.ll
index a377628c3d7864..112459ab1f4d4c 100644
--- a/llvm/test/CodeGen/LoongArch/vararg.ll
+++ b/llvm/test/CodeGen/LoongArch/vararg.ll
@@ -22,9 +22,9 @@ define i64 @va1(ptr %fmt, ...) {
 ; LA64-FPELIM-NEXT:    st.d $a4, $sp, 48
 ; LA64-FPELIM-NEXT:    st.d $a3, $sp, 40
 ; LA64-FPELIM-NEXT:    st.d $a2, $sp, 32
+; LA64-FPELIM-NEXT:    st.d $a1, $sp, 24
 ; LA64-FPELIM-NEXT:    addi.d $a1, $sp, 32
 ; LA64-FPELIM-NEXT:    st.d $a1, $sp, 8
-; LA64-FPELIM-NEXT:    st.d $a0, $sp, 24
 ; LA64-FPELIM-NEXT:    addi.d $sp, $sp, 80
 ; LA64-FPELIM-NEXT:    ret
 ;
@@ -45,9 +45,9 @@ define i64 @va1(ptr %fmt, ...) {
 ; LA64-WITHFP-NEXT:    st.d $a4, $fp, 32
 ; LA64-WITHFP-NEXT:    st.d $a3, $fp, 24
 ; LA64-WITHFP-NEXT:    st.d $a2, $fp, 16
+; LA64-WITHFP-NEXT:    st.d $a1, $fp, 8
 ; LA64-WITHFP-NEXT:    addi.d $a1, $fp, 16
 ; LA64-WITHFP-NEXT:    st.d $a1, $fp, -24
-; LA64-WITHFP-NEXT:    st.d $a0, $fp, 8
 ; LA64-WITHFP-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64-WITHFP-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
 ; LA64-WITHFP-NEXT:    addi.d $sp, $sp, 96
@@ -73,9 +73,9 @@ define i64 @va1_va_arg(ptr %fmt, ...) nounwind {
 ; LA64-FPELIM-NEXT:    st.d $a4, $sp, 48
 ; LA64-FPELIM-NEXT:    st.d $a3, $sp, 40
 ; LA64-FPELIM-NEXT:    st.d $a2, $sp, 32
+; LA64-FPELIM-NEXT:    st.d $a1, $sp, 24
 ; LA64-FPELIM-NEXT:    addi.d $a1, $sp, 32
 ; LA64-FPELIM-NEXT:    st.d $a1, $sp, 8
-; LA64-FPELIM-NEXT:    st.d $a0, $sp, 24
 ; LA64-FPELIM-NEXT:    addi.d $sp, $sp, 80
 ; LA64-FPELIM-NEXT:    ret
 ;
@@ -92,9 +92,9 @@ define i64 @va1_va_arg(ptr %fmt, ...) nounwind {
 ; LA64-WITHFP-NEXT:    st.d $a4, $fp, 32
 ; LA64-WITHFP-NEXT:    st.d $a3, $fp, 24
 ; LA64-WITHFP-NEXT:    st.d $a2, $fp, 16
+; LA64-WITHFP-NEXT:    st.d $a1, $fp, 8
 ; LA64-WITHFP-NEXT:    addi.d $a1, $fp, 16
 ; LA64-WITHFP-NEXT:    st.d $a1, $fp, -24
-; LA64-WITHFP-NEXT:    st.d $a0, $fp, 8
 ; LA64-WITHFP-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64-WITHFP-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
 ; LA64-WITHFP-NEXT:    addi.d $sp, $sp, 96
@@ -124,9 +124,9 @@ define i64 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
 ; LA64-FPELIM-NEXT:    st.d $a4, $fp, 32
 ; LA64-FPELIM-NEXT:    st.d $a3, $fp, 24
 ; LA64-FPELIM-NEXT:    st.d $a2, $fp, 16
+; LA64-FPELIM-NEXT:    st.d $a1, $fp, 8
 ; LA64-FPELIM-NEXT:    addi.d $a0, $fp, 16
 ; LA64-FPELIM-NEXT:    st.d $a0, $fp, -32
-; LA64-FPELIM-NEXT:    st.d $a1, $fp, 8
 ; LA64-FPELIM-NEXT:    addi.d $a0, $a1, 15
 ; LA64-FPELIM-NEXT:    bstrins.d $a0, $zero, 3, 0
 ; LA64-FPELIM-NEXT:    sub.d $a0, $sp, $a0
@@ -154,9 +154,9 @@ define i64 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
 ; LA64-WITHFP-NEXT:    st.d $a4, $fp, 32
 ; LA64-WITHFP-NEXT:    st.d $a3, $fp, 24
 ; LA64-WITHFP-NEXT:    st.d $a2, $fp, 16
+; LA64-WITHFP-NEXT:    st.d $a1, $fp, 8
 ; LA64-WITHFP-NEXT:    addi.d $a0, $fp, 16
 ; LA64-WITHFP-NEXT:    st.d $a0, $fp, -32
-; LA64-WITHFP-NEXT:    st.d $a1, $fp, 8
 ; LA64-WITHFP-NEXT:    addi.d $a0, $a1, 15
 ; LA64-WITHFP-NEXT:    bstrins.d $a0, $zero, 3, 0
 ; LA64-WITHFP-NEXT:    sub.d $a0, $sp, $a0
@@ -288,16 +288,16 @@ define void @va_aligned_stack_caller() nounwind {
 ; LA64-FPELIM-NEXT:    lu32i.d $a0, 335544
 ; LA64-FPELIM-NEXT:    lu52i.d $a0, $a0, -328
 ; LA64-FPELIM-NEXT:    st.d $a0, $sp, 16
-; LA64-FPELIM-NEXT:    ori $a0, $zero, 1000
-; LA64-FPELIM-NEXT:    st.d $a0, $sp, 64
 ; LA64-FPELIM-NEXT:    st.d $zero, $sp, 88
 ; LA64-FPELIM-NEXT:    st.d $zero, $sp, 80
 ; LA64-FPELIM-NEXT:    st.d $zero, $sp, 72
+; LA64-FPELIM-NEXT:    ori $a5, $zero, 1000
 ; LA64-FPELIM-NEXT:    ori $a1, $zero, 11
 ; LA64-FPELIM-NEXT:    addi.d $a2, $sp, 64
 ; LA64-FPELIM-NEXT:    ori $a3, $zero, 12
 ; LA64-FPELIM-NEXT:    ori $a4, $zero, 13
 ; LA64-FPELIM-NEXT:    ori $a0, $zero, 1
+; LA64-FPELIM-NEXT:    st.d $a5, $sp, 64
 ; LA64-FPELIM-NEXT:    move $a6, $zero
 ; LA64-FPELIM-NEXT:    move $a7, $a0
 ; LA64-FPELIM-NEXT:    bl %plt(va_aligned_stack_callee)
@@ -329,16 +329,16 @@ define void @va_aligned_stack_caller() nounwind {
 ; LA64-WITHFP-NEXT:    lu32i.d $a0, 335544
 ; LA64-WITHFP-NEXT:    lu52i.d $a0, $a0, -328
 ; LA64-WITHFP-NEXT:    st.d $a0, $sp, 16
-; LA64-WITHFP-NEXT:    ori $a0, $zero, 1000
-; LA64-WITHFP-NEXT:    st.d $a0, $fp, -48
 ; LA64-WITHFP-NEXT:    st.d $zero, $fp, -24
 ; LA64-WITHFP-NEXT:    st.d $zero, $fp, -32
 ; LA64-WITHFP-NEXT:    st.d $zero, $fp, -40
+; LA64-WITHFP-NEXT:    ori $a5, $zero, 1000
 ; LA64-WITHFP-NEXT:    ori $a1, $zero, 11
 ; LA64-WITHFP-NEXT:    addi.d $a2, $fp, -48
 ; LA64-WITHFP-NEXT:    ori $a3, $zero, 12
 ; LA64-WITHFP-NEXT:    ori $a4, $zero, 13
 ; LA64-WITHFP-NEXT:    ori $a0, $zero, 1
+; LA64-WITHFP-NEXT:    st.d $a5, $fp, -48
 ; LA64-WITHFP-NEXT:    move $a6, $zero
 ; LA64-WITHFP-NEXT:    move $a7, $a0
 ; LA64-WITHFP-NEXT:    bl %plt(va_aligned_stack_callee)
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index 8009866d3953e1..d03af114bceefe 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -17,69 +17,69 @@ define void @test_zero(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_zero:
 ; LA32F:       # %bb.0:
 ; LA32F-NEXT:    fld.s $fa0, $a0, 12
-; LA32F-NEXT:    movgr2fr.w $fa1, $zero
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fld.s $fa1, $a0, 0
+; LA32F-NEXT:    fld.s $fa2, $a0, 4
+; LA32F-NEXT:    fld.s $fa3, $a0, 8
+; LA32F-NEXT:    movgr2fr.w $fa4, $zero
+; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa4
+; LA32F-NEXT:    fadd.s $fa2, $fa2, $fa4
+; LA32F-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa4
 ; LA32F-NEXT:    fst.s $fa0, $a1, 12
-; LA32F-NEXT:    fld.s $fa0, $a0, 8
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32F-NEXT:    fst.s $fa0, $a1, 8
-; LA32F-NEXT:    fld.s $fa0, $a0, 4
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32F-NEXT:    fst.s $fa0, $a1, 4
-; LA32F-NEXT:    fld.s $fa0, $a0, 0
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32F-NEXT:    fst.s $fa0, $a1, 0
+; LA32F-NEXT:    fst.s $fa3, $a1, 8
+; LA32F-NEXT:    fst.s $fa2, $a1, 4
+; LA32F-NEXT:    fst.s $fa1, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_zero:
 ; LA32D:       # %bb.0:
 ; LA32D-NEXT:    fld.s $fa0, $a0, 12
-; LA32D-NEXT:    movgr2fr.w $fa1, $zero
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fld.s $fa1, $a0, 0
+; LA32D-NEXT:    fld.s $fa2, $a0, 4
+; LA32D-NEXT:    fld.s $fa3, $a0, 8
+; LA32D-NEXT:    movgr2fr.w $fa4, $zero
+; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa4
+; LA32D-NEXT:    fadd.s $fa2, $fa2, $fa4
+; LA32D-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa4
 ; LA32D-NEXT:    fst.s $fa0, $a1, 12
-; LA32D-NEXT:    fld.s $fa0, $a0, 8
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32D-NEXT:    fst.s $fa0, $a1, 8
-; LA32D-NEXT:    fld.s $fa0, $a0, 4
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32D-NEXT:    fst.s $fa0, $a1, 4
-; LA32D-NEXT:    fld.s $fa0, $a0, 0
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32D-NEXT:    fst.s $fa0, $a1, 0
+; LA32D-NEXT:    fst.s $fa3, $a1, 8
+; LA32D-NEXT:    fst.s $fa2, $a1, 4
+; LA32D-NEXT:    fst.s $fa1, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_zero:
 ; LA64F:       # %bb.0:
 ; LA64F-NEXT:    fld.s $fa0, $a0, 12
-; LA64F-NEXT:    movgr2fr.w $fa1, $zero
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fld.s $fa1, $a0, 0
+; LA64F-NEXT:    fld.s $fa2, $a0, 4
+; LA64F-NEXT:    fld.s $fa3, $a0, 8
+; LA64F-NEXT:    movgr2fr.w $fa4, $zero
+; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa4
+; LA64F-NEXT:    fadd.s $fa2, $fa2, $fa4
+; LA64F-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa4
 ; LA64F-NEXT:    fst.s $fa0, $a1, 12
-; LA64F-NEXT:    fld.s $fa0, $a0, 8
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64F-NEXT:    fst.s $fa0, $a1, 8
-; LA64F-NEXT:    fld.s $fa0, $a0, 4
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64F-NEXT:    fst.s $fa0, $a1, 4
-; LA64F-NEXT:    fld.s $fa0, $a0, 0
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64F-NEXT:    fst.s $fa0, $a1, 0
+; LA64F-NEXT:    fst.s $fa3, $a1, 8
+; LA64F-NEXT:    fst.s $fa2, $a1, 4
+; LA64F-NEXT:    fst.s $fa1, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_zero:
 ; LA64D:       # %bb.0:
 ; LA64D-NEXT:    fld.s $fa0, $a0, 12
-; LA64D-NEXT:    movgr2fr.w $fa1, $zero
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fld.s $fa1, $a0, 0
+; LA64D-NEXT:    fld.s $fa2, $a0, 4
+; LA64D-NEXT:    fld.s $fa3, $a0, 8
+; LA64D-NEXT:    movgr2fr.w $fa4, $zero
+; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa4
+; LA64D-NEXT:    fadd.s $fa2, $fa2, $fa4
+; LA64D-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa4
 ; LA64D-NEXT:    fst.s $fa0, $a1, 12
-; LA64D-NEXT:    fld.s $fa0, $a0, 8
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64D-NEXT:    fst.s $fa0, $a1, 8
-; LA64D-NEXT:    fld.s $fa0, $a0, 4
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64D-NEXT:    fst.s $fa0, $a1, 4
-; LA64D-NEXT:    fld.s $fa0, $a0, 0
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64D-NEXT:    fst.s $fa0, $a1, 0
+; LA64D-NEXT:    fst.s $fa3, $a1, 8
+; LA64D-NEXT:    fst.s $fa2, $a1, 4
+; LA64D-NEXT:    fst.s $fa1, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f4, ptr %P
   %R = fadd %f4 %p, zeroinitializer
@@ -90,66 +90,66 @@ define void @test_zero(ptr %P, ptr %S) nounwind {
 define void @test_f2(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_f2:
 ; LA32F:       # %bb.0:
+; LA32F-NEXT:    fld.s $fa0, $a0, 4
+; LA32F-NEXT:    fld.s $fa1, $a0, 0
+; LA32F-NEXT:    addi.w $a0, $zero, 1
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA32F-NEXT:    fld.s $fa0, $a2, 0
-; LA32F-NEXT:    fld.s $fa1, $a0, 4
-; LA32F-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32F-NEXT:    fld.s $fa2, $a2, 0
+; LA32F-NEXT:    movgr2fr.w $fa3, $a0
+; LA32F-NEXT:    ffint.s.w $fa3, $fa3
+; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa3
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa2
 ; LA32F-NEXT:    fst.s $fa0, $a1, 4
-; LA32F-NEXT:    fld.s $fa0, $a0, 0
-; LA32F-NEXT:    addi.w $a0, $zero, 1
-; LA32F-NEXT:    movgr2fr.w $fa1, $a0
-; LA32F-NEXT:    ffint.s.w $fa1, $fa1
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32F-NEXT:    fst.s $fa0, $a1, 0
+; LA32F-NEXT:    fst.s $fa1, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_f2:
 ; LA32D:       # %bb.0:
+; LA32D-NEXT:    fld.s $fa0, $a0, 4
+; LA32D-NEXT:    fld.s $fa1, $a0, 0
+; LA32D-NEXT:    addi.w $a0, $zero, 1
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA32D-NEXT:    fld.s $fa0, $a2, 0
-; LA32D-NEXT:    fld.s $fa1, $a0, 4
-; LA32D-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32D-NEXT:    fld.s $fa2, $a2, 0
+; LA32D-NEXT:    movgr2fr.w $fa3, $a0
+; LA32D-NEXT:    ffint.s.w $fa3, $fa3
+; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa3
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa2
 ; LA32D-NEXT:    fst.s $fa0, $a1, 4
-; LA32D-NEXT:    fld.s $fa0, $a0, 0
-; LA32D-NEXT:    addi.w $a0, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa1, $a0
-; LA32D-NEXT:    ffint.s.w $fa1, $fa1
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32D-NEXT:    fst.s $fa0, $a1, 0
+; LA32D-NEXT:    fst.s $fa1, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_f2:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
-; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA64F-NEXT:    fld.s $fa0, $a2, 0
-; LA64F-NEXT:    fld.s $fa1, $a0, 4
-; LA64F-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA64F-NEXT:    fst.s $fa0, $a1, 4
-; LA64F-NEXT:    fld.s $fa0, $a0, 0
+; LA64F-NEXT:    fld.s $fa0, $a0, 4
+; LA64F-NEXT:    fld.s $fa1, $a0, 0
+; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI1_0)
+; LA64F-NEXT:    fld.s $fa2, $a0, 0
 ; LA64F-NEXT:    addi.w $a0, $zero, 1
-; LA64F-NEXT:    movgr2fr.w $fa1, $a0
-; LA64F-NEXT:    ffint.s.w $fa1, $fa1
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64F-NEXT:    fst.s $fa0, $a1, 0
+; LA64F-NEXT:    movgr2fr.w $fa3, $a0
+; LA64F-NEXT:    ffint.s.w $fa3, $fa3
+; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa3
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA64F-NEXT:    fst.s $fa0, $a1, 4
+; LA64F-NEXT:    fst.s $fa1, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_f2:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA64D-NEXT:    fld.s $fa0, $a2, 0
-; LA64D-NEXT:    fld.s $fa1, $a0, 4
-; LA64D-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA64D-NEXT:    fst.s $fa0, $a1, 4
-; LA64D-NEXT:    fld.s $fa0, $a0, 0
+; LA64D-NEXT:    fld.s $fa0, $a0, 4
+; LA64D-NEXT:    fld.s $fa1, $a0, 0
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI1_0)
+; LA64D-NEXT:    fld.s $fa2, $a0, 0
 ; LA64D-NEXT:    addi.w $a0, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa1, $a0
-; LA64D-NEXT:    ffint.s.w $fa1, $fa1
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64D-NEXT:    fst.s $fa0, $a1, 0
+; LA64D-NEXT:    movgr2fr.w $fa3, $a0
+; LA64D-NEXT:    ffint.s.w $fa3, $fa3
+; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa3
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA64D-NEXT:    fst.s $fa0, $a1, 4
+; LA64D-NEXT:    fst.s $fa1, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f2, ptr %P
   %R = fadd %f2 %p, < float 1.000000e+00, float 2.000000e+00 >
@@ -160,114 +160,114 @@ define void @test_f2(ptr %P, ptr %S) nounwind {
 define void @test_f4(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_f4:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
-; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_0)
-; LA32F-NEXT:    fld.s $fa0, $a2, 0
-; LA32F-NEXT:    fld.s $fa1, $a0, 4
-; LA32F-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
-; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_1)
-; LA32F-NEXT:    fld.s $fa1, $a2, 0
-; LA32F-NEXT:    fld.s $fa2, $a0, 8
-; LA32F-NEXT:    fadd.s $fa1, $fa2, $fa1
-; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
-; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_2)
-; LA32F-NEXT:    fld.s $fa2, $a2, 0
-; LA32F-NEXT:    fld.s $fa3, $a0, 12
-; LA32F-NEXT:    fadd.s $fa2, $fa3, $fa2
-; LA32F-NEXT:    fst.s $fa2, $a1, 12
-; LA32F-NEXT:    fst.s $fa1, $a1, 8
-; LA32F-NEXT:    fst.s $fa0, $a1, 4
-; LA32F-NEXT:    fld.s $fa0, $a0, 0
+; LA32F-NEXT:    fld.s $fa0, $a0, 12
+; LA32F-NEXT:    fld.s $fa1, $a0, 8
+; LA32F-NEXT:    fld.s $fa2, $a0, 4
+; LA32F-NEXT:    fld.s $fa3, $a0, 0
 ; LA32F-NEXT:    addi.w $a0, $zero, 1
-; LA32F-NEXT:    movgr2fr.w $fa1, $a0
-; LA32F-NEXT:    ffint.s.w $fa1, $fa1
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32F-NEXT:    fst.s $fa0, $a1, 0
+; LA32F-NEXT:    movgr2fr.w $fa4, $a0
+; LA32F-NEXT:    ffint.s.w $fa4, $fa4
+; LA32F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA32F-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_0)
+; LA32F-NEXT:    fld.s $fa5, $a0, 0
+; LA32F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
+; LA32F-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_1)
+; LA32F-NEXT:    fld.s $fa6, $a0, 0
+; LA32F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
+; LA32F-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_2)
+; LA32F-NEXT:    fld.s $fa7, $a0, 0
+; LA32F-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA32F-NEXT:    fadd.s $fa2, $fa2, $fa5
+; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa6
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa7
+; LA32F-NEXT:    fst.s $fa0, $a1, 12
+; LA32F-NEXT:    fst.s $fa1, $a1, 8
+; LA32F-NEXT:    fst.s $fa2, $a1, 4
+; LA32F-NEXT:    fst.s $fa3, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_f4:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
-; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_0)
-; LA32D-NEXT:    fld.s $fa0, $a2, 0
-; LA32D-NEXT:    fld.s $fa1, $a0, 4
-; LA32D-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
-; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_1)
-; LA32D-NEXT:    fld.s $fa1, $a2, 0
-; LA32D-NEXT:    fld.s $fa2, $a0, 8
-; LA32D-NEXT:    fadd.s $fa1, $fa2, $fa1
-; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
-; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_2)
-; LA32D-NEXT:    fld.s $fa2, $a2, 0
-; LA32D-NEXT:    fld.s $fa3, $a0, 12
-; LA32D-NEXT:    fadd.s $fa2, $fa3, $fa2
-; LA32D-NEXT:    fst.s $fa2, $a1, 12
-; LA32D-NEXT:    fst.s $fa1, $a1, 8
-; LA32D-NEXT:    fst.s $fa0, $a1, 4
-; LA32D-NEXT:    fld.s $fa0, $a0, 0
+; LA32D-NEXT:    fld.s $fa0, $a0, 12
+; LA32D-NEXT:    fld.s $fa1, $a0, 8
+; LA32D-NEXT:    fld.s $fa2, $a0, 4
+; LA32D-NEXT:    fld.s $fa3, $a0, 0
 ; LA32D-NEXT:    addi.w $a0, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa1, $a0
-; LA32D-NEXT:    ffint.s.w $fa1, $fa1
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA32D-NEXT:    fst.s $fa0, $a1, 0
+; LA32D-NEXT:    movgr2fr.w $fa4, $a0
+; LA32D-NEXT:    ffint.s.w $fa4, $fa4
+; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_0)
+; LA32D-NEXT:    fld.s $fa5, $a0, 0
+; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
+; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_1)
+; LA32D-NEXT:    fld.s $fa6, $a0, 0
+; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
+; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_2)
+; LA32D-NEXT:    fld.s $fa7, $a0, 0
+; LA32D-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA32D-NEXT:    fadd.s $fa2, $fa2, $fa5
+; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa6
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa7
+; LA32D-NEXT:    fst.s $fa0, $a1, 12
+; LA32D-NEXT:    fst.s $fa1, $a1, 8
+; LA32D-NEXT:    fst.s $fa2, $a1, 4
+; LA32D-NEXT:    fst.s $fa3, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_f4:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
-; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_0)
-; LA64F-NEXT:    fld.s $fa0, $a2, 0
-; LA64F-NEXT:    fld.s $fa1, $a0, 4
-; LA64F-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
-; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_1)
-; LA64F-NEXT:    fld.s $fa1, $a2, 0
-; LA64F-NEXT:    fld.s $fa2, $a0, 8
-; LA64F-NEXT:    fadd.s $fa1, $fa2, $fa1
-; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
-; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_2)
-; LA64F-NEXT:    fld.s $fa2, $a2, 0
-; LA64F-NEXT:    fld.s $fa3, $a0, 12
-; LA64F-NEXT:    fadd.s $fa2, $fa3, $fa2
-; LA64F-NEXT:    fst.s $fa2, $a1, 12
-; LA64F-NEXT:    fst.s $fa1, $a1, 8
-; LA64F-NEXT:    fst.s $fa0, $a1, 4
-; LA64F-NEXT:    fld.s $fa0, $a0, 0
+; LA64F-NEXT:    fld.s $fa0, $a0, 12
+; LA64F-NEXT:    fld.s $fa1, $a0, 8
+; LA64F-NEXT:    fld.s $fa2, $a0, 4
+; LA64F-NEXT:    fld.s $fa3, $a0, 0
 ; LA64F-NEXT:    addi.w $a0, $zero, 1
-; LA64F-NEXT:    movgr2fr.w $fa1, $a0
-; LA64F-NEXT:    ffint.s.w $fa1, $fa1
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64F-NEXT:    fst.s $fa0, $a1, 0
+; LA64F-NEXT:    movgr2fr.w $fa4, $a0
+; LA64F-NEXT:    ffint.s.w $fa4, $fa4
+; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
+; LA64F-NEXT:    fld.s $fa5, $a0, 0
+; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
+; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_1)
+; LA64F-NEXT:    fld.s $fa6, $a0, 0
+; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
+; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_2)
+; LA64F-NEXT:    fld.s $fa7, $a0, 0
+; LA64F-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA64F-NEXT:    fadd.s $fa2, $fa2, $fa5
+; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa6
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa7
+; LA64F-NEXT:    fst.s $fa0, $a1, 12
+; LA64F-NEXT:    fst.s $fa1, $a1, 8
+; LA64F-NEXT:    fst.s $fa2, $a1, 4
+; LA64F-NEXT:    fst.s $fa3, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_f4:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_0)
-; LA64D-NEXT:    fld.s $fa0, $a2, 0
-; LA64D-NEXT:    fld.s $fa1, $a0, 4
-; LA64D-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_1)
-; LA64D-NEXT:    fld.s $fa1, $a2, 0
-; LA64D-NEXT:    fld.s $fa2, $a0, 8
-; LA64D-NEXT:    fadd.s $fa1, $fa2, $fa1
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_2)
-; LA64D-NEXT:    fld.s $fa2, $a2, 0
-; LA64D-NEXT:    fld.s $fa3, $a0, 12
-; LA64D-NEXT:    fadd.s $fa2, $fa3, $fa2
-; LA64D-NEXT:    fst.s $fa2, $a1, 12
-; LA64D-NEXT:    fst.s $fa1, $a1, 8
-; LA64D-NEXT:    fst.s $fa0, $a1, 4
-; LA64D-NEXT:    fld.s $fa0, $a0, 0
+; LA64D-NEXT:    fld.s $fa0, $a0, 12
+; LA64D-NEXT:    fld.s $fa1, $a0, 8
+; LA64D-NEXT:    fld.s $fa2, $a0, 4
+; LA64D-NEXT:    fld.s $fa3, $a0, 0
 ; LA64D-NEXT:    addi.w $a0, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa1, $a0
-; LA64D-NEXT:    ffint.s.w $fa1, $fa1
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
-; LA64D-NEXT:    fst.s $fa0, $a1, 0
+; LA64D-NEXT:    movgr2fr.w $fa4, $a0
+; LA64D-NEXT:    ffint.s.w $fa4, $fa4
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
+; LA64D-NEXT:    fld.s $fa5, $a0, 0
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_1)
+; LA64D-NEXT:    fld.s $fa6, $a0, 0
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_2)
+; LA64D-NEXT:    fld.s $fa7, $a0, 0
+; LA64D-NEXT:    fadd.s $fa3, $fa3, $fa4
+; LA64D-NEXT:    fadd.s $fa2, $fa2, $fa5
+; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa6
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa7
+; LA64D-NEXT:    fst.s $fa0, $a1, 12
+; LA64D-NEXT:    fst.s $fa1, $a1, 8
+; LA64D-NEXT:    fst.s $fa2, $a1, 4
+; LA64D-NEXT:    fst.s $fa3, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f4, ptr %P
   %R = fadd %f4 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
@@ -278,162 +278,162 @@ define void @test_f4(ptr %P, ptr %S) nounwind {
 define void @test_f8(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_f8:
 ; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $a2, $zero, 1
+; LA32F-NEXT:    movgr2fr.w $fa0, $a2
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA32F-NEXT:    fld.s $fa0, $a2, 0
-; LA32F-NEXT:    fld.s $fa1, $a0, 4
-; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA32F-NEXT:    fld.s $fa2, $a0, 20
-; LA32F-NEXT:    fadd.s $fa0, $fa2, $fa0
+; LA32F-NEXT:    fld.s $fa1, $a2, 0
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_1)
 ; LA32F-NEXT:    fld.s $fa2, $a2, 0
-; LA32F-NEXT:    fld.s $fa3, $a0, 8
-; LA32F-NEXT:    fadd.s $fa3, $fa3, $fa2
-; LA32F-NEXT:    fld.s $fa4, $a0, 24
-; LA32F-NEXT:    fadd.s $fa2, $fa4, $fa2
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA32F-NEXT:    fld.s $fa4, $a2, 0
-; LA32F-NEXT:    fld.s $fa5, $a0, 12
-; LA32F-NEXT:    fadd.s $fa5, $fa5, $fa4
-; LA32F-NEXT:    fld.s $fa6, $a0, 28
-; LA32F-NEXT:    fadd.s $fa4, $fa6, $fa4
-; LA32F-NEXT:    fst.s $fa4, $a1, 28
-; LA32F-NEXT:    fst.s $fa2, $a1, 24
-; LA32F-NEXT:    fst.s $fa0, $a1, 20
-; LA32F-NEXT:    fst.s $fa5, $a1, 12
-; LA32F-NEXT:    fst.s $fa3, $a1, 8
-; LA32F-NEXT:    fst.s $fa1, $a1, 4
-; LA32F-NEXT:    addi.w $a2, $zero, 1
-; LA32F-NEXT:    movgr2fr.w $fa0, $a2
+; LA32F-NEXT:    fld.s $fa3, $a2, 0
+; LA32F-NEXT:    fld.s $fa4, $a0, 28
+; LA32F-NEXT:    fld.s $fa5, $a0, 24
+; LA32F-NEXT:    fld.s $fa6, $a0, 12
+; LA32F-NEXT:    fld.s $fa7, $a0, 8
+; LA32F-NEXT:    fld.s $ft0, $a0, 0
+; LA32F-NEXT:    fld.s $ft1, $a0, 16
+; LA32F-NEXT:    fld.s $ft2, $a0, 4
 ; LA32F-NEXT:    ffint.s.w $fa0, $fa0
-; LA32F-NEXT:    fld.s $fa1, $a0, 16
-; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA32F-NEXT:    fst.s $fa1, $a1, 16
-; LA32F-NEXT:    fld.s $fa1, $a0, 0
-; LA32F-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA32F-NEXT:    fst.s $fa0, $a1, 0
+; LA32F-NEXT:    fadd.s $ft0, $ft0, $fa0
+; LA32F-NEXT:    fadd.s $fa0, $ft1, $fa0
+; LA32F-NEXT:    fld.s $ft1, $a0, 20
+; LA32F-NEXT:    fadd.s $ft2, $ft2, $fa1
+; LA32F-NEXT:    fadd.s $fa7, $fa7, $fa2
+; LA32F-NEXT:    fadd.s $fa6, $fa6, $fa3
+; LA32F-NEXT:    fadd.s $fa1, $ft1, $fa1
+; LA32F-NEXT:    fadd.s $fa2, $fa5, $fa2
+; LA32F-NEXT:    fadd.s $fa3, $fa4, $fa3
+; LA32F-NEXT:    fst.s $fa3, $a1, 28
+; LA32F-NEXT:    fst.s $fa2, $a1, 24
+; LA32F-NEXT:    fst.s $fa1, $a1, 20
+; LA32F-NEXT:    fst.s $fa6, $a1, 12
+; LA32F-NEXT:    fst.s $fa7, $a1, 8
+; LA32F-NEXT:    fst.s $ft2, $a1, 4
+; LA32F-NEXT:    fst.s $fa0, $a1, 16
+; LA32F-NEXT:    fst.s $ft0, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_f8:
 ; LA32D:       # %bb.0:
+; LA32D-NEXT:    addi.w $a2, $zero, 1
+; LA32D-NEXT:    movgr2fr.w $fa0, $a2
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA32D-NEXT:    fld.s $fa0, $a2, 0
-; LA32D-NEXT:    fld.s $fa1, $a0, 4
-; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA32D-NEXT:    fld.s $fa2, $a0, 20
-; LA32D-NEXT:    fadd.s $fa0, $fa2, $fa0
+; LA32D-NEXT:    fld.s $fa1, $a2, 0
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_1)
 ; LA32D-NEXT:    fld.s $fa2, $a2, 0
-; LA32D-NEXT:    fld.s $fa3, $a0, 8
-; LA32D-NEXT:    fadd.s $fa3, $fa3, $fa2
-; LA32D-NEXT:    fld.s $fa4, $a0, 24
-; LA32D-NEXT:    fadd.s $fa2, $fa4, $fa2
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA32D-NEXT:    fld.s $fa4, $a2, 0
-; LA32D-NEXT:    fld.s $fa5, $a0, 12
-; LA32D-NEXT:    fadd.s $fa5, $fa5, $fa4
-; LA32D-NEXT:    fld.s $fa6, $a0, 28
-; LA32D-NEXT:    fadd.s $fa4, $fa6, $fa4
-; LA32D-NEXT:    fst.s $fa4, $a1, 28
-; LA32D-NEXT:    fst.s $fa2, $a1, 24
-; LA32D-NEXT:    fst.s $fa0, $a1, 20
-; LA32D-NEXT:    fst.s $fa5, $a1, 12
-; LA32D-NEXT:    fst.s $fa3, $a1, 8
-; LA32D-NEXT:    fst.s $fa1, $a1, 4
-; LA32D-NEXT:    addi.w $a2, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa0, $a2
+; LA32D-NEXT:    fld.s $fa3, $a2, 0
+; LA32D-NEXT:    fld.s $fa4, $a0, 28
+; LA32D-NEXT:    fld.s $fa5, $a0, 24
+; LA32D-NEXT:    fld.s $fa6, $a0, 12
+; LA32D-NEXT:    fld.s $fa7, $a0, 8
+; LA32D-NEXT:    fld.s $ft0, $a0, 0
+; LA32D-NEXT:    fld.s $ft1, $a0, 16
+; LA32D-NEXT:    fld.s $ft2, $a0, 4
 ; LA32D-NEXT:    ffint.s.w $fa0, $fa0
-; LA32D-NEXT:    fld.s $fa1, $a0, 16
-; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA32D-NEXT:    fst.s $fa1, $a1, 16
-; LA32D-NEXT:    fld.s $fa1, $a0, 0
-; LA32D-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA32D-NEXT:    fst.s $fa0, $a1, 0
+; LA32D-NEXT:    fadd.s $ft0, $ft0, $fa0
+; LA32D-NEXT:    fadd.s $fa0, $ft1, $fa0
+; LA32D-NEXT:    fld.s $ft1, $a0, 20
+; LA32D-NEXT:    fadd.s $ft2, $ft2, $fa1
+; LA32D-NEXT:    fadd.s $fa7, $fa7, $fa2
+; LA32D-NEXT:    fadd.s $fa6, $fa6, $fa3
+; LA32D-NEXT:    fadd.s $fa1, $ft1, $fa1
+; LA32D-NEXT:    fadd.s $fa2, $fa5, $fa2
+; LA32D-NEXT:    fadd.s $fa3, $fa4, $fa3
+; LA32D-NEXT:    fst.s $fa3, $a1, 28
+; LA32D-NEXT:    fst.s $fa2, $a1, 24
+; LA32D-NEXT:    fst.s $fa1, $a1, 20
+; LA32D-NEXT:    fst.s $fa6, $a1, 12
+; LA32D-NEXT:    fst.s $fa7, $a1, 8
+; LA32D-NEXT:    fst.s $ft2, $a1, 4
+; LA32D-NEXT:    fst.s $fa0, $a1, 16
+; LA32D-NEXT:    fst.s $ft0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_f8:
 ; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.w $a2, $zero, 1
+; LA64F-NEXT:    movgr2fr.w $fa0, $a2
 ; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA64F-NEXT:    fld.s $fa0, $a2, 0
-; LA64F-NEXT:    fld.s $fa1, $a0, 4
-; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA64F-NEXT:    fld.s $fa2, $a0, 20
-; LA64F-NEXT:    fadd.s $fa0, $fa2, $fa0
+; LA64F-NEXT:    fld.s $fa1, $a2, 0
 ; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_1)
 ; LA64F-NEXT:    fld.s $fa2, $a2, 0
-; LA64F-NEXT:    fld.s $fa3, $a0, 8
-; LA64F-NEXT:    fadd.s $fa3, $fa3, $fa2
-; LA64F-NEXT:    fld.s $fa4, $a0, 24
-; LA64F-NEXT:    fadd.s $fa2, $fa4, $fa2
 ; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA64F-NEXT:    fld.s $fa4, $a2, 0
-; LA64F-NEXT:    fld.s $fa5, $a0, 12
-; LA64F-NEXT:    fadd.s $fa5, $fa5, $fa4
-; LA64F-NEXT:    fld.s $fa6, $a0, 28
-; LA64F-NEXT:    fadd.s $fa4, $fa6, $fa4
-; LA64F-NEXT:    fst.s $fa4, $a1, 28
-; LA64F-NEXT:    fst.s $fa2, $a1, 24
-; LA64F-NEXT:    fst.s $fa0, $a1, 20
-; LA64F-NEXT:    fst.s $fa5, $a1, 12
-; LA64F-NEXT:    fst.s $fa3, $a1, 8
-; LA64F-NEXT:    fst.s $fa1, $a1, 4
-; LA64F-NEXT:    addi.w $a2, $zero, 1
-; LA64F-NEXT:    movgr2fr.w $fa0, $a2
+; LA64F-NEXT:    fld.s $fa3, $a2, 0
+; LA64F-NEXT:    fld.s $fa4, $a0, 28
+; LA64F-NEXT:    fld.s $fa5, $a0, 24
+; LA64F-NEXT:    fld.s $fa6, $a0, 12
+; LA64F-NEXT:    fld.s $fa7, $a0, 8
+; LA64F-NEXT:    fld.s $ft0, $a0, 0
+; LA64F-NEXT:    fld.s $ft1, $a0, 16
+; LA64F-NEXT:    fld.s $ft2, $a0, 4
 ; LA64F-NEXT:    ffint.s.w $fa0, $fa0
-; LA64F-NEXT:    fld.s $fa1, $a0, 16
-; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA64F-NEXT:    fst.s $fa1, $a1, 16
-; LA64F-NEXT:    fld.s $fa1, $a0, 0
-; LA64F-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA64F-NEXT:    fst.s $fa0, $a1, 0
+; LA64F-NEXT:    fadd.s $ft0, $ft0, $fa0
+; LA64F-NEXT:    fadd.s $fa0, $ft1, $fa0
+; LA64F-NEXT:    fld.s $ft1, $a0, 20
+; LA64F-NEXT:    fadd.s $ft2, $ft2, $fa1
+; LA64F-NEXT:    fadd.s $fa7, $fa7, $fa2
+; LA64F-NEXT:    fadd.s $fa6, $fa6, $fa3
+; LA64F-NEXT:    fadd.s $fa1, $ft1, $fa1
+; LA64F-NEXT:    fadd.s $fa2, $fa5, $fa2
+; LA64F-NEXT:    fadd.s $fa3, $fa4, $fa3
+; LA64F-NEXT:    fst.s $fa3, $a1, 28
+; LA64F-NEXT:    fst.s $fa2, $a1, 24
+; LA64F-NEXT:    fst.s $fa1, $a1, 20
+; LA64F-NEXT:    fst.s $fa6, $a1, 12
+; LA64F-NEXT:    fst.s $fa7, $a1, 8
+; LA64F-NEXT:    fst.s $ft2, $a1, 4
+; LA64F-NEXT:    fst.s $fa0, $a1, 16
+; LA64F-NEXT:    fst.s $ft0, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_f8:
 ; LA64D:       # %bb.0:
+; LA64D-NEXT:    addi.w $a2, $zero, 1
+; LA64D-NEXT:    movgr2fr.w $fa0, $a2
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA64D-NEXT:    fld.s $fa0, $a2, 0
-; LA64D-NEXT:    fld.s $fa1, $a0, 4
-; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA64D-NEXT:    fld.s $fa2, $a0, 20
-; LA64D-NEXT:    fadd.s $fa0, $fa2, $fa0
+; LA64D-NEXT:    fld.s $fa1, $a2, 0
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_1)
 ; LA64D-NEXT:    fld.s $fa2, $a2, 0
-; LA64D-NEXT:    fld.s $fa3, $a0, 8
-; LA64D-NEXT:    fadd.s $fa3, $fa3, $fa2
-; LA64D-NEXT:    fld.s $fa4, $a0, 24
-; LA64D-NEXT:    fadd.s $fa2, $fa4, $fa2
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA64D-NEXT:    fld.s $fa4, $a2, 0
-; LA64D-NEXT:    fld.s $fa5, $a0, 12
-; LA64D-NEXT:    fadd.s $fa5, $fa5, $fa4
-; LA64D-NEXT:    fld.s $fa6, $a0, 28
-; LA64D-NEXT:    fadd.s $fa4, $fa6, $fa4
-; LA64D-NEXT:    fst.s $fa4, $a1, 28
-; LA64D-NEXT:    fst.s $fa2, $a1, 24
-; LA64D-NEXT:    fst.s $fa0, $a1, 20
-; LA64D-NEXT:    fst.s $fa5, $a1, 12
-; LA64D-NEXT:    fst.s $fa3, $a1, 8
-; LA64D-NEXT:    fst.s $fa1, $a1, 4
-; LA64D-NEXT:    addi.w $a2, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa0, $a2
+; LA64D-NEXT:    fld.s $fa3, $a2, 0
+; LA64D-NEXT:    fld.s $fa4, $a0, 28
+; LA64D-NEXT:    fld.s $fa5, $a0, 24
+; LA64D-NEXT:    fld.s $fa6, $a0, 12
+; LA64D-NEXT:    fld.s $fa7, $a0, 8
+; LA64D-NEXT:    fld.s $ft0, $a0, 0
+; LA64D-NEXT:    fld.s $ft1, $a0, 16
+; LA64D-NEXT:    fld.s $ft2, $a0, 4
 ; LA64D-NEXT:    ffint.s.w $fa0, $fa0
-; LA64D-NEXT:    fld.s $fa1, $a0, 16
-; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa0
-; LA64D-NEXT:    fst.s $fa1, $a1, 16
-; LA64D-NEXT:    fld.s $fa1, $a0, 0
-; LA64D-NEXT:    fadd.s $fa0, $fa1, $fa0
-; LA64D-NEXT:    fst.s $fa0, $a1, 0
+; LA64D-NEXT:    fadd.s $ft0, $ft0, $fa0
+; LA64D-NEXT:    fadd.s $fa0, $ft1, $fa0
+; LA64D-NEXT:    fld.s $ft1, $a0, 20
+; LA64D-NEXT:    fadd.s $ft2, $ft2, $fa1
+; LA64D-NEXT:    fadd.s $fa7, $fa7, $fa2
+; LA64D-NEXT:    fadd.s $fa6, $fa6, $fa3
+; LA64D-NEXT:    fadd.s $fa1, $ft1, $fa1
+; LA64D-NEXT:    fadd.s $fa2, $fa5, $fa2
+; LA64D-NEXT:    fadd.s $fa3, $fa4, $fa3
+; LA64D-NEXT:    fst.s $fa3, $a1, 28
+; LA64D-NEXT:    fst.s $fa2, $a1, 24
+; LA64D-NEXT:    fst.s $fa1, $a1, 20
+; LA64D-NEXT:    fst.s $fa6, $a1, 12
+; LA64D-NEXT:    fst.s $fa7, $a1, 8
+; LA64D-NEXT:    fst.s $ft2, $a1, 4
+; LA64D-NEXT:    fst.s $fa0, $a1, 16
+; LA64D-NEXT:    fst.s $ft0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f8, ptr %P
   %R = fadd %f8 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
@@ -444,47 +444,58 @@ define void @test_f8(ptr %P, ptr %S) nounwind {
 define void @test_d2(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_d2:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    addi.w $sp, $sp, -16
-; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s0, $sp, 4 # 4-byte Folded Spill
-; LA32F-NEXT:    move $fp, $a1
-; LA32F-NEXT:    move $s0, $a0
-; LA32F-NEXT:    ld.w $a0, $a0, 8
-; LA32F-NEXT:    ld.w $a1, $s0, 12
-; LA32F-NEXT:    lu12i.w $a3, 262144
+; LA32F-NEXT:    addi.w $sp, $sp, -32
+; LA32F-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s3, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $fp, $a0, 8
+; LA32F-NEXT:    ld.w $s0, $a0, 12
+; LA32F-NEXT:    ld.w $a2, $a0, 0
+; LA32F-NEXT:    ld.w $a4, $a0, 4
+; LA32F-NEXT:    move $s1, $a1
+; LA32F-NEXT:    lu12i.w $a3, 261888
+; LA32F-NEXT:    move $a0, $a2
+; LA32F-NEXT:    move $a1, $a4
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 8
-; LA32F-NEXT:    st.w $a1, $fp, 12
-; LA32F-NEXT:    ld.w $a0, $s0, 0
-; LA32F-NEXT:    ld.w $a1, $s0, 4
-; LA32F-NEXT:    lu12i.w $a3, 261888
+; LA32F-NEXT:    move $s2, $a0
+; LA32F-NEXT:    move $s3, $a1
+; LA32F-NEXT:    lu12i.w $a3, 262144
+; LA32F-NEXT:    move $a0, $fp
+; LA32F-NEXT:    move $a1, $s0
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 0
-; LA32F-NEXT:    st.w $a1, $fp, 4
-; LA32F-NEXT:    ld.w $s0, $sp, 4 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    st.w $a0, $s1, 8
+; LA32F-NEXT:    st.w $a1, $s1, 12
+; LA32F-NEXT:    st.w $s2, $s1, 0
+; LA32F-NEXT:    st.w $s3, $s1, 4
+; LA32F-NEXT:    ld.w $s3, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s2, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s1, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 32
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_d2:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI4_0)
-; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI4_0)
-; LA32D-NEXT:    fld.d $fa0, $a2, 0
-; LA32D-NEXT:    fld.d $fa1, $a0, 8
-; LA32D-NEXT:    fadd.d $fa0, $fa1, $fa0
-; LA32D-NEXT:    fst.d $fa0, $a1, 8
-; LA32D-NEXT:    fld.d $fa0, $a0, 0
+; LA32D-NEXT:    fld.d $fa0, $a0, 8
+; LA32D-NEXT:    fld.d $fa1, $a0, 0
 ; LA32D-NEXT:    addi.w $a0, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa1, $a0
-; LA32D-NEXT:    ffint.s.w $fa1, $fa1
-; LA32D-NEXT:    fcvt.d.s $fa1, $fa1
-; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa1
-; LA32D-NEXT:    fst.d $fa0, $a1, 0
+; LA32D-NEXT:    movgr2fr.w $fa2, $a0
+; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI4_0)
+; LA32D-NEXT:    fld.d $fa3, $a0, 0
+; LA32D-NEXT:    ffint.s.w $fa2, $fa2
+; LA32D-NEXT:    fcvt.d.s $fa2, $fa2
+; LA32D-NEXT:    fadd.d $fa1, $fa1, $fa2
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa3
+; LA32D-NEXT:    fst.d $fa0, $a1, 8
+; LA32D-NEXT:    fst.d $fa1, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_d2:
@@ -493,16 +504,19 @@ define void @test_d2(ptr %P, ptr %S) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    move $fp, $a1
-; LA64F-NEXT:    move $s0, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 8
-; LA64F-NEXT:    lu52i.d $a1, $zero, 1024
-; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 8
-; LA64F-NEXT:    ld.d $a0, $s0, 0
+; LA64F-NEXT:    st.d $s1, $sp, 0 # 8-byte Folded Spill
+; LA64F-NEXT:    ld.d $fp, $a0, 8
+; LA64F-NEXT:    ld.d $a0, $a0, 0
+; LA64F-NEXT:    move $s0, $a1
 ; LA64F-NEXT:    lu52i.d $a1, $zero, 1023
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 0
+; LA64F-NEXT:    move $s1, $a0
+; LA64F-NEXT:    lu52i.d $a1, $zero, 1024
+; LA64F-NEXT:    move $a0, $fp
+; LA64F-NEXT:    bl %plt(__adddf3)
+; LA64F-NEXT:    st.d $a0, $s0, 8
+; LA64F-NEXT:    st.d $s1, $s0, 0
+; LA64F-NEXT:    ld.d $s1, $sp, 0 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
@@ -511,18 +525,18 @@ define void @test_d2(ptr %P, ptr %S) nounwind {
 ;
 ; LA64D-LABEL: test_d2:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI4_0)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI4_0)
-; LA64D-NEXT:    fld.d $fa0, $a2, 0
-; LA64D-NEXT:    fld.d $fa1, $a0, 8
-; LA64D-NEXT:    fadd.d $fa0, $fa1, $fa0
-; LA64D-NEXT:    fst.d $fa0, $a1, 8
-; LA64D-NEXT:    fld.d $fa0, $a0, 0
+; LA64D-NEXT:    fld.d $fa0, $a0, 8
+; LA64D-NEXT:    fld.d $fa1, $a0, 0
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI4_0)
+; LA64D-NEXT:    fld.d $fa2, $a0, 0
 ; LA64D-NEXT:    addi.d $a0, $zero, 1
-; LA64D-NEXT:    movgr2fr.d $fa1, $a0
-; LA64D-NEXT:    ffint.d.l $fa1, $fa1
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa1
-; LA64D-NEXT:    fst.d $fa0, $a1, 0
+; LA64D-NEXT:    movgr2fr.d $fa3, $a0
+; LA64D-NEXT:    ffint.d.l $fa3, $fa3
+; LA64D-NEXT:    fadd.d $fa1, $fa1, $fa3
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa2
+; LA64D-NEXT:    fst.d $fa0, $a1, 8
+; LA64D-NEXT:    fst.d $fa1, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %d2, ptr %P
   %R = fadd %d2 %p, < double 1.000000e+00, double 2.000000e+00 >
@@ -533,133 +547,171 @@ define void @test_d2(ptr %P, ptr %S) nounwind {
 define void @test_d4(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_d4:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    addi.w $sp, $sp, -16
-; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s0, $sp, 4 # 4-byte Folded Spill
-; LA32F-NEXT:    move $fp, $a1
-; LA32F-NEXT:    move $s0, $a0
-; LA32F-NEXT:    ld.w $a0, $a0, 24
-; LA32F-NEXT:    ld.w $a1, $s0, 28
-; LA32F-NEXT:    lu12i.w $a3, 262400
+; LA32F-NEXT:    addi.w $sp, $sp, -48
+; LA32F-NEXT:    st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s5, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s6, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s7, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $fp, $a0, 24
+; LA32F-NEXT:    ld.w $s0, $a0, 28
+; LA32F-NEXT:    ld.w $s1, $a0, 16
+; LA32F-NEXT:    ld.w $s2, $a0, 20
+; LA32F-NEXT:    ld.w $s3, $a0, 8
+; LA32F-NEXT:    ld.w $s4, $a0, 12
+; LA32F-NEXT:    ld.w $a2, $a0, 0
+; LA32F-NEXT:    ld.w $a4, $a0, 4
+; LA32F-NEXT:    move $s5, $a1
+; LA32F-NEXT:    lu12i.w $a3, 261888
+; LA32F-NEXT:    move $a0, $a2
+; LA32F-NEXT:    move $a1, $a4
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 24
-; LA32F-NEXT:    st.w $a1, $fp, 28
-; LA32F-NEXT:    ld.w $a0, $s0, 16
-; LA32F-NEXT:    ld.w $a1, $s0, 20
-; LA32F-NEXT:    lu12i.w $a3, 262272
+; LA32F-NEXT:    move $s6, $a0
+; LA32F-NEXT:    move $s7, $a1
+; LA32F-NEXT:    lu12i.w $a3, 262144
+; LA32F-NEXT:    move $a0, $s3
+; LA32F-NEXT:    move $a1, $s4
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 16
-; LA32F-NEXT:    st.w $a1, $fp, 20
-; LA32F-NEXT:    ld.w $a0, $s0, 8
-; LA32F-NEXT:    ld.w $a1, $s0, 12
-; LA32F-NEXT:    lu12i.w $a3, 262144
+; LA32F-NEXT:    move $s3, $a0
+; LA32F-NEXT:    move $s4, $a1
+; LA32F-NEXT:    lu12i.w $a3, 262272
+; LA32F-NEXT:    move $a0, $s1
+; LA32F-NEXT:    move $a1, $s2
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 8
-; LA32F-NEXT:    st.w $a1, $fp, 12
-; LA32F-NEXT:    ld.w $a0, $s0, 0
-; LA32F-NEXT:    ld.w $a1, $s0, 4
-; LA32F-NEXT:    lu12i.w $a3, 261888
+; LA32F-NEXT:    move $s1, $a0
+; LA32F-NEXT:    move $s2, $a1
+; LA32F-NEXT:    lu12i.w $a3, 262400
+; LA32F-NEXT:    move $a0, $fp
+; LA32F-NEXT:    move $a1, $s0
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 0
-; LA32F-NEXT:    st.w $a1, $fp, 4
-; LA32F-NEXT:    ld.w $s0, $sp, 4 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    st.w $a0, $s5, 24
+; LA32F-NEXT:    st.w $a1, $s5, 28
+; LA32F-NEXT:    st.w $s1, $s5, 16
+; LA32F-NEXT:    st.w $s2, $s5, 20
+; LA32F-NEXT:    st.w $s3, $s5, 8
+; LA32F-NEXT:    st.w $s4, $s5, 12
+; LA32F-NEXT:    st.w $s6, $s5, 0
+; LA32F-NEXT:    st.w $s7, $s5, 4
+; LA32F-NEXT:    ld.w $s7, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s6, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s5, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 48
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_d4:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_0)
-; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI5_0)
-; LA32D-NEXT:    fld.d $fa0, $a2, 0
-; LA32D-NEXT:    fld.d $fa1, $a0, 8
-; LA32D-NEXT:    fadd.d $fa0, $fa1, $fa0
-; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_1)
-; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI5_1)
-; LA32D-NEXT:    fld.d $fa1, $a2, 0
-; LA32D-NEXT:    fld.d $fa2, $a0, 16
-; LA32D-NEXT:    fadd.d $fa1, $fa2, $fa1
-; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_2)
-; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI5_2)
-; LA32D-NEXT:    fld.d $fa2, $a2, 0
-; LA32D-NEXT:    fld.d $fa3, $a0, 24
-; LA32D-NEXT:    fadd.d $fa2, $fa3, $fa2
-; LA32D-NEXT:    fst.d $fa2, $a1, 24
-; LA32D-NEXT:    fst.d $fa1, $a1, 16
-; LA32D-NEXT:    fst.d $fa0, $a1, 8
-; LA32D-NEXT:    fld.d $fa0, $a0, 0
+; LA32D-NEXT:    fld.d $fa0, $a0, 24
+; LA32D-NEXT:    fld.d $fa1, $a0, 16
+; LA32D-NEXT:    fld.d $fa2, $a0, 8
+; LA32D-NEXT:    fld.d $fa3, $a0, 0
 ; LA32D-NEXT:    addi.w $a0, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa1, $a0
-; LA32D-NEXT:    ffint.s.w $fa1, $fa1
-; LA32D-NEXT:    fcvt.d.s $fa1, $fa1
-; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa1
-; LA32D-NEXT:    fst.d $fa0, $a1, 0
+; LA32D-NEXT:    movgr2fr.w $fa4, $a0
+; LA32D-NEXT:    ffint.s.w $fa4, $fa4
+; LA32D-NEXT:    fcvt.d.s $fa4, $fa4
+; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI5_0)
+; LA32D-NEXT:    fld.d $fa5, $a0, 0
+; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_1)
+; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI5_1)
+; LA32D-NEXT:    fld.d $fa6, $a0, 0
+; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_2)
+; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI5_2)
+; LA32D-NEXT:    fld.d $fa7, $a0, 0
+; LA32D-NEXT:    fadd.d $fa3, $fa3, $fa4
+; LA32D-NEXT:    fadd.d $fa2, $fa2, $fa5
+; LA32D-NEXT:    fadd.d $fa1, $fa1, $fa6
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa7
+; LA32D-NEXT:    fst.d $fa0, $a1, 24
+; LA32D-NEXT:    fst.d $fa1, $a1, 16
+; LA32D-NEXT:    fst.d $fa2, $a1, 8
+; LA32D-NEXT:    fst.d $fa3, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_d4:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -32
-; LA64F-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    move $fp, $a1
-; LA64F-NEXT:    move $s0, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 24
-; LA64F-NEXT:    lu52i.d $a1, $zero, 1025
-; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 24
-; LA64F-NEXT:    ld.d $a0, $s0, 8
-; LA64F-NEXT:    lu52i.d $a1, $zero, 1024
-; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 8
-; LA64F-NEXT:    ld.d $a0, $s0, 0
-; LA64F-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 0
-; LA64F-NEXT:    ld.d $a0, $s0, 16
+; LA64F-NEXT:    addi.d $sp, $sp, -48
+; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 0 # 8-byte Folded Spill
+; LA64F-NEXT:    ld.d $fp, $a0, 24
+; LA64F-NEXT:    ld.d $s0, $a0, 8
+; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a0, $a0, 16
+; LA64F-NEXT:    move $s2, $a1
 ; LA64F-NEXT:    ori $a1, $zero, 0
 ; LA64F-NEXT:    lu32i.d $a1, -524288
 ; LA64F-NEXT:    lu52i.d $a1, $a1, 1024
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 16
-; LA64F-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 32
+; LA64F-NEXT:    move $s3, $a0
+; LA64F-NEXT:    lu52i.d $a1, $zero, 1023
+; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    bl %plt(__adddf3)
+; LA64F-NEXT:    move $s1, $a0
+; LA64F-NEXT:    lu52i.d $a1, $zero, 1024
+; LA64F-NEXT:    move $a0, $s0
+; LA64F-NEXT:    bl %plt(__adddf3)
+; LA64F-NEXT:    move $s0, $a0
+; LA64F-NEXT:    lu52i.d $a1, $zero, 1025
+; LA64F-NEXT:    move $a0, $fp
+; LA64F-NEXT:    bl %plt(__adddf3)
+; LA64F-NEXT:    st.d $a0, $s2, 24
+; LA64F-NEXT:    st.d $s0, $s2, 8
+; LA64F-NEXT:    st.d $s1, $s2, 0
+; LA64F-NEXT:    st.d $s3, $s2, 16
+; LA64F-NEXT:    ld.d $s3, $sp, 0 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 48
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_d4:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_0)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI5_0)
-; LA64D-NEXT:    fld.d $fa0, $a2, 0
-; LA64D-NEXT:    fld.d $fa1, $a0, 8
-; LA64D-NEXT:    fadd.d $fa0, $fa1, $fa0
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_1)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI5_1)
-; LA64D-NEXT:    fld.d $fa1, $a2, 0
-; LA64D-NEXT:    fld.d $fa2, $a0, 16
-; LA64D-NEXT:    fadd.d $fa1, $fa2, $fa1
-; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_2)
-; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI5_2)
-; LA64D-NEXT:    fld.d $fa2, $a2, 0
-; LA64D-NEXT:    fld.d $fa3, $a0, 24
-; LA64D-NEXT:    fadd.d $fa2, $fa3, $fa2
-; LA64D-NEXT:    fst.d $fa2, $a1, 24
-; LA64D-NEXT:    fst.d $fa1, $a1, 16
-; LA64D-NEXT:    fst.d $fa0, $a1, 8
-; LA64D-NEXT:    fld.d $fa0, $a0, 0
+; LA64D-NEXT:    fld.d $fa0, $a0, 24
+; LA64D-NEXT:    fld.d $fa1, $a0, 16
+; LA64D-NEXT:    fld.d $fa2, $a0, 8
+; LA64D-NEXT:    fld.d $fa3, $a0, 0
 ; LA64D-NEXT:    addi.d $a0, $zero, 1
-; LA64D-NEXT:    movgr2fr.d $fa1, $a0
-; LA64D-NEXT:    ffint.d.l $fa1, $fa1
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa1
-; LA64D-NEXT:    fst.d $fa0, $a1, 0
+; LA64D-NEXT:    movgr2fr.d $fa4, $a0
+; LA64D-NEXT:    ffint.d.l $fa4, $fa4
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_0)
+; LA64D-NEXT:    fld.d $fa5, $a0, 0
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_1)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_1)
+; LA64D-NEXT:    fld.d $fa6, $a0, 0
+; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_2)
+; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_2)
+; LA64D-NEXT:    fld.d $fa7, $a0, 0
+; LA64D-NEXT:    fadd.d $fa3, $fa3, $fa4
+; LA64D-NEXT:    fadd.d $fa2, $fa2, $fa5
+; LA64D-NEXT:    fadd.d $fa1, $fa1, $fa6
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa7
+; LA64D-NEXT:    fst.d $fa0, $a1, 24
+; LA64D-NEXT:    fst.d $fa1, $a1, 16
+; LA64D-NEXT:    fst.d $fa2, $a1, 8
+; LA64D-NEXT:    fst.d $fa3, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %d4, ptr %P
   %R = fadd %d4 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 >
@@ -670,223 +722,298 @@ define void @test_d4(ptr %P, ptr %S) nounwind {
 define void @test_d8(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_d8:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    addi.w $sp, $sp, -32
-; LA32F-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s1, $sp, 16 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s2, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s3, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s4, $sp, 4 # 4-byte Folded Spill
+; LA32F-NEXT:    addi.w $sp, $sp, -96
+; LA32F-NEXT:    st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s0, $sp, 84 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s1, $sp, 80 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s2, $sp, 76 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s3, $sp, 72 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s4, $sp, 68 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s5, $sp, 64 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s6, $sp, 60 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s7, $sp, 56 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s8, $sp, 52 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $a2, $a0, 56
+; LA32F-NEXT:    st.w $a2, $sp, 48 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $a2, $a0, 60
+; LA32F-NEXT:    st.w $a2, $sp, 44 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $a2, $a0, 48
+; LA32F-NEXT:    st.w $a2, $sp, 32 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $a2, $a0, 52
+; LA32F-NEXT:    st.w $a2, $sp, 28 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $a2, $a0, 40
+; LA32F-NEXT:    st.w $a2, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $a2, $a0, 44
+; LA32F-NEXT:    st.w $a2, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $a2, $a0, 32
+; LA32F-NEXT:    st.w $a2, $sp, 0 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $s4, $a0, 36
+; LA32F-NEXT:    ld.w $s5, $a0, 24
+; LA32F-NEXT:    ld.w $s6, $a0, 28
+; LA32F-NEXT:    ld.w $s1, $a0, 16
+; LA32F-NEXT:    ld.w $s2, $a0, 20
+; LA32F-NEXT:    ld.w $s7, $a0, 8
+; LA32F-NEXT:    ld.w $s0, $a0, 12
+; LA32F-NEXT:    ld.w $a2, $a0, 0
+; LA32F-NEXT:    ld.w $a3, $a0, 4
 ; LA32F-NEXT:    move $fp, $a1
-; LA32F-NEXT:    move $s0, $a0
-; LA32F-NEXT:    ld.w $a0, $a0, 56
-; LA32F-NEXT:    ld.w $a1, $s0, 60
-; LA32F-NEXT:    lu12i.w $s1, 262400
-; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s1
-; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 56
-; LA32F-NEXT:    st.w $a1, $fp, 60
-; LA32F-NEXT:    ld.w $a0, $s0, 48
-; LA32F-NEXT:    ld.w $a1, $s0, 52
-; LA32F-NEXT:    lu12i.w $s2, 262272
+; LA32F-NEXT:    lu12i.w $s8, 261888
+; LA32F-NEXT:    move $a0, $a2
+; LA32F-NEXT:    move $a1, $a3
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s2
+; LA32F-NEXT:    move $a3, $s8
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 48
-; LA32F-NEXT:    st.w $a1, $fp, 52
-; LA32F-NEXT:    ld.w $a0, $s0, 40
-; LA32F-NEXT:    ld.w $a1, $s0, 44
+; LA32F-NEXT:    st.w $a0, $sp, 40 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $a1, $sp, 36 # 4-byte Folded Spill
 ; LA32F-NEXT:    lu12i.w $s3, 262144
+; LA32F-NEXT:    move $a0, $s7
+; LA32F-NEXT:    move $a1, $s0
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    move $a3, $s3
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 40
-; LA32F-NEXT:    st.w $a1, $fp, 44
-; LA32F-NEXT:    ld.w $a0, $s0, 32
-; LA32F-NEXT:    ld.w $a1, $s0, 36
-; LA32F-NEXT:    lu12i.w $s4, 261888
+; LA32F-NEXT:    st.w $a0, $sp, 24 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $a1, $sp, 20 # 4-byte Folded Spill
+; LA32F-NEXT:    lu12i.w $s0, 262272
+; LA32F-NEXT:    move $a0, $s1
+; LA32F-NEXT:    move $a1, $s2
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s4
+; LA32F-NEXT:    move $a3, $s0
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 32
-; LA32F-NEXT:    st.w $a1, $fp, 36
-; LA32F-NEXT:    ld.w $a0, $s0, 24
-; LA32F-NEXT:    ld.w $a1, $s0, 28
+; LA32F-NEXT:    st.w $a0, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $a1, $sp, 4 # 4-byte Folded Spill
+; LA32F-NEXT:    lu12i.w $s7, 262400
+; LA32F-NEXT:    move $a0, $s5
+; LA32F-NEXT:    move $a1, $s6
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s1
+; LA32F-NEXT:    move $a3, $s7
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 24
-; LA32F-NEXT:    st.w $a1, $fp, 28
-; LA32F-NEXT:    ld.w $a0, $s0, 16
-; LA32F-NEXT:    ld.w $a1, $s0, 20
+; LA32F-NEXT:    move $s5, $a0
+; LA32F-NEXT:    move $s6, $a1
+; LA32F-NEXT:    ld.w $a0, $sp, 0 # 4-byte Folded Reload
+; LA32F-NEXT:    move $a1, $s4
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s2
+; LA32F-NEXT:    move $a3, $s8
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 16
-; LA32F-NEXT:    st.w $a1, $fp, 20
-; LA32F-NEXT:    ld.w $a0, $s0, 8
-; LA32F-NEXT:    ld.w $a1, $s0, 12
+; LA32F-NEXT:    move $s4, $a0
+; LA32F-NEXT:    move $s8, $a1
+; LA32F-NEXT:    ld.w $a0, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $a1, $sp, 12 # 4-byte Folded Reload
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    move $a3, $s3
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 8
-; LA32F-NEXT:    st.w $a1, $fp, 12
-; LA32F-NEXT:    ld.w $a0, $s0, 0
-; LA32F-NEXT:    ld.w $a1, $s0, 4
+; LA32F-NEXT:    move $s3, $a0
+; LA32F-NEXT:    move $s1, $a1
+; LA32F-NEXT:    ld.w $a0, $sp, 32 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $a1, $sp, 28 # 4-byte Folded Reload
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s4
+; LA32F-NEXT:    move $a3, $s0
 ; LA32F-NEXT:    bl %plt(__adddf3)
+; LA32F-NEXT:    move $s0, $a0
+; LA32F-NEXT:    move $s2, $a1
+; LA32F-NEXT:    ld.w $a0, $sp, 48 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $a1, $sp, 44 # 4-byte Folded Reload
+; LA32F-NEXT:    move $a2, $zero
+; LA32F-NEXT:    move $a3, $s7
+; LA32F-NEXT:    bl %plt(__adddf3)
+; LA32F-NEXT:    st.w $a0, $fp, 56
+; LA32F-NEXT:    st.w $a1, $fp, 60
+; LA32F-NEXT:    st.w $s0, $fp, 48
+; LA32F-NEXT:    st.w $s2, $fp, 52
+; LA32F-NEXT:    st.w $s3, $fp, 40
+; LA32F-NEXT:    st.w $s1, $fp, 44
+; LA32F-NEXT:    st.w $s4, $fp, 32
+; LA32F-NEXT:    st.w $s8, $fp, 36
+; LA32F-NEXT:    st.w $s5, $fp, 24
+; LA32F-NEXT:    st.w $s6, $fp, 28
+; LA32F-NEXT:    ld.w $a0, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 16
+; LA32F-NEXT:    ld.w $a0, $sp, 4 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 20
+; LA32F-NEXT:    ld.w $a0, $sp, 24 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 8
+; LA32F-NEXT:    ld.w $a0, $sp, 20 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 12
+; LA32F-NEXT:    ld.w $a0, $sp, 40 # 4-byte Folded Reload
 ; LA32F-NEXT:    st.w $a0, $fp, 0
-; LA32F-NEXT:    st.w $a1, $fp, 4
-; LA32F-NEXT:    ld.w $s4, $sp, 4 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s3, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s2, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s1, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32F-NEXT:    addi.w $sp, $sp, 32
+; LA32F-NEXT:    ld.w $a0, $sp, 36 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 4
+; LA32F-NEXT:    ld.w $s8, $sp, 52 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s7, $sp, 56 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s6, $sp, 60 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s5, $sp, 64 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s4, $sp, 68 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s3, $sp, 72 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s2, $sp, 76 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s1, $sp, 80 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s0, $sp, 84 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 96
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_d8:
 ; LA32D:       # %bb.0:
+; LA32D-NEXT:    addi.w $a2, $zero, 1
+; LA32D-NEXT:    movgr2fr.w $fa0, $a2
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_0)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI6_0)
-; LA32D-NEXT:    fld.d $fa0, $a2, 0
-; LA32D-NEXT:    fld.d $fa1, $a0, 8
-; LA32D-NEXT:    fadd.d $fa1, $fa1, $fa0
-; LA32D-NEXT:    fld.d $fa2, $a0, 40
-; LA32D-NEXT:    fadd.d $fa0, $fa2, $fa0
+; LA32D-NEXT:    fld.d $fa1, $a2, 0
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_1)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI6_1)
 ; LA32D-NEXT:    fld.d $fa2, $a2, 0
-; LA32D-NEXT:    fld.d $fa3, $a0, 16
-; LA32D-NEXT:    fadd.d $fa3, $fa3, $fa2
-; LA32D-NEXT:    fld.d $fa4, $a0, 48
-; LA32D-NEXT:    fadd.d $fa2, $fa4, $fa2
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_2)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI6_2)
-; LA32D-NEXT:    fld.d $fa4, $a2, 0
-; LA32D-NEXT:    fld.d $fa5, $a0, 24
-; LA32D-NEXT:    fadd.d $fa5, $fa5, $fa4
-; LA32D-NEXT:    fld.d $fa6, $a0, 56
-; LA32D-NEXT:    fadd.d $fa4, $fa6, $fa4
-; LA32D-NEXT:    fst.d $fa4, $a1, 56
-; LA32D-NEXT:    fst.d $fa2, $a1, 48
-; LA32D-NEXT:    fst.d $fa0, $a1, 40
-; LA32D-NEXT:    fst.d $fa5, $a1, 24
-; LA32D-NEXT:    fst.d $fa3, $a1, 16
-; LA32D-NEXT:    fst.d $fa1, $a1, 8
-; LA32D-NEXT:    addi.w $a2, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa0, $a2
+; LA32D-NEXT:    fld.d $fa3, $a2, 0
+; LA32D-NEXT:    fld.d $fa4, $a0, 56
+; LA32D-NEXT:    fld.d $fa5, $a0, 48
+; LA32D-NEXT:    fld.d $fa6, $a0, 24
+; LA32D-NEXT:    fld.d $fa7, $a0, 16
+; LA32D-NEXT:    fld.d $ft0, $a0, 8
+; LA32D-NEXT:    fld.d $ft1, $a0, 0
+; LA32D-NEXT:    fld.d $ft2, $a0, 32
 ; LA32D-NEXT:    ffint.s.w $fa0, $fa0
 ; LA32D-NEXT:    fcvt.d.s $fa0, $fa0
-; LA32D-NEXT:    fld.d $fa1, $a0, 32
-; LA32D-NEXT:    fadd.d $fa1, $fa1, $fa0
-; LA32D-NEXT:    fst.d $fa1, $a1, 32
-; LA32D-NEXT:    fld.d $fa1, $a0, 0
-; LA32D-NEXT:    fadd.d $fa0, $fa1, $fa0
-; LA32D-NEXT:    fst.d $fa0, $a1, 0
+; LA32D-NEXT:    fadd.d $ft1, $ft1, $fa0
+; LA32D-NEXT:    fadd.d $fa0, $ft2, $fa0
+; LA32D-NEXT:    fld.d $ft2, $a0, 40
+; LA32D-NEXT:    fadd.d $ft0, $ft0, $fa1
+; LA32D-NEXT:    fadd.d $fa7, $fa7, $fa2
+; LA32D-NEXT:    fadd.d $fa6, $fa6, $fa3
+; LA32D-NEXT:    fadd.d $fa1, $ft2, $fa1
+; LA32D-NEXT:    fadd.d $fa2, $fa5, $fa2
+; LA32D-NEXT:    fadd.d $fa3, $fa4, $fa3
+; LA32D-NEXT:    fst.d $fa3, $a1, 56
+; LA32D-NEXT:    fst.d $fa2, $a1, 48
+; LA32D-NEXT:    fst.d $fa1, $a1, 40
+; LA32D-NEXT:    fst.d $fa6, $a1, 24
+; LA32D-NEXT:    fst.d $fa7, $a1, 16
+; LA32D-NEXT:    fst.d $ft0, $a1, 8
+; LA32D-NEXT:    fst.d $fa0, $a1, 32
+; LA32D-NEXT:    fst.d $ft1, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_d8:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -48
-; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 0 # 8-byte Folded Spill
+; LA64F-NEXT:    addi.d $sp, $sp, -112
+; LA64F-NEXT:    st.d $ra, $sp, 104 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 96 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 88 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 80 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 72 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 64 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s4, $sp, 56 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s5, $sp, 48 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s6, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s7, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s8, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    ld.d $a2, $a0, 56
+; LA64F-NEXT:    st.d $a2, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    ld.d $s1, $a0, 40
+; LA64F-NEXT:    ld.d $s2, $a0, 32
+; LA64F-NEXT:    ld.d $s3, $a0, 24
+; LA64F-NEXT:    ld.d $s4, $a0, 8
+; LA64F-NEXT:    ld.d $s5, $a0, 0
+; LA64F-NEXT:    ld.d $s6, $a0, 48
+; LA64F-NEXT:    ld.d $a0, $a0, 16
 ; LA64F-NEXT:    move $fp, $a1
-; LA64F-NEXT:    move $s0, $a0
-; LA64F-NEXT:    ld.d $a0, $a0, 56
-; LA64F-NEXT:    lu52i.d $s1, $zero, 1025
-; LA64F-NEXT:    move $a1, $s1
+; LA64F-NEXT:    ori $a1, $zero, 0
+; LA64F-NEXT:    lu32i.d $a1, -524288
+; LA64F-NEXT:    lu52i.d $s7, $a1, 1024
+; LA64F-NEXT:    move $a1, $s7
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 56
-; LA64F-NEXT:    ld.d $a0, $s0, 40
-; LA64F-NEXT:    lu52i.d $s2, $zero, 1024
-; LA64F-NEXT:    move $a1, $s2
+; LA64F-NEXT:    st.d $a0, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    move $a0, $s6
+; LA64F-NEXT:    move $a1, $s7
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 40
-; LA64F-NEXT:    ld.d $a0, $s0, 32
-; LA64F-NEXT:    lu52i.d $s3, $zero, 1023
-; LA64F-NEXT:    move $a1, $s3
+; LA64F-NEXT:    move $s6, $a0
+; LA64F-NEXT:    lu52i.d $s7, $zero, 1023
+; LA64F-NEXT:    move $a0, $s5
+; LA64F-NEXT:    move $a1, $s7
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 32
-; LA64F-NEXT:    ld.d $a0, $s0, 24
-; LA64F-NEXT:    move $a1, $s1
+; LA64F-NEXT:    move $s5, $a0
+; LA64F-NEXT:    lu52i.d $s0, $zero, 1024
+; LA64F-NEXT:    move $a0, $s4
+; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 24
-; LA64F-NEXT:    ld.d $a0, $s0, 8
-; LA64F-NEXT:    move $a1, $s2
+; LA64F-NEXT:    move $s4, $a0
+; LA64F-NEXT:    lu52i.d $s8, $zero, 1025
+; LA64F-NEXT:    move $a0, $s3
+; LA64F-NEXT:    move $a1, $s8
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 8
-; LA64F-NEXT:    ld.d $a0, $s0, 0
-; LA64F-NEXT:    move $a1, $s3
+; LA64F-NEXT:    move $s3, $a0
+; LA64F-NEXT:    move $a0, $s2
+; LA64F-NEXT:    move $a1, $s7
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 0
-; LA64F-NEXT:    ori $a0, $zero, 0
-; LA64F-NEXT:    lu32i.d $a0, -524288
-; LA64F-NEXT:    lu52i.d $s1, $a0, 1024
-; LA64F-NEXT:    ld.d $a0, $s0, 48
-; LA64F-NEXT:    move $a1, $s1
+; LA64F-NEXT:    move $s2, $a0
+; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 48
-; LA64F-NEXT:    ld.d $a0, $s0, 16
-; LA64F-NEXT:    move $a1, $s1
+; LA64F-NEXT:    move $s0, $a0
+; LA64F-NEXT:    ld.d $a0, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a1, $s8
 ; LA64F-NEXT:    bl %plt(__adddf3)
+; LA64F-NEXT:    st.d $a0, $fp, 56
+; LA64F-NEXT:    st.d $s0, $fp, 40
+; LA64F-NEXT:    st.d $s2, $fp, 32
+; LA64F-NEXT:    st.d $s3, $fp, 24
+; LA64F-NEXT:    st.d $s4, $fp, 8
+; LA64F-NEXT:    st.d $s5, $fp, 0
+; LA64F-NEXT:    st.d $s6, $fp, 48
+; LA64F-NEXT:    ld.d $a0, $sp, 8 # 8-byte Folded Reload
 ; LA64F-NEXT:    st.d $a0, $fp, 16
-; LA64F-NEXT:    ld.d $s3, $sp, 0 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 8 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 48
+; LA64F-NEXT:    ld.d $s8, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s7, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s6, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s5, $sp, 48 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s4, $sp, 56 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s3, $sp, 64 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 72 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 80 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 88 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 96 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 112
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_d8:
 ; LA64D:       # %bb.0:
+; LA64D-NEXT:    addi.d $a2, $zero, 1
+; LA64D-NEXT:    movgr2fr.d $fa0, $a2
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_0)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI6_0)
-; LA64D-NEXT:    fld.d $fa0, $a2, 0
-; LA64D-NEXT:    fld.d $fa1, $a0, 8
-; LA64D-NEXT:    fadd.d $fa1, $fa1, $fa0
-; LA64D-NEXT:    fld.d $fa2, $a0, 40
-; LA64D-NEXT:    fadd.d $fa0, $fa2, $fa0
+; LA64D-NEXT:    fld.d $fa1, $a2, 0
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_1)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI6_1)
 ; LA64D-NEXT:    fld.d $fa2, $a2, 0
-; LA64D-NEXT:    fld.d $fa3, $a0, 16
-; LA64D-NEXT:    fadd.d $fa3, $fa3, $fa2
-; LA64D-NEXT:    fld.d $fa4, $a0, 48
-; LA64D-NEXT:    fadd.d $fa2, $fa4, $fa2
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_2)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI6_2)
-; LA64D-NEXT:    fld.d $fa4, $a2, 0
-; LA64D-NEXT:    fld.d $fa5, $a0, 24
-; LA64D-NEXT:    fadd.d $fa5, $fa5, $fa4
-; LA64D-NEXT:    fld.d $fa6, $a0, 56
-; LA64D-NEXT:    fadd.d $fa4, $fa6, $fa4
-; LA64D-NEXT:    fst.d $fa4, $a1, 56
-; LA64D-NEXT:    fst.d $fa2, $a1, 48
-; LA64D-NEXT:    fst.d $fa0, $a1, 40
-; LA64D-NEXT:    fst.d $fa5, $a1, 24
-; LA64D-NEXT:    fst.d $fa3, $a1, 16
-; LA64D-NEXT:    fst.d $fa1, $a1, 8
-; LA64D-NEXT:    addi.d $a2, $zero, 1
-; LA64D-NEXT:    movgr2fr.d $fa0, $a2
+; LA64D-NEXT:    fld.d $fa3, $a2, 0
+; LA64D-NEXT:    fld.d $fa4, $a0, 56
+; LA64D-NEXT:    fld.d $fa5, $a0, 48
+; LA64D-NEXT:    fld.d $fa6, $a0, 24
+; LA64D-NEXT:    fld.d $fa7, $a0, 16
+; LA64D-NEXT:    fld.d $ft0, $a0, 0
+; LA64D-NEXT:    fld.d $ft1, $a0, 32
+; LA64D-NEXT:    fld.d $ft2, $a0, 8
 ; LA64D-NEXT:    ffint.d.l $fa0, $fa0
-; LA64D-NEXT:    fld.d $fa1, $a0, 32
-; LA64D-NEXT:    fadd.d $fa1, $fa1, $fa0
-; LA64D-NEXT:    fst.d $fa1, $a1, 32
-; LA64D-NEXT:    fld.d $fa1, $a0, 0
-; LA64D-NEXT:    fadd.d $fa0, $fa1, $fa0
-; LA64D-NEXT:    fst.d $fa0, $a1, 0
+; LA64D-NEXT:    fadd.d $ft0, $ft0, $fa0
+; LA64D-NEXT:    fadd.d $fa0, $ft1, $fa0
+; LA64D-NEXT:    fld.d $ft1, $a0, 40
+; LA64D-NEXT:    fadd.d $ft2, $ft2, $fa1
+; LA64D-NEXT:    fadd.d $fa7, $fa7, $fa2
+; LA64D-NEXT:    fadd.d $fa6, $fa6, $fa3
+; LA64D-NEXT:    fadd.d $fa1, $ft1, $fa1
+; LA64D-NEXT:    fadd.d $fa2, $fa5, $fa2
+; LA64D-NEXT:    fadd.d $fa3, $fa4, $fa3
+; LA64D-NEXT:    fst.d $fa3, $a1, 56
+; LA64D-NEXT:    fst.d $fa2, $a1, 48
+; LA64D-NEXT:    fst.d $fa1, $a1, 40
+; LA64D-NEXT:    fst.d $fa6, $a1, 24
+; LA64D-NEXT:    fst.d $fa7, $a1, 16
+; LA64D-NEXT:    fst.d $ft2, $a1, 8
+; LA64D-NEXT:    fst.d $fa0, $a1, 32
+; LA64D-NEXT:    fst.d $ft0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %d8, ptr %P
   %R = fadd %d8 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 >
diff --git a/llvm/test/CodeGen/LoongArch/zext-with-load-is-free.ll b/llvm/test/CodeGen/LoongArch/zext-with-load-is-free.ll
index d05a0c7453485c..d5c505f7160e06 100644
--- a/llvm/test/CodeGen/LoongArch/zext-with-load-is-free.ll
+++ b/llvm/test/CodeGen/LoongArch/zext-with-load-is-free.ll
@@ -23,10 +23,10 @@ exit:
 define zeroext i16 @test_zext_i16(ptr %p) nounwind {
 ; LA32-LABEL: test_zext_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.bu $a1, $a0, 0
-; LA32-NEXT:    ld.bu $a0, $a0, 1
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    ld.bu $a1, $a0, 1
+; LA32-NEXT:    ld.bu $a0, $a0, 0
+; LA32-NEXT:    slli.w $a1, $a1, 8
+; LA32-NEXT:    or $a0, $a1, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_zext_i16:
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected
index e5bdc8b010e4da..56b6c90a2f6f33 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected
@@ -72,11 +72,11 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
-; CHECK-NEXT:    st.w $zero, $fp, -16
 ; CHECK-NEXT:    st.w $zero, $fp, -12
+; CHECK-NEXT:    st.w $zero, $fp, -16
+; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    beqz $zero, .LBB0_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    beqz $a0, .LBB0_4
@@ -85,10 +85,9 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    b .LBB0_5
 ; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 2
 ; CHECK-NEXT:    st.w $a0, $fp, -20
-; CHECK-NEXT:    ori $a0, $zero, 1
-; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -96,10 +95,10 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    bnez $a0, .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    ori $a0, $zero, 2
-; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    st.w $a0, $fp, -16
+; CHECK-NEXT:    ori $a0, $zero, 2
+; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -121,24 +120,24 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(x)
 ; CHECK-NEXT:    addi.w $a0, $a0, %pc_lo12(x)
 ; CHECK-NEXT:    ori $a1, $zero, 1
-; CHECK-NEXT:    st.w $a1, $a0, 0
-; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    ori $a0, $zero, 2
-; CHECK-NEXT:    st.w $a0, $fp, -20
-; CHECK-NEXT:    ori $a2, $zero, 3
-; CHECK-NEXT:    st.w $a2, $fp, -24
-; CHECK-NEXT:    ori $a3, $zero, 4
-; CHECK-NEXT:    st.w $a3, $fp, -28
+; CHECK-NEXT:    ori $a2, $zero, 2
+; CHECK-NEXT:    st.w $a2, $fp, -20
+; CHECK-NEXT:    ori $a3, $zero, 3
+; CHECK-NEXT:    st.w $a3, $fp, -24
+; CHECK-NEXT:    ori $a4, $zero, 4
+; CHECK-NEXT:    st.w $a4, $fp, -28
+; CHECK-NEXT:    st.w $a1, $a0, 0
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    st.w $a2, $fp, -24
-; CHECK-NEXT:    st.w $a3, $fp, -28
+; CHECK-NEXT:    st.w $a2, $fp, -20
+; CHECK-NEXT:    st.w $a3, $fp, -24
+; CHECK-NEXT:    st.w $a4, $fp, -28
 ; CHECK-NEXT:    move $a0, $zero
 ; CHECK-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
 ; CHECK-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected
index 20e34cdf3c64c5..2e063202fcf79e 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected
@@ -13,11 +13,11 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
-; CHECK-NEXT:    st.w $zero, $fp, -16
 ; CHECK-NEXT:    st.w $zero, $fp, -12
+; CHECK-NEXT:    st.w $zero, $fp, -16
+; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    beqz $zero, .LBB0_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    beqz $a0, .LBB0_4
@@ -26,10 +26,9 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    b .LBB0_5
 ; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 2
 ; CHECK-NEXT:    st.w $a0, $fp, -20
-; CHECK-NEXT:    ori $a0, $zero, 1
-; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -37,10 +36,10 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    bnez $a0, .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    ori $a0, $zero, 2
-; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    st.w $a0, $fp, -16
+; CHECK-NEXT:    ori $a0, $zero, 2
+; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -98,24 +97,24 @@ define dso_local i32 @main() #0 {
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(x)
 ; CHECK-NEXT:    addi.w $a0, $a0, %pc_lo12(x)
 ; CHECK-NEXT:    ori $a1, $zero, 1
-; CHECK-NEXT:    st.w $a1, $a0, 0
-; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    ori $a0, $zero, 2
-; CHECK-NEXT:    st.w $a0, $fp, -20
-; CHECK-NEXT:    ori $a2, $zero, 3
-; CHECK-NEXT:    st.w $a2, $fp, -24
-; CHECK-NEXT:    ori $a3, $zero, 4
-; CHECK-NEXT:    st.w $a3, $fp, -28
+; CHECK-NEXT:    ori $a2, $zero, 2
+; CHECK-NEXT:    st.w $a2, $fp, -20
+; CHECK-NEXT:    ori $a3, $zero, 3
+; CHECK-NEXT:    st.w $a3, $fp, -24
+; CHECK-NEXT:    ori $a4, $zero, 4
+; CHECK-NEXT:    st.w $a4, $fp, -28
+; CHECK-NEXT:    st.w $a1, $a0, 0
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    st.w $a2, $fp, -24
-; CHECK-NEXT:    st.w $a3, $fp, -28
+; CHECK-NEXT:    st.w $a2, $fp, -20
+; CHECK-NEXT:    st.w $a3, $fp, -24
+; CHECK-NEXT:    st.w $a4, $fp, -28
 ; CHECK-NEXT:    move $a0, $zero
 ; CHECK-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
 ; CHECK-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload


