[llvm] [LoongArch] Set scheduler to register pressure (PR #95741)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 16 23:02:54 PDT 2024


https://github.com/heiher created https://github.com/llvm/llvm-project/pull/95741

Performance measurements indicate that the register-pressure scheduling preference yields the best performance on both the 3A5000 and 3A6000 micro-architectures.
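
Concretely, the patch pairs two hooks, roughly as sketched below (comments paraphrase the upstream hook documentation; the surrounding constructor code is abbreviated):

    // In LoongArchISelLowering.cpp: ask the pre-RA SelectionDAG scheduler
    // to minimize register pressure instead of preserving source order.
    setSchedulingPreference(Sched::RegPressure);

    // In LoongArchSubtarget.h: keep the MachineScheduler enabled, but stop
    // it from overriding the TLI preference above with Sched::Source.
    bool enableMachineScheduler() const override { return true; }
    bool enableMachineSchedDefaultSched() const override { return false; }

Without the second override, enabling the MachineScheduler would force the source-order list scheduler pre-RA and the RegPressure preference would have no effect.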

From ee1c64cebea3e87dda2dcef3bc02d3c667156fcc Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Mon, 17 Jun 2024 14:02:17 +0800
Subject: [PATCH] [LoongArch] Set scheduler to register pressure

Performance measurement results indicate that the register pressure
scheduling preference yields the best performance on both the 3A5000
and 3A6000 micro-architectures.
---
 .../LoongArch/LoongArchISelLowering.cpp       |    3 +
 .../lib/Target/LoongArch/LoongArchSubtarget.h |    1 +
 llvm/test/CodeGen/LoongArch/alsl.ll           |   21 +-
 .../LoongArch/atomicrmw-uinc-udec-wrap.ll     |  112 +-
 llvm/test/CodeGen/LoongArch/bitreverse.ll     |   44 +-
 .../CodeGen/LoongArch/bswap-bitreverse.ll     |   12 +-
 llvm/test/CodeGen/LoongArch/bswap.ll          |   40 +-
 llvm/test/CodeGen/LoongArch/bytepick.ll       |   15 +-
 .../CodeGen/LoongArch/calling-conv-common.ll  |  126 +-
 .../CodeGen/LoongArch/calling-conv-lp64d.ll   |   12 +-
 .../LoongArch/can-not-realign-stack.ll        |   16 +-
 .../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll |   60 +-
 llvm/test/CodeGen/LoongArch/fcopysign.ll      |    8 +-
 llvm/test/CodeGen/LoongArch/gep-imm.ll        |    4 +-
 .../LoongArch/get-setcc-result-type.ll        |   22 +-
 llvm/test/CodeGen/LoongArch/ghc-cc.ll         |   96 +-
 .../CodeGen/LoongArch/intrinsic-memcpy.ll     |   22 +-
 .../CodeGen/LoongArch/ir-instruction/and.ll   |   42 +-
 .../ir-instruction/atomic-cmpxchg.ll          |   84 +-
 .../LoongArch/ir-instruction/atomicrmw-fp.ll  |  460 +++---
 .../ir-instruction/atomicrmw-minmax.ll        |  280 ++--
 .../LoongArch/ir-instruction/atomicrmw.ll     |  360 ++---
 .../ir-instruction/double-convert.ll          |    6 +-
 .../LoongArch/ir-instruction/float-convert.ll |    6 +-
 .../LoongArch/ir-instruction/load-store.ll    |  240 ++-
 .../CodeGen/LoongArch/ir-instruction/mul.ll   |   72 +-
 .../CodeGen/LoongArch/lasx/build-vector.ll    |   62 +-
 llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll |  368 ++---
 llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll |  368 ++---
 .../LoongArch/lasx/ir-instruction/add.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/and.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/ashr.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/fadd.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/fcmp.ll     |  168 +--
 .../LoongArch/lasx/ir-instruction/fdiv.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/fmul.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/fsub.ll     |   12 +-
 .../LoongArch/lasx/ir-instruction/icmp.ll     |  144 +-
 .../LoongArch/lasx/ir-instruction/lshr.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/mul.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/or.ll       |   24 +-
 .../LoongArch/lasx/ir-instruction/sdiv.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/shl.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/sub.ll      |   24 +-
 .../LoongArch/lasx/ir-instruction/udiv.ll     |   24 +-
 .../LoongArch/lasx/ir-instruction/xor.ll      |   24 +-
 llvm/test/CodeGen/LoongArch/lasx/mulh.ll      |   48 +-
 llvm/test/CodeGen/LoongArch/lasx/vselect.ll   |   12 +-
 .../CodeGen/LoongArch/lsx/build-vector.ll     |   40 +-
 llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll  |  368 ++---
 llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll  |  368 ++---
 .../LoongArch/lsx/ir-instruction/add.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/and.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/ashr.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/fadd.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/fcmp.ll      |  168 +--
 .../LoongArch/lsx/ir-instruction/fdiv.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/fmul.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/fsub.ll      |   12 +-
 .../LoongArch/lsx/ir-instruction/icmp.ll      |  144 +-
 .../LoongArch/lsx/ir-instruction/lshr.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/mul.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/or.ll        |   24 +-
 .../LoongArch/lsx/ir-instruction/sdiv.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/shl.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/sub.ll       |   24 +-
 .../LoongArch/lsx/ir-instruction/udiv.ll      |   24 +-
 .../LoongArch/lsx/ir-instruction/xor.ll       |   24 +-
 llvm/test/CodeGen/LoongArch/lsx/mulh.ll       |   48 +-
 llvm/test/CodeGen/LoongArch/lsx/vselect.ll    |   12 +-
 llvm/test/CodeGen/LoongArch/rotl-rotr.ll      |  364 ++---
 .../CodeGen/LoongArch/select-to-shiftand.ll   |    6 +-
 llvm/test/CodeGen/LoongArch/sextw-removal.ll  |  222 +--
 .../CodeGen/LoongArch/smul-with-overflow.ll   |  604 ++++----
 llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll |   24 +-
 .../LoongArch/spill-ra-without-kill.ll        |   12 +-
 .../CodeGen/LoongArch/spill-reload-cfr.ll     |   62 +-
 llvm/test/CodeGen/LoongArch/vector-fp-imm.ll  | 1312 ++++++++---------
 ...arch_generated_funcs.ll.generated.expected |   33 +-
 ...ch_generated_funcs.ll.nogenerated.expected |   33 +-
 80 files changed, 3814 insertions(+), 3964 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 1721287dab4dd..c0ef6ffa756af 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -367,6 +367,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
   setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
+
+  // Set scheduler.
+  setSchedulingPreference(Sched::RegPressure);
 }
 
 bool LoongArchTargetLowering::isOffsetFoldingLegal(
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index a8752c8070aa6..86a8e0ed2d18b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -105,6 +105,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
   unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   bool enableMachineScheduler() const override { return true; }
+  bool enableMachineSchedDefaultSched() const override { return false; }
 };
 } // end namespace llvm
 
diff --git a/llvm/test/CodeGen/LoongArch/alsl.ll b/llvm/test/CodeGen/LoongArch/alsl.ll
index 34baccc60d547..92288059c7744 100644
--- a/llvm/test/CodeGen/LoongArch/alsl.ll
+++ b/llvm/test/CodeGen/LoongArch/alsl.ll
@@ -53,13 +53,14 @@ entry:
 define i64 @alsl_i64(i64 signext %a, i64 signext %b) nounwind {
 ; LA32-LABEL: alsl_i64:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    srli.w $a4, $a0, 28
+; LA32-NEXT:    alsl.w $a4, $a0, $a2, 4
+; LA32-NEXT:    sltu $a2, $a4, $a2
+; LA32-NEXT:    srli.w $a0, $a0, 28
 ; LA32-NEXT:    slli.w $a1, $a1, 4
-; LA32-NEXT:    or $a1, $a1, $a4
-; LA32-NEXT:    alsl.w $a0, $a0, $a2, 4
-; LA32-NEXT:    sltu $a2, $a0, $a2
-; LA32-NEXT:    add.w $a1, $a3, $a1
-; LA32-NEXT:    add.w $a1, $a1, $a2
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    add.w $a0, $a3, $a0
+; LA32-NEXT:    add.w $a1, $a0, $a2
+; LA32-NEXT:    move $a0, $a4
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: alsl_i64:
@@ -194,9 +195,9 @@ define i64 @mul_add_i64(i64 signext %a, i64 signext %b) nounwind {
 ; LA32-NEXT:    slli.w $a5, $a1, 4
 ; LA32-NEXT:    sub.w $a1, $a5, $a1
 ; LA32-NEXT:    add.w $a1, $a4, $a1
-; LA32-NEXT:    slli.w $a4, $a0, 4
-; LA32-NEXT:    sub.w $a0, $a4, $a0
 ; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    slli.w $a3, $a0, 4
+; LA32-NEXT:    sub.w $a0, $a3, $a0
 ; LA32-NEXT:    add.w $a0, $a2, $a0
 ; LA32-NEXT:    sltu $a2, $a0, $a2
 ; LA32-NEXT:    add.w $a1, $a1, $a2
@@ -342,9 +343,9 @@ define i64 @mul_add_neg_i64(i64 signext %a, i64 signext %b) nounwind {
 ; LA32-NEXT:    mulh.wu $a4, $a0, $a4
 ; LA32-NEXT:    sub.w $a4, $a4, $a0
 ; LA32-NEXT:    add.w $a1, $a4, $a1
-; LA32-NEXT:    slli.w $a4, $a0, 4
-; LA32-NEXT:    sub.w $a0, $a0, $a4
 ; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    slli.w $a3, $a0, 4
+; LA32-NEXT:    sub.w $a0, $a0, $a3
 ; LA32-NEXT:    add.w $a0, $a2, $a0
 ; LA32-NEXT:    sltu $a2, $a0, $a2
 ; LA32-NEXT:    add.w $a1, $a1, $a2
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index 854518ed1fc97..6c73eb8c71e51 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -4,13 +4,13 @@
 define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-LABEL: atomicrmw_uinc_wrap_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a2, $a3, 24
-; LA64-NEXT:    ori $a5, $zero, 255
 ; LA64-NEXT:    ld.w $a4, $a0, 0
-; LA64-NEXT:    sll.w $a3, $a5, $a3
 ; LA64-NEXT:    nor $a3, $a3, $zero
+; LA64-NEXT:    andi $a2, $a2, 24
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB0_1: # %atomicrmw.start
@@ -18,11 +18,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    # Child Loop BB0_3 Depth 2
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    srl.w $a4, $a4, $a2
-; LA64-NEXT:    andi $a6, $a4, 255
-; LA64-NEXT:    addi.d $a4, $a4, 1
-; LA64-NEXT:    sltu $a6, $a6, $a1
-; LA64-NEXT:    xori $a6, $a6, 1
-; LA64-NEXT:    masknez $a4, $a4, $a6
+; LA64-NEXT:    addi.d $a6, $a4, 1
+; LA64-NEXT:    andi $a4, $a4, 255
+; LA64-NEXT:    sltu $a4, $a4, $a1
+; LA64-NEXT:    xori $a4, $a4, 1
+; LA64-NEXT:    masknez $a4, $a6, $a4
 ; LA64-NEXT:    andi $a4, $a4, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    and $a6, $a5, $a3
@@ -54,14 +54,14 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-LABEL: atomicrmw_uinc_wrap_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a2, $a3, 24
-; LA64-NEXT:    lu12i.w $a4, 15
-; LA64-NEXT:    ori $a5, $a4, 4095
 ; LA64-NEXT:    ld.w $a4, $a0, 0
-; LA64-NEXT:    sll.w $a3, $a5, $a3
 ; LA64-NEXT:    nor $a3, $a3, $zero
+; LA64-NEXT:    andi $a2, $a2, 24
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB1_1: # %atomicrmw.start
@@ -69,11 +69,11 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    # Child Loop BB1_3 Depth 2
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    srl.w $a4, $a4, $a2
-; LA64-NEXT:    bstrpick.d $a6, $a4, 15, 0
-; LA64-NEXT:    addi.d $a4, $a4, 1
-; LA64-NEXT:    sltu $a6, $a6, $a1
-; LA64-NEXT:    xori $a6, $a6, 1
-; LA64-NEXT:    masknez $a4, $a4, $a6
+; LA64-NEXT:    addi.d $a6, $a4, 1
+; LA64-NEXT:    bstrpick.d $a4, $a4, 15, 0
+; LA64-NEXT:    sltu $a4, $a4, $a1
+; LA64-NEXT:    xori $a4, $a4, 1
+; LA64-NEXT:    masknez $a4, $a6, $a4
 ; LA64-NEXT:    bstrpick.d $a4, $a4, 15, 0
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    and $a6, $a5, $a3
@@ -112,10 +112,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB2_3 Depth 2
 ; LA64-NEXT:    move $a3, $a2
-; LA64-NEXT:    addi.w $a2, $a2, 1
-; LA64-NEXT:    sltu $a4, $a3, $a1
-; LA64-NEXT:    xori $a4, $a4, 1
-; LA64-NEXT:    masknez $a4, $a2, $a4
+; LA64-NEXT:    sltu $a2, $a2, $a1
+; LA64-NEXT:    xori $a2, $a2, 1
+; LA64-NEXT:    addi.w $a4, $a3, 1
+; LA64-NEXT:    masknez $a4, $a4, $a2
 ; LA64-NEXT:  .LBB2_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB2_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
@@ -149,10 +149,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB3_3 Depth 2
 ; LA64-NEXT:    move $a3, $a2
-; LA64-NEXT:    addi.d $a2, $a2, 1
-; LA64-NEXT:    sltu $a4, $a3, $a1
-; LA64-NEXT:    xori $a4, $a4, 1
-; LA64-NEXT:    masknez $a4, $a2, $a4
+; LA64-NEXT:    sltu $a2, $a2, $a1
+; LA64-NEXT:    xori $a2, $a2, 1
+; LA64-NEXT:    addi.d $a4, $a3, 1
+; LA64-NEXT:    masknez $a4, $a4, $a2
 ; LA64-NEXT:  .LBB3_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB3_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
@@ -180,13 +180,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-LABEL: atomicrmw_udec_wrap_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a2, $a3, 24
-; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    ld.w $a5, $a0, 0
-; LA64-NEXT:    sll.w $a3, $a4, $a3
 ; LA64-NEXT:    nor $a3, $a3, $zero
+; LA64-NEXT:    andi $a2, $a2, 24
 ; LA64-NEXT:    andi $a4, $a1, 255
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB4_1: # %atomicrmw.start
@@ -195,15 +195,15 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    move $a6, $a5
 ; LA64-NEXT:    srl.w $a5, $a5, $a2
 ; LA64-NEXT:    andi $a7, $a5, 255
+; LA64-NEXT:    sltu $t0, $a4, $a7
 ; LA64-NEXT:    addi.d $a5, $a5, -1
-; LA64-NEXT:    sltui $t0, $a7, 1
-; LA64-NEXT:    sltu $a7, $a4, $a7
+; LA64-NEXT:    masknez $a5, $a5, $t0
+; LA64-NEXT:    maskeqz $t0, $a1, $t0
+; LA64-NEXT:    or $a5, $t0, $a5
+; LA64-NEXT:    sltui $a7, $a7, 1
 ; LA64-NEXT:    masknez $a5, $a5, $a7
 ; LA64-NEXT:    maskeqz $a7, $a1, $a7
 ; LA64-NEXT:    or $a5, $a7, $a5
-; LA64-NEXT:    masknez $a5, $a5, $t0
-; LA64-NEXT:    maskeqz $a7, $a1, $t0
-; LA64-NEXT:    or $a5, $a7, $a5
 ; LA64-NEXT:    andi $a5, $a5, 255
 ; LA64-NEXT:    sll.w $a5, $a5, $a2
 ; LA64-NEXT:    and $a7, $a6, $a3
@@ -235,14 +235,14 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-LABEL: atomicrmw_udec_wrap_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    slli.d $a2, $a0, 3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a2, $a3, 24
-; LA64-NEXT:    lu12i.w $a4, 15
-; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    ld.w $a5, $a0, 0
-; LA64-NEXT:    sll.w $a3, $a4, $a3
 ; LA64-NEXT:    nor $a3, $a3, $zero
+; LA64-NEXT:    andi $a2, $a2, 24
 ; LA64-NEXT:    bstrpick.d $a4, $a1, 15, 0
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB5_1: # %atomicrmw.start
@@ -251,15 +251,15 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    move $a6, $a5
 ; LA64-NEXT:    srl.w $a5, $a5, $a2
 ; LA64-NEXT:    bstrpick.d $a7, $a5, 15, 0
+; LA64-NEXT:    sltu $t0, $a4, $a7
 ; LA64-NEXT:    addi.d $a5, $a5, -1
-; LA64-NEXT:    sltui $t0, $a7, 1
-; LA64-NEXT:    sltu $a7, $a4, $a7
+; LA64-NEXT:    masknez $a5, $a5, $t0
+; LA64-NEXT:    maskeqz $t0, $a1, $t0
+; LA64-NEXT:    or $a5, $t0, $a5
+; LA64-NEXT:    sltui $a7, $a7, 1
 ; LA64-NEXT:    masknez $a5, $a5, $a7
 ; LA64-NEXT:    maskeqz $a7, $a1, $a7
 ; LA64-NEXT:    or $a5, $a7, $a5
-; LA64-NEXT:    masknez $a5, $a5, $t0
-; LA64-NEXT:    maskeqz $a7, $a1, $t0
-; LA64-NEXT:    or $a5, $a7, $a5
 ; LA64-NEXT:    bstrpick.d $a5, $a5, 15, 0
 ; LA64-NEXT:    sll.w $a5, $a5, $a2
 ; LA64-NEXT:    and $a7, $a6, $a3
@@ -298,12 +298,12 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB6_3 Depth 2
 ; LA64-NEXT:    move $a4, $a2
-; LA64-NEXT:    addi.w $a2, $a2, -1
+; LA64-NEXT:    sltu $a2, $a3, $a2
+; LA64-NEXT:    addi.w $a5, $a4, -1
+; LA64-NEXT:    masknez $a5, $a5, $a2
+; LA64-NEXT:    maskeqz $a2, $a1, $a2
+; LA64-NEXT:    or $a2, $a2, $a5
 ; LA64-NEXT:    sltui $a5, $a4, 1
-; LA64-NEXT:    sltu $a6, $a3, $a4
-; LA64-NEXT:    masknez $a2, $a2, $a6
-; LA64-NEXT:    maskeqz $a6, $a1, $a6
-; LA64-NEXT:    or $a2, $a6, $a2
 ; LA64-NEXT:    masknez $a2, $a2, $a5
 ; LA64-NEXT:    maskeqz $a5, $a1, $a5
 ; LA64-NEXT:    or $a5, $a5, $a2
@@ -340,12 +340,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT:    # =>This Loop Header: Depth=1
 ; LA64-NEXT:    # Child Loop BB7_3 Depth 2
 ; LA64-NEXT:    move $a3, $a2
-; LA64-NEXT:    addi.d $a2, $a2, -1
+; LA64-NEXT:    sltu $a2, $a1, $a2
+; LA64-NEXT:    addi.d $a4, $a3, -1
+; LA64-NEXT:    masknez $a4, $a4, $a2
+; LA64-NEXT:    maskeqz $a2, $a1, $a2
+; LA64-NEXT:    or $a2, $a2, $a4
 ; LA64-NEXT:    sltui $a4, $a3, 1
-; LA64-NEXT:    sltu $a5, $a1, $a3
-; LA64-NEXT:    masknez $a2, $a2, $a5
-; LA64-NEXT:    maskeqz $a5, $a1, $a5
-; LA64-NEXT:    or $a2, $a5, $a2
 ; LA64-NEXT:    masknez $a2, $a2, $a4
 ; LA64-NEXT:    maskeqz $a4, $a1, $a4
 ; LA64-NEXT:    or $a4, $a4, $a2
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
index 78d5c7e4a7977..d2e90f2e85a3e 100644
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -129,22 +129,22 @@ define i48 @test_bitreverse_i48(i48 %a) nounwind {
 define i77 @test_bitreverse_i77(i77 %a) nounwind {
 ; LA32-LABEL: test_bitreverse_i77:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 4
-; LA32-NEXT:    ld.w $a3, $a1, 8
-; LA32-NEXT:    ld.w $a1, $a1, 0
+; LA32-NEXT:    ld.w $a2, $a1, 0
 ; LA32-NEXT:    bitrev.w $a2, $a2
-; LA32-NEXT:    slli.w $a4, $a2, 13
+; LA32-NEXT:    srli.w $a3, $a2, 19
+; LA32-NEXT:    st.h $a3, $a0, 8
+; LA32-NEXT:    ld.w $a3, $a1, 4
 ; LA32-NEXT:    bitrev.w $a3, $a3
-; LA32-NEXT:    srli.w $a3, $a3, 19
-; LA32-NEXT:    or $a3, $a3, $a4
-; LA32-NEXT:    srli.w $a2, $a2, 19
+; LA32-NEXT:    srli.w $a4, $a3, 19
+; LA32-NEXT:    slli.w $a2, $a2, 13
+; LA32-NEXT:    or $a2, $a2, $a4
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 8
+; LA32-NEXT:    slli.w $a2, $a3, 13
 ; LA32-NEXT:    bitrev.w $a1, $a1
-; LA32-NEXT:    slli.w $a4, $a1, 13
-; LA32-NEXT:    or $a2, $a4, $a2
 ; LA32-NEXT:    srli.w $a1, $a1, 19
-; LA32-NEXT:    st.h $a1, $a0, 8
-; LA32-NEXT:    st.w $a2, $a0, 4
-; LA32-NEXT:    st.w $a3, $a0, 0
+; LA32-NEXT:    or $a1, $a1, $a2
+; LA32-NEXT:    st.w $a1, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bitreverse_i77:
@@ -163,18 +163,18 @@ define i77 @test_bitreverse_i77(i77 %a) nounwind {
 define i128 @test_bitreverse_i128(i128 %a) nounwind {
 ; LA32-LABEL: test_bitreverse_i128:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 12
-; LA32-NEXT:    ld.w $a3, $a1, 8
-; LA32-NEXT:    ld.w $a4, $a1, 4
-; LA32-NEXT:    ld.w $a1, $a1, 0
+; LA32-NEXT:    ld.w $a2, $a1, 0
 ; LA32-NEXT:    bitrev.w $a2, $a2
-; LA32-NEXT:    bitrev.w $a3, $a3
-; LA32-NEXT:    bitrev.w $a4, $a4
+; LA32-NEXT:    st.w $a2, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    bitrev.w $a2, $a2
+; LA32-NEXT:    st.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a2, $a1, 8
+; LA32-NEXT:    bitrev.w $a2, $a2
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 12
 ; LA32-NEXT:    bitrev.w $a1, $a1
-; LA32-NEXT:    st.w $a1, $a0, 12
-; LA32-NEXT:    st.w $a4, $a0, 8
-; LA32-NEXT:    st.w $a3, $a0, 4
-; LA32-NEXT:    st.w $a2, $a0, 0
+; LA32-NEXT:    st.w $a1, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bitreverse_i128:
diff --git a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
index c8f9596b9b0c1..88fda30efeacd 100644
--- a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
@@ -114,17 +114,17 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 define i32 @pr55484(i32 %0) {
 ; LA32-LABEL: pr55484:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a0, 8
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    slli.w $a1, $a0, 8
+; LA32-NEXT:    srli.w $a0, $a0, 8
+; LA32-NEXT:    or $a0, $a0, $a1
 ; LA32-NEXT:    ext.w.h $a0, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pr55484:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 8
-; LA64-NEXT:    slli.d $a0, $a0, 8
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    slli.d $a1, $a0, 8
+; LA64-NEXT:    srli.d $a0, $a0, 8
+; LA64-NEXT:    or $a0, $a0, $a1
 ; LA64-NEXT:    ext.w.h $a0, $a0
 ; LA64-NEXT:    ret
   %2 = lshr i32 %0, 8
diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
index 122dab7fb4963..47e2172df1497 100644
--- a/llvm/test/CodeGen/LoongArch/bswap.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -83,20 +83,20 @@ define i48 @test_bswap_i48(i48 %a) nounwind {
 define i80 @test_bswap_i80(i80 %a) nounwind {
 ; LA32-LABEL: test_bswap_i80:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 4
-; LA32-NEXT:    ld.w $a3, $a1, 8
-; LA32-NEXT:    ld.w $a1, $a1, 0
+; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    ld.w $a3, $a1, 4
 ; LA32-NEXT:    revb.2h $a2, $a2
 ; LA32-NEXT:    rotri.w $a2, $a2, 16
 ; LA32-NEXT:    revb.2h $a3, $a3
 ; LA32-NEXT:    rotri.w $a3, $a3, 16
-; LA32-NEXT:    bytepick.w $a3, $a3, $a2, 2
+; LA32-NEXT:    bytepick.w $a4, $a3, $a2, 2
+; LA32-NEXT:    st.w $a4, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 8
 ; LA32-NEXT:    revb.2h $a1, $a1
 ; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    bytepick.w $a2, $a2, $a1, 2
-; LA32-NEXT:    srli.w $a1, $a1, 16
-; LA32-NEXT:    st.w $a2, $a0, 4
-; LA32-NEXT:    st.w $a3, $a0, 0
+; LA32-NEXT:    bytepick.w $a1, $a1, $a3, 2
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    srli.w $a1, $a2, 16
 ; LA32-NEXT:    st.h $a1, $a0, 8
 ; LA32-NEXT:    ret
 ;
@@ -114,22 +114,22 @@ define i80 @test_bswap_i80(i80 %a) nounwind {
 define i128 @test_bswap_i128(i128 %a) nounwind {
 ; LA32-LABEL: test_bswap_i128:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ld.w $a2, $a1, 12
-; LA32-NEXT:    ld.w $a3, $a1, 0
-; LA32-NEXT:    ld.w $a4, $a1, 8
-; LA32-NEXT:    ld.w $a1, $a1, 4
+; LA32-NEXT:    ld.w $a2, $a1, 0
+; LA32-NEXT:    revb.2h $a2, $a2
+; LA32-NEXT:    rotri.w $a2, $a2, 16
+; LA32-NEXT:    st.w $a2, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a1, 4
 ; LA32-NEXT:    revb.2h $a2, $a2
 ; LA32-NEXT:    rotri.w $a2, $a2, 16
-; LA32-NEXT:    revb.2h $a4, $a4
-; LA32-NEXT:    rotri.w $a4, $a4, 16
+; LA32-NEXT:    st.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a2, $a1, 8
+; LA32-NEXT:    revb.2h $a2, $a2
+; LA32-NEXT:    rotri.w $a2, $a2, 16
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 12
 ; LA32-NEXT:    revb.2h $a1, $a1
 ; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    revb.2h $a3, $a3
-; LA32-NEXT:    rotri.w $a3, $a3, 16
-; LA32-NEXT:    st.w $a3, $a0, 12
-; LA32-NEXT:    st.w $a1, $a0, 8
-; LA32-NEXT:    st.w $a4, $a0, 4
-; LA32-NEXT:    st.w $a2, $a0, 0
+; LA32-NEXT:    st.w $a1, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bswap_i128:
diff --git a/llvm/test/CodeGen/LoongArch/bytepick.ll b/llvm/test/CodeGen/LoongArch/bytepick.ll
index 22a78bcd56119..cb728619a8e97 100644
--- a/llvm/test/CodeGen/LoongArch/bytepick.ll
+++ b/llvm/test/CodeGen/LoongArch/bytepick.ll
@@ -14,8 +14,8 @@ define i32 @pick_i32_1(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: pick_i32_1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 24
 ; LA64-NEXT:    slli.d $a0, $a0, 8
+; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 24
 ; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 24
@@ -52,8 +52,8 @@ define i32 @pick_i32_2(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: pick_i32_2:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 16
 ; LA64-NEXT:    slli.d $a0, $a0, 16
+; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 16
 ; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 16
@@ -90,8 +90,8 @@ define i32 @pick_i32_3(i32 %a, i32 %b) {
 ;
 ; LA64-LABEL: pick_i32_3:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 8
 ; LA64-NEXT:    slli.d $a0, $a0, 24
+; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 8
 ; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 8
@@ -123,8 +123,9 @@ define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
 define i64 @pick_i64_1(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_1:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 1
 ; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 1
-; LA32-NEXT:    bytepick.w $a0, $a3, $a0, 1
+; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_1:
@@ -142,8 +143,9 @@ define i64 @pick_i64_1(i64 %a, i64 %b) {
 define i64 @pick_i64_2(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_2:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 2
 ; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 2
-; LA32-NEXT:    bytepick.w $a0, $a3, $a0, 2
+; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_2:
@@ -161,8 +163,9 @@ define i64 @pick_i64_2(i64 %a, i64 %b) {
 define i64 @pick_i64_3(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_3:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 3
 ; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 3
-; LA32-NEXT:    bytepick.w $a0, $a3, $a0, 3
+; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_3:
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
index 06dfe00d90847..6ac1f98470155 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
@@ -36,21 +36,21 @@ define i64 @caller_i128_in_regs() nounwind {
 define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind {
 ; CHECK-LABEL: callee_many_scalars:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld.d $t0, $sp, 8
-; CHECK-NEXT:    ld.d $t1, $sp, 0
-; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    ld.d $t0, $sp, 0
+; CHECK-NEXT:    xor $a5, $a5, $t0
+; CHECK-NEXT:    xor $a4, $a4, $a7
+; CHECK-NEXT:    or $a4, $a4, $a5
+; CHECK-NEXT:    sltui $a4, $a4, 1
 ; CHECK-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; CHECK-NEXT:    bstrpick.d $a2, $a2, 31, 0
+; CHECK-NEXT:    andi $a0, $a0, 255
 ; CHECK-NEXT:    add.d $a0, $a0, $a1
-; CHECK-NEXT:    add.d $a0, $a0, $a2
+; CHECK-NEXT:    bstrpick.d $a1, $a2, 31, 0
+; CHECK-NEXT:    add.d $a0, $a0, $a1
+; CHECK-NEXT:    ld.d $a1, $sp, 8
 ; CHECK-NEXT:    add.d $a0, $a0, $a3
-; CHECK-NEXT:    xor $a1, $a5, $t1
-; CHECK-NEXT:    xor $a2, $a4, $a7
-; CHECK-NEXT:    or $a1, $a2, $a1
-; CHECK-NEXT:    sltui $a1, $a1, 1
-; CHECK-NEXT:    add.d $a0, $a1, $a0
+; CHECK-NEXT:    add.d $a0, $a4, $a0
 ; CHECK-NEXT:    add.d $a0, $a0, $a6
-; CHECK-NEXT:    add.d $a0, $a0, $t0
+; CHECK-NEXT:    add.d $a0, $a0, $a1
 ; CHECK-NEXT:    ret
   %a_ext = zext i8 %a to i64
   %b_ext = zext i16 %b to i64
@@ -95,21 +95,21 @@ define i64 @caller_many_scalars() nounwind {
 define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
 ; CHECK-LABEL: callee_large_scalars:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld.d $a2, $a1, 0
-; CHECK-NEXT:    ld.d $a3, $a0, 0
-; CHECK-NEXT:    ld.d $a4, $a1, 8
-; CHECK-NEXT:    ld.d $a5, $a1, 24
-; CHECK-NEXT:    ld.d $a6, $a0, 24
-; CHECK-NEXT:    ld.d $a7, $a0, 8
-; CHECK-NEXT:    ld.d $a1, $a1, 16
-; CHECK-NEXT:    ld.d $a0, $a0, 16
-; CHECK-NEXT:    xor $a5, $a6, $a5
-; CHECK-NEXT:    xor $a4, $a7, $a4
-; CHECK-NEXT:    or $a4, $a4, $a5
+; CHECK-NEXT:    ld.d $a2, $a1, 24
+; CHECK-NEXT:    ld.d $a3, $a0, 24
+; CHECK-NEXT:    xor $a2, $a3, $a2
+; CHECK-NEXT:    ld.d $a3, $a1, 8
+; CHECK-NEXT:    ld.d $a4, $a0, 8
+; CHECK-NEXT:    ld.d $a5, $a1, 16
+; CHECK-NEXT:    ld.d $a6, $a0, 16
+; CHECK-NEXT:    ld.d $a1, $a1, 0
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    xor $a3, $a4, $a3
+; CHECK-NEXT:    or $a2, $a3, $a2
+; CHECK-NEXT:    xor $a3, $a6, $a5
 ; CHECK-NEXT:    xor $a0, $a0, $a1
-; CHECK-NEXT:    xor $a1, $a3, $a2
-; CHECK-NEXT:    or $a0, $a1, $a0
-; CHECK-NEXT:    or $a0, $a0, $a4
+; CHECK-NEXT:    or $a0, $a0, $a3
+; CHECK-NEXT:    or $a0, $a0, $a2
 ; CHECK-NEXT:    sltui $a0, $a0, 1
 ; CHECK-NEXT:    ret
   %1 = icmp eq i256 %a, %b
@@ -150,21 +150,21 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
 ; CHECK-LABEL: callee_large_scalars_exhausted_regs:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ld.d $a0, $sp, 8
-; CHECK-NEXT:    ld.d $a1, $a0, 0
-; CHECK-NEXT:    ld.d $a2, $a7, 0
-; CHECK-NEXT:    ld.d $a3, $a0, 8
-; CHECK-NEXT:    ld.d $a4, $a0, 24
-; CHECK-NEXT:    ld.d $a5, $a7, 24
-; CHECK-NEXT:    ld.d $a6, $a7, 8
-; CHECK-NEXT:    ld.d $a0, $a0, 16
-; CHECK-NEXT:    ld.d $a7, $a7, 16
-; CHECK-NEXT:    xor $a4, $a5, $a4
-; CHECK-NEXT:    xor $a3, $a6, $a3
-; CHECK-NEXT:    or $a3, $a3, $a4
-; CHECK-NEXT:    xor $a0, $a7, $a0
+; CHECK-NEXT:    ld.d $a1, $a0, 24
+; CHECK-NEXT:    ld.d $a2, $a7, 24
 ; CHECK-NEXT:    xor $a1, $a2, $a1
-; CHECK-NEXT:    or $a0, $a1, $a0
-; CHECK-NEXT:    or $a0, $a0, $a3
+; CHECK-NEXT:    ld.d $a2, $a0, 8
+; CHECK-NEXT:    ld.d $a3, $a7, 8
+; CHECK-NEXT:    ld.d $a4, $a0, 16
+; CHECK-NEXT:    ld.d $a5, $a7, 16
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    ld.d $a6, $a7, 0
+; CHECK-NEXT:    xor $a2, $a3, $a2
+; CHECK-NEXT:    or $a1, $a2, $a1
+; CHECK-NEXT:    xor $a2, $a5, $a4
+; CHECK-NEXT:    xor $a0, $a6, $a0
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    or $a0, $a0, $a1
 ; CHECK-NEXT:    sltui $a0, $a0, 1
 ; CHECK-NEXT:    ret
   %1 = icmp eq i256 %h, %j
@@ -216,9 +216,9 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
 define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind {
 ; CHECK-LABEL: callee_large_struct:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld.d $a1, $a0, 0
-; CHECK-NEXT:    ld.d $a0, $a0, 24
-; CHECK-NEXT:    add.d $a0, $a1, $a0
+; CHECK-NEXT:    ld.d $a1, $a0, 24
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    add.d $a0, $a0, $a1
 ; CHECK-NEXT:    ret
   %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0
   %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3
@@ -233,18 +233,18 @@ define i64 @caller_large_struct() nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi.d $sp, $sp, -80
 ; CHECK-NEXT:    st.d $ra, $sp, 72 # 8-byte Folded Spill
-; CHECK-NEXT:    ori $a0, $zero, 1
-; CHECK-NEXT:    st.d $a0, $sp, 40
-; CHECK-NEXT:    ori $a1, $zero, 2
-; CHECK-NEXT:    st.d $a1, $sp, 48
-; CHECK-NEXT:    ori $a2, $zero, 3
-; CHECK-NEXT:    st.d $a2, $sp, 56
-; CHECK-NEXT:    ori $a3, $zero, 4
-; CHECK-NEXT:    st.d $a3, $sp, 64
-; CHECK-NEXT:    st.d $a0, $sp, 8
-; CHECK-NEXT:    st.d $a1, $sp, 16
-; CHECK-NEXT:    st.d $a2, $sp, 24
-; CHECK-NEXT:    st.d $a3, $sp, 32
+; CHECK-NEXT:    ori $a0, $zero, 4
+; CHECK-NEXT:    st.d $a0, $sp, 64
+; CHECK-NEXT:    ori $a1, $zero, 3
+; CHECK-NEXT:    st.d $a1, $sp, 56
+; CHECK-NEXT:    ori $a2, $zero, 2
+; CHECK-NEXT:    st.d $a2, $sp, 48
+; CHECK-NEXT:    ori $a3, $zero, 1
+; CHECK-NEXT:    st.d $a3, $sp, 40
+; CHECK-NEXT:    st.d $a3, $sp, 8
+; CHECK-NEXT:    st.d $a2, $sp, 16
+; CHECK-NEXT:    st.d $a1, $sp, 24
+; CHECK-NEXT:    st.d $a0, $sp, 32
 ; CHECK-NEXT:    addi.d $a0, $sp, 8
 ; CHECK-NEXT:    bl %plt(callee_large_struct)
 ; CHECK-NEXT:    ld.d $ra, $sp, 72 # 8-byte Folded Reload
@@ -359,14 +359,14 @@ define void @caller_large_scalar_ret() nounwind {
 define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind {
 ; CHECK-LABEL: callee_large_struct_ret:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ori $a1, $zero, 1
-; CHECK-NEXT:    st.d $a1, $a0, 0
-; CHECK-NEXT:    ori $a1, $zero, 2
-; CHECK-NEXT:    st.d $a1, $a0, 8
-; CHECK-NEXT:    ori $a1, $zero, 3
-; CHECK-NEXT:    st.d $a1, $a0, 16
 ; CHECK-NEXT:    ori $a1, $zero, 4
 ; CHECK-NEXT:    st.d $a1, $a0, 24
+; CHECK-NEXT:    ori $a1, $zero, 3
+; CHECK-NEXT:    st.d $a1, $a0, 16
+; CHECK-NEXT:    ori $a1, $zero, 2
+; CHECK-NEXT:    st.d $a1, $a0, 8
+; CHECK-NEXT:    ori $a1, $zero, 1
+; CHECK-NEXT:    st.d $a1, $a0, 0
 ; CHECK-NEXT:    ret
   %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0
   store i64 1, ptr %a, align 4
@@ -386,9 +386,9 @@ define i64 @caller_large_struct_ret() nounwind {
 ; CHECK-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; CHECK-NEXT:    addi.d $a0, $sp, 8
 ; CHECK-NEXT:    bl %plt(callee_large_struct_ret)
-; CHECK-NEXT:    ld.d $a0, $sp, 8
-; CHECK-NEXT:    ld.d $a1, $sp, 32
-; CHECK-NEXT:    add.d $a0, $a0, $a1
+; CHECK-NEXT:    ld.d $a0, $sp, 32
+; CHECK-NEXT:    ld.d $a1, $sp, 8
+; CHECK-NEXT:    add.d $a0, $a1, $a0
 ; CHECK-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
 ; CHECK-NEXT:    addi.d $sp, $sp, 48
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
index cc6ba057019c6..c1b0899de0f16 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
@@ -11,10 +11,10 @@ define i64 @callee_float_in_fpr(i64 %a, float %b, double %c) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ftintrz.l.s $fa0, $fa0
 ; CHECK-NEXT:    movfr2gr.d $a1, $fa0
+; CHECK-NEXT:    add.d $a0, $a0, $a1
 ; CHECK-NEXT:    ftintrz.l.d $fa0, $fa1
-; CHECK-NEXT:    movfr2gr.d $a2, $fa0
+; CHECK-NEXT:    movfr2gr.d $a1, $fa0
 ; CHECK-NEXT:    add.d $a0, $a0, $a1
-; CHECK-NEXT:    add.d $a0, $a0, $a2
 ; CHECK-NEXT:    ret
   %b_fptosi = fptosi float %b to i64
   %c_fptosi = fptosi double %c to i64
@@ -45,12 +45,12 @@ define i64 @caller_float_in_fpr() nounwind {
 define i64 @callee_double_in_gpr_exhausted_fprs(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) nounwind {
 ; CHECK-LABEL: callee_double_in_gpr_exhausted_fprs:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    ftintrz.l.d $fa0, $fa7
+; CHECK-NEXT:    movfr2gr.d $a1, $fa0
 ; CHECK-NEXT:    movgr2fr.d $fa0, $a0
-; CHECK-NEXT:    ftintrz.l.d $fa1, $fa7
-; CHECK-NEXT:    movfr2gr.d $a0, $fa1
 ; CHECK-NEXT:    ftintrz.l.d $fa0, $fa0
-; CHECK-NEXT:    movfr2gr.d $a1, $fa0
-; CHECK-NEXT:    add.d $a0, $a0, $a1
+; CHECK-NEXT:    movfr2gr.d $a0, $fa0
+; CHECK-NEXT:    add.d $a0, $a1, $a0
 ; CHECK-NEXT:    ret
   %h_fptosi = fptosi double %h to i64
   %i_fptosi = fptosi double %i to i64
diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
index 49155a3966a84..0e6814acfc953 100644
--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
@@ -32,20 +32,20 @@ define dso_local noundef signext i32 @main() nounwind {
 ; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI0_3)
 ; CHECK-NEXT:    xvld $xr3, $a0, 0
 ; CHECK-NEXT:    xvst $xr3, $sp, 0 # 32-byte Folded Spill
-; CHECK-NEXT:    xvst $xr0, $sp, 136
-; CHECK-NEXT:    xvst $xr1, $sp, 168
-; CHECK-NEXT:    xvst $xr2, $sp, 200
-; CHECK-NEXT:    xvst $xr3, $sp, 232
+; CHECK-NEXT:    xvst $xr0, $sp, 232
+; CHECK-NEXT:    xvst $xr1, $sp, 200
+; CHECK-NEXT:    xvst $xr2, $sp, 168
+; CHECK-NEXT:    xvst $xr3, $sp, 136
 ; CHECK-NEXT:    addi.d $a0, $sp, 136
 ; CHECK-NEXT:    bl %plt(foo)
 ; CHECK-NEXT:    xvld $xr0, $sp, 96 # 32-byte Folded Reload
-; CHECK-NEXT:    xvst $xr0, $sp, 136
+; CHECK-NEXT:    xvst $xr0, $sp, 232
 ; CHECK-NEXT:    xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT:    xvst $xr0, $sp, 168
-; CHECK-NEXT:    xvld $xr0, $sp, 32 # 32-byte Folded Reload
 ; CHECK-NEXT:    xvst $xr0, $sp, 200
+; CHECK-NEXT:    xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT:    xvst $xr0, $sp, 168
 ; CHECK-NEXT:    xvld $xr0, $sp, 0 # 32-byte Folded Reload
-; CHECK-NEXT:    xvst $xr0, $sp, 232
+; CHECK-NEXT:    xvst $xr0, $sp, 136
 ; CHECK-NEXT:    addi.d $a0, $sp, 136
 ; CHECK-NEXT:    bl %plt(bar)
 ; CHECK-NEXT:    move $a0, $zero
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index f17cec231f323..4b4414044224f 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -201,10 +201,10 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
 define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a0, 1
-; LA32-NEXT:    lu12i.w $a2, 5
-; LA32-NEXT:    ori $a2, $a2, 1365
-; LA32-NEXT:    and $a1, $a1, $a2
+; LA32-NEXT:    lu12i.w $a1, 5
+; LA32-NEXT:    ori $a1, $a1, 1365
+; LA32-NEXT:    srli.w $a2, $a0, 1
+; LA32-NEXT:    and $a1, $a2, $a1
 ; LA32-NEXT:    sub.w $a0, $a0, $a1
 ; LA32-NEXT:    lu12i.w $a1, 3
 ; LA32-NEXT:    ori $a1, $a1, 819
@@ -221,10 +221,10 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 5
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    and $a1, $a1, $a2
+; LA64-NEXT:    lu12i.w $a1, 5
+; LA64-NEXT:    ori $a1, $a1, 1365
+; LA64-NEXT:    srli.d $a2, $a0, 1
+; LA64-NEXT:    and $a1, $a2, $a1
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    lu12i.w $a1, 3
 ; LA64-NEXT:    ori $a1, $a1, 819
@@ -245,10 +245,10 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 define i32 @test_ctpop_i32(i32 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a0, 1
-; LA32-NEXT:    lu12i.w $a2, 349525
-; LA32-NEXT:    ori $a2, $a2, 1365
-; LA32-NEXT:    and $a1, $a1, $a2
+; LA32-NEXT:    lu12i.w $a1, 349525
+; LA32-NEXT:    ori $a1, $a1, 1365
+; LA32-NEXT:    srli.w $a2, $a0, 1
+; LA32-NEXT:    and $a1, $a2, $a1
 ; LA32-NEXT:    sub.w $a0, $a0, $a1
 ; LA32-NEXT:    lu12i.w $a1, 209715
 ; LA32-NEXT:    ori $a1, $a1, 819
@@ -269,10 +269,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 ;
 ; LA64-LABEL: test_ctpop_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 349525
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    and $a1, $a1, $a2
+; LA64-NEXT:    lu12i.w $a1, 349525
+; LA64-NEXT:    ori $a1, $a1, 1365
+; LA64-NEXT:    srli.d $a2, $a0, 1
+; LA64-NEXT:    and $a1, $a2, $a1
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    lu12i.w $a1, 209715
 ; LA64-NEXT:    ori $a1, $a1, 819
@@ -297,16 +297,16 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a1, 1
-; LA32-NEXT:    lu12i.w $a3, 349525
-; LA32-NEXT:    ori $a3, $a3, 1365
-; LA32-NEXT:    and $a2, $a2, $a3
-; LA32-NEXT:    sub.w $a1, $a1, $a2
-; LA32-NEXT:    lu12i.w $a2, 209715
-; LA32-NEXT:    ori $a2, $a2, 819
-; LA32-NEXT:    and $a4, $a1, $a2
+; LA32-NEXT:    lu12i.w $a2, 349525
+; LA32-NEXT:    ori $a2, $a2, 1365
+; LA32-NEXT:    srli.w $a3, $a1, 1
+; LA32-NEXT:    and $a3, $a3, $a2
+; LA32-NEXT:    sub.w $a1, $a1, $a3
+; LA32-NEXT:    lu12i.w $a3, 209715
+; LA32-NEXT:    ori $a3, $a3, 819
+; LA32-NEXT:    and $a4, $a1, $a3
 ; LA32-NEXT:    srli.w $a1, $a1, 2
-; LA32-NEXT:    and $a1, $a1, $a2
+; LA32-NEXT:    and $a1, $a1, $a3
 ; LA32-NEXT:    add.w $a1, $a4, $a1
 ; LA32-NEXT:    srli.w $a4, $a1, 4
 ; LA32-NEXT:    add.w $a1, $a1, $a4
@@ -318,12 +318,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; LA32-NEXT:    mul.w $a1, $a1, $a5
 ; LA32-NEXT:    srli.w $a1, $a1, 24
 ; LA32-NEXT:    srli.w $a6, $a0, 1
-; LA32-NEXT:    and $a3, $a6, $a3
-; LA32-NEXT:    sub.w $a0, $a0, $a3
-; LA32-NEXT:    and $a3, $a0, $a2
+; LA32-NEXT:    and $a2, $a6, $a2
+; LA32-NEXT:    sub.w $a0, $a0, $a2
+; LA32-NEXT:    and $a2, $a0, $a3
 ; LA32-NEXT:    srli.w $a0, $a0, 2
-; LA32-NEXT:    and $a0, $a0, $a2
-; LA32-NEXT:    add.w $a0, $a3, $a0
+; LA32-NEXT:    and $a0, $a0, $a3
+; LA32-NEXT:    add.w $a0, $a2, $a0
 ; LA32-NEXT:    srli.w $a2, $a0, 4
 ; LA32-NEXT:    add.w $a0, $a0, $a2
 ; LA32-NEXT:    and $a0, $a0, $a4
diff --git a/llvm/test/CodeGen/LoongArch/fcopysign.ll b/llvm/test/CodeGen/LoongArch/fcopysign.ll
index 49e8fbca3e12e..181130d2c6a5f 100644
--- a/llvm/test/CodeGen/LoongArch/fcopysign.ll
+++ b/llvm/test/CodeGen/LoongArch/fcopysign.ll
@@ -73,10 +73,10 @@ define double @fold_promote_d_s(double %a, float %b) nounwind {
 ;
 ; LA64F-LABEL: fold_promote_d_s:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    movfr2gr.s $a1, $fa0
-; LA64F-NEXT:    lu12i.w $a2, -524288
-; LA64F-NEXT:    lu32i.d $a2, 0
-; LA64F-NEXT:    and $a1, $a1, $a2
+; LA64F-NEXT:    lu12i.w $a1, -524288
+; LA64F-NEXT:    lu32i.d $a1, 0
+; LA64F-NEXT:    movfr2gr.s $a2, $fa0
+; LA64F-NEXT:    and $a1, $a2, $a1
 ; LA64F-NEXT:    slli.d $a1, $a1, 32
 ; LA64F-NEXT:    bstrins.d $a1, $a0, 62, 0
 ; LA64F-NEXT:    move $a0, $a1
diff --git a/llvm/test/CodeGen/LoongArch/gep-imm.ll b/llvm/test/CodeGen/LoongArch/gep-imm.ll
index 567d3ea43ac91..a583a3554a73a 100644
--- a/llvm/test/CodeGen/LoongArch/gep-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/gep-imm.ll
@@ -11,11 +11,11 @@ define void @test(ptr %sp, ptr %t, i32 %n) {
 ; CHECK-NEXT:    .p2align 4, , 16
 ; CHECK-NEXT:  .LBB0_1: # %while_body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    stptr.w $a3, $a0, 8004
 ; CHECK-NEXT:    addi.w $a4, $a3, 1
 ; CHECK-NEXT:    stptr.w $a4, $a0, 8000
-; CHECK-NEXT:    stptr.w $a3, $a0, 8004
-; CHECK-NEXT:    stptr.w $a4, $a1, 8000
 ; CHECK-NEXT:    stptr.w $a3, $a1, 8004
+; CHECK-NEXT:    stptr.w $a4, $a1, 8000
 ; CHECK-NEXT:    move $a3, $a4
 ; CHECK-NEXT:    blt $a3, $a2, .LBB0_1
 ; CHECK-NEXT:  .LBB0_2: # %while_end
diff --git a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
index 6cf9d7d75b996..e4fb79d5e4387 100644
--- a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
+++ b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll
@@ -5,21 +5,21 @@
 define void @getSetCCResultType(ptr %p) {
 ; CHECK-LABEL: getSetCCResultType:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.w $a1, $a0, 0
-; CHECK-NEXT:    ld.w $a2, $a0, 12
-; CHECK-NEXT:    ld.w $a3, $a0, 4
-; CHECK-NEXT:    ld.w $a4, $a0, 8
+; CHECK-NEXT:    ld.w $a1, $a0, 12
+; CHECK-NEXT:    ld.w $a2, $a0, 8
 ; CHECK-NEXT:    sltui $a1, $a1, 1
 ; CHECK-NEXT:    sub.d $a1, $zero, $a1
-; CHECK-NEXT:    sltui $a3, $a3, 1
-; CHECK-NEXT:    sub.d $a3, $zero, $a3
-; CHECK-NEXT:    sltui $a4, $a4, 1
-; CHECK-NEXT:    sub.d $a4, $zero, $a4
+; CHECK-NEXT:    st.w $a1, $a0, 12
+; CHECK-NEXT:    sltui $a1, $a2, 1
+; CHECK-NEXT:    ld.w $a2, $a0, 4
+; CHECK-NEXT:    sub.d $a1, $zero, $a1
+; CHECK-NEXT:    st.w $a1, $a0, 8
+; CHECK-NEXT:    ld.w $a1, $a0, 0
 ; CHECK-NEXT:    sltui $a2, $a2, 1
 ; CHECK-NEXT:    sub.d $a2, $zero, $a2
-; CHECK-NEXT:    st.w $a2, $a0, 12
-; CHECK-NEXT:    st.w $a4, $a0, 8
-; CHECK-NEXT:    st.w $a3, $a0, 4
+; CHECK-NEXT:    st.w $a2, $a0, 4
+; CHECK-NEXT:    sltui $a1, $a1, 1
+; CHECK-NEXT:    sub.d $a1, $zero, $a1
 ; CHECK-NEXT:    st.w $a1, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/ghc-cc.ll b/llvm/test/CodeGen/LoongArch/ghc-cc.ll
index 735315d323a36..0ab125e875b99 100644
--- a/llvm/test/CodeGen/LoongArch/ghc-cc.ll
+++ b/llvm/test/CodeGen/LoongArch/ghc-cc.ll
@@ -26,57 +26,57 @@
 define ghccc void @foo() nounwind {
 ; LA64-LABEL: foo:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d4)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d4)
-; LA64-NEXT:    fld.d $fs7, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d3)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d3)
-; LA64-NEXT:    fld.d $fs6, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d2)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d2)
-; LA64-NEXT:    fld.d $fs5, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d1)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d1)
-; LA64-NEXT:    fld.d $fs4, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f4)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f4)
-; LA64-NEXT:    fld.s $fs3, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f3)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f3)
-; LA64-NEXT:    fld.s $fs2, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f2)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f2)
-; LA64-NEXT:    fld.s $fs1, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f1)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f1)
-; LA64-NEXT:    fld.s $fs0, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(splim)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(splim)
-; LA64-NEXT:    ld.d $s8, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r5)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r5)
-; LA64-NEXT:    ld.d $s7, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r4)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r4)
-; LA64-NEXT:    ld.d $s6, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r3)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r3)
-; LA64-NEXT:    ld.d $s5, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r2)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r2)
-; LA64-NEXT:    ld.d $s4, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r1)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r1)
-; LA64-NEXT:    ld.d $s3, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(hp)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(hp)
-; LA64-NEXT:    ld.d $s2, $a0, 0
-; LA64-NEXT:    pcalau12i $a0, %pc_hi20(sp)
-; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(sp)
-; LA64-NEXT:    ld.d $s1, $a0, 0
 ; LA64-NEXT:    pcalau12i $a0, %pc_hi20(base)
 ; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(base)
 ; LA64-NEXT:    ld.d $s0, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(sp)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(sp)
+; LA64-NEXT:    ld.d $s1, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(hp)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(hp)
+; LA64-NEXT:    ld.d $s2, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r1)
+; LA64-NEXT:    ld.d $s3, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r2)
+; LA64-NEXT:    ld.d $s4, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r3)
+; LA64-NEXT:    ld.d $s5, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r4)
+; LA64-NEXT:    ld.d $s6, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r5)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r5)
+; LA64-NEXT:    ld.d $s7, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(splim)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(splim)
+; LA64-NEXT:    ld.d $s8, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f1)
+; LA64-NEXT:    fld.s $fs0, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f2)
+; LA64-NEXT:    fld.s $fs1, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f3)
+; LA64-NEXT:    fld.s $fs2, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f4)
+; LA64-NEXT:    fld.s $fs3, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d1)
+; LA64-NEXT:    fld.d $fs4, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d2)
+; LA64-NEXT:    fld.d $fs5, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d3)
+; LA64-NEXT:    fld.d $fs6, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d4)
+; LA64-NEXT:    fld.d $fs7, $a0, 0
 ; LA64-NEXT:    b %plt(bar)
 
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
index 622001db32955..b36be7b4ade77 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
@@ -11,19 +11,19 @@ define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 der
 ; CHECK-NEXT:    slli.d $a2, $a1, 5
 ; CHECK-NEXT:    alsl.d $a1, $a1, $a2, 4
 ; CHECK-NEXT:    addi.d $a2, $sp, 0
-; CHECK-NEXT:    add.d $a3, $a2, $a1
-; CHECK-NEXT:    ldx.d $a1, $a1, $a2
-; CHECK-NEXT:    ld.d $a2, $a3, 40
-; CHECK-NEXT:    st.d $a1, $a0, 0
+; CHECK-NEXT:    ldx.d $a3, $a1, $a2
+; CHECK-NEXT:    add.d $a1, $a2, $a1
+; CHECK-NEXT:    ld.d $a2, $a1, 40
+; CHECK-NEXT:    st.d $a3, $a0, 0
 ; CHECK-NEXT:    st.d $a2, $a0, 40
-; CHECK-NEXT:    ld.d $a1, $a3, 32
-; CHECK-NEXT:    ld.d $a2, $a3, 24
-; CHECK-NEXT:    ld.d $a4, $a3, 16
-; CHECK-NEXT:    ld.d $a3, $a3, 8
-; CHECK-NEXT:    st.d $a1, $a0, 32
-; CHECK-NEXT:    st.d $a2, $a0, 24
+; CHECK-NEXT:    ld.d $a2, $a1, 32
+; CHECK-NEXT:    ld.d $a3, $a1, 24
+; CHECK-NEXT:    ld.d $a4, $a1, 16
+; CHECK-NEXT:    ld.d $a1, $a1, 8
+; CHECK-NEXT:    st.d $a2, $a0, 32
+; CHECK-NEXT:    st.d $a3, $a0, 24
 ; CHECK-NEXT:    st.d $a4, $a0, 16
-; CHECK-NEXT:    st.d $a3, $a0, 8
+; CHECK-NEXT:    st.d $a1, $a0, 8
 ; CHECK-NEXT:    addi.d $sp, $sp, 96
 ; CHECK-NEXT:    ret
   %1 = alloca [2 x %Box], align 16
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
index 9f534439b4f50..5400632ab4fa4 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
@@ -285,19 +285,19 @@ define signext i32 @and_i32_0xfff0(i32 %a) {
 define signext i32 @and_i32_0xfff0_twice(i32 %a, i32 %b) {
 ; LA32-LABEL: and_i32_0xfff0_twice:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 4
-; LA32-NEXT:    slli.w $a0, $a0, 4
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 4
 ; LA32-NEXT:    slli.w $a1, $a1, 4
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 4
+; LA32-NEXT:    slli.w $a0, $a0, 4
 ; LA32-NEXT:    sub.w $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: and_i32_0xfff0_twice:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 4
-; LA64-NEXT:    slli.d $a0, $a0, 4
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 4
 ; LA64-NEXT:    slli.d $a1, $a1, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 4
+; LA64-NEXT:    slli.d $a0, $a0, 4
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %c = and i32 %a, 65520
@@ -326,21 +326,21 @@ define i64 @and_i64_0xfff0(i64 %a) {
 define i64 @and_i64_0xfff0_twice(i64 %a, i64 %b) {
 ; LA32-LABEL: and_i64_0xfff0_twice:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    bstrpick.w $a1, $a2, 15, 4
+; LA32-NEXT:    slli.w $a1, $a1, 4
 ; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 4
-; LA32-NEXT:    slli.w $a1, $a0, 4
-; LA32-NEXT:    bstrpick.w $a0, $a2, 15, 4
 ; LA32-NEXT:    slli.w $a2, $a0, 4
-; LA32-NEXT:    sub.w $a0, $a1, $a2
-; LA32-NEXT:    sltu $a1, $a1, $a2
+; LA32-NEXT:    sub.w $a0, $a2, $a1
+; LA32-NEXT:    sltu $a1, $a2, $a1
 ; LA32-NEXT:    sub.w $a1, $zero, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: and_i64_0xfff0_twice:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 4
-; LA64-NEXT:    slli.d $a0, $a0, 4
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 4
 ; LA64-NEXT:    slli.d $a1, $a1, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 4
+; LA64-NEXT:    slli.d $a0, $a0, 4
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %c = and i64 %a, 65520
@@ -390,26 +390,26 @@ define i64 @and_i64_0xfff0_multiple_times(i64 %a, i64 %b, i64 %c) {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    lu12i.w $a1, 15
 ; LA32-NEXT:    ori $a1, $a1, 4080
-; LA32-NEXT:    and $a0, $a0, $a1
-; LA32-NEXT:    and $a2, $a2, $a1
 ; LA32-NEXT:    and $a3, $a4, $a1
-; LA32-NEXT:    sltu $a1, $a0, $a2
+; LA32-NEXT:    and $a2, $a2, $a1
+; LA32-NEXT:    mul.w $a3, $a2, $a3
+; LA32-NEXT:    and $a1, $a0, $a1
+; LA32-NEXT:    sub.w $a0, $a1, $a2
+; LA32-NEXT:    xor $a0, $a0, $a3
+; LA32-NEXT:    sltu $a1, $a1, $a2
 ; LA32-NEXT:    sub.w $a1, $zero, $a1
-; LA32-NEXT:    sub.w $a0, $a0, $a2
-; LA32-NEXT:    mul.w $a2, $a2, $a3
-; LA32-NEXT:    xor $a0, $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: and_i64_0xfff0_multiple_times:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4080
-; LA64-NEXT:    and $a0, $a0, $a3
-; LA64-NEXT:    and $a1, $a1, $a3
 ; LA64-NEXT:    and $a2, $a2, $a3
+; LA64-NEXT:    and $a1, $a1, $a3
+; LA64-NEXT:    mul.d $a2, $a1, $a2
+; LA64-NEXT:    and $a0, $a0, $a3
 ; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    mul.d $a1, $a1, $a2
-; LA64-NEXT:    xor $a0, $a0, $a1
+; LA64-NEXT:    xor $a0, $a0, $a2
 ; LA64-NEXT:    ret
   %d = and i64 %a, 65520
   %e = and i64 %b, 65520
@@ -444,8 +444,8 @@ define i32 @and_add_lsr(i32 %x, i32 %y) {
 ;
 ; LA64-LABEL: and_add_lsr:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.w $a0, $a0, -1
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 20
+; LA64-NEXT:    addi.w $a0, $a0, -1
 ; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %1 = add i32 %x, 4095
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
index ad98397dfe8f0..b530f53553ff1 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -5,13 +5,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i8_acquire_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -34,14 +34,14 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; LA64-LABEL: cmpxchg_i16_acquire_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -103,13 +103,13 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-LABEL: cmpxchg_i8_acquire_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -132,14 +132,14 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
 ; LA64-LABEL: cmpxchg_i16_acquire_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -201,13 +201,13 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind
 ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a6, $a5, $a4
@@ -232,14 +232,14 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou
 ; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a6, $a5, $a4
@@ -307,13 +307,13 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind
 ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -340,14 +340,14 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw
 ; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -419,13 +419,13 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind
 ; LA64-LABEL: cmpxchg_i8_monotonic_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -448,14 +448,14 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
 ; LA64-LABEL: cmpxchg_i16_monotonic_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -517,13 +517,13 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun
 ; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a6, $a5, $a4
@@ -548,14 +548,14 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val)
 ; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a6, $a5, $a4
@@ -623,13 +623,13 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun
 ; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a4, $zero, 255
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    andi $a2, $a2, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
@@ -656,14 +656,14 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n
 ; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a3, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a3
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    sll.w $a1, $a1, $a3
 ; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
 ; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    and $a5, $a3, $a4
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 193fa6c08600a..e0fd03b9e9c76 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -301,30 +301,28 @@ define double @double_fadd_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB4_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 2
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB4_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -345,9 +343,9 @@ define double @double_fadd_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB4_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -374,30 +372,28 @@ define double @double_fsub_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB5_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 2
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB5_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -418,9 +414,9 @@ define double @double_fsub_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB5_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -447,30 +443,28 @@ define double @double_fmin_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB6_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 2
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB6_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -491,10 +485,10 @@ define double @double_fmin_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB6_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -521,30 +515,28 @@ define double @double_fmax_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB7_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 2
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB7_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -565,10 +557,10 @@ define double @double_fmax_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB7_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -887,30 +879,28 @@ define double @double_fadd_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB12_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 3
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB12_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -931,9 +921,9 @@ define double @double_fadd_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB12_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -960,30 +950,28 @@ define double @double_fsub_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB13_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 3
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB13_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -1004,9 +992,9 @@ define double @double_fsub_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB13_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -1033,30 +1021,28 @@ define double @double_fmin_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB14_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 3
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB14_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -1077,10 +1063,10 @@ define double @double_fmin_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB14_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -1107,30 +1093,28 @@ define double @double_fmax_release(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB15_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 3
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB15_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -1151,10 +1135,10 @@ define double @double_fmax_release(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB15_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -1473,30 +1457,28 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB20_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 4
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB20_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -1517,9 +1499,9 @@ define double @double_fadd_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB20_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -1546,30 +1528,28 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB21_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 4
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB21_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -1590,9 +1570,9 @@ define double @double_fsub_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB21_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -1619,30 +1599,28 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB22_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 4
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB22_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -1663,10 +1641,10 @@ define double @double_fmin_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB22_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -1693,30 +1671,28 @@ define double @double_fmax_acq_rel(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB23_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 4
 ; LA64F-NEXT:    ori $a5, $zero, 2
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB23_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -1737,10 +1713,10 @@ define double @double_fmax_acq_rel(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB23_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2059,30 +2035,28 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB28_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 5
 ; LA64F-NEXT:    ori $a5, $zero, 5
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB28_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2103,9 +2077,9 @@ define double @double_fadd_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB28_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2132,30 +2106,28 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB29_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 5
 ; LA64F-NEXT:    ori $a5, $zero, 5
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB29_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2176,9 +2148,9 @@ define double @double_fsub_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB29_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2205,30 +2177,28 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB30_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 5
 ; LA64F-NEXT:    ori $a5, $zero, 5
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB30_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2249,10 +2219,10 @@ define double @double_fmin_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB30_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2279,30 +2249,28 @@ define double @double_fmax_seq_cst(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB31_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    ori $a4, $zero, 5
 ; LA64F-NEXT:    ori $a5, $zero, 5
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB31_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2323,10 +2291,10 @@ define double @double_fmax_seq_cst(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB31_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2645,30 +2613,28 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB36_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB36_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2689,9 +2655,9 @@ define double @double_fadd_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB36_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2718,30 +2684,28 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB37_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB37_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2762,9 +2726,9 @@ define double @double_fsub_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB37_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fadd.d $fa1, $fa0, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2791,30 +2755,28 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB38_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmin)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB38_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2835,10 +2797,10 @@ define double @double_fmin_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB38_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmin.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
@@ -2865,30 +2827,28 @@ define double @double_fmax_monotonic(ptr %p) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a0
-; LA64F-NEXT:    ld.d $s1, $a0, 0
+; LA64F-NEXT:    ld.d $a1, $a0, 0
 ; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
 ; LA64F-NEXT:    .p2align 4, , 16
 ; LA64F-NEXT:  .LBB39_1: # %atomicrmw.start
 ; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    st.d $a1, $sp, 16
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    move $a1, $s0
 ; LA64F-NEXT:    bl %plt(fmax)
-; LA64F-NEXT:    st.d $s1, $sp, 8
-; LA64F-NEXT:    st.d $a0, $sp, 0
+; LA64F-NEXT:    st.d $a0, $sp, 8
 ; LA64F-NEXT:    ori $a0, $zero, 8
-; LA64F-NEXT:    addi.d $a2, $sp, 8
-; LA64F-NEXT:    addi.d $a3, $sp, 0
+; LA64F-NEXT:    addi.d $a2, $sp, 16
+; LA64F-NEXT:    addi.d $a3, $sp, 8
 ; LA64F-NEXT:    move $a1, $fp
 ; LA64F-NEXT:    move $a4, $zero
 ; LA64F-NEXT:    move $a5, $zero
 ; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT:    ld.d $s1, $sp, 8
+; LA64F-NEXT:    ld.d $a1, $sp, 16
 ; LA64F-NEXT:    beqz $a0, .LBB39_1
 ; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    move $a0, $a1
 ; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
@@ -2909,10 +2869,10 @@ define double @double_fmax_monotonic(ptr %p) nounwind {
 ; LA64D-NEXT:    .p2align 4, , 16
 ; LA64D-NEXT:  .LBB39_1: # %atomicrmw.start
 ; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
-; LA64D-NEXT:    fmax.d $fa1, $fa1, $fs0
 ; LA64D-NEXT:    fst.d $fa0, $sp, 16
-; LA64D-NEXT:    fst.d $fa1, $sp, 8
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fs0
+; LA64D-NEXT:    fst.d $fa0, $sp, 8
 ; LA64D-NEXT:    ori $a0, $zero, 8
 ; LA64D-NEXT:    addi.d $a2, $sp, 16
 ; LA64D-NEXT:    addi.d $a3, $sp, 8
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
index 2bd29c2670a68..bc12cdcf573da 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -8,11 +8,11 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -36,12 +36,12 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -85,11 +85,11 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -113,12 +113,12 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -162,23 +162,23 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a7, $a1, .LBB8_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB8_3: # in Loop: Header=BB8_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -194,15 +194,15 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -248,23 +248,23 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a1, $a7, .LBB12_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB12_3: # in Loop: Header=BB12_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -280,15 +280,15 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -334,11 +334,11 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -362,12 +362,12 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -411,11 +411,11 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -439,12 +439,12 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -488,23 +488,23 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a7, $a1, .LBB24_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB24_3: # in Loop: Header=BB24_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -520,15 +520,15 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -574,23 +574,23 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a1, $a7, .LBB28_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB28_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB28_3: # in Loop: Header=BB28_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -606,15 +606,15 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -660,11 +660,11 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -688,12 +688,12 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -737,11 +737,11 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -765,12 +765,12 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -814,23 +814,23 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a7, $a1, .LBB40_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB40_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB40_3: # in Loop: Header=BB40_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -846,15 +846,15 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -900,23 +900,23 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a1, $a7, .LBB44_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB44_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB44_3: # in Loop: Header=BB44_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -932,15 +932,15 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -986,11 +986,11 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1014,12 +1014,12 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1063,11 +1063,11 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1091,12 +1091,12 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1140,23 +1140,23 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a7, $a1, .LBB56_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1172,15 +1172,15 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -1226,23 +1226,23 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a1, $a7, .LBB60_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB60_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB60_3: # in Loop: Header=BB60_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1258,15 +1258,15 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -1312,11 +1312,11 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1340,12 +1340,12 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umax_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1389,11 +1389,11 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1417,12 +1417,12 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_umin_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
@@ -1466,23 +1466,23 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB72_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a7, $a1, .LBB72_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB72_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB72_3: # in Loop: Header=BB72_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1498,15 +1498,15 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_max_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB73_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
@@ -1552,23 +1552,23 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT:    andi $a3, $a2, 24
-; LA64-NEXT:    ori $a4, $zero, 255
-; LA64-NEXT:    sll.w $a4, $a4, $a2
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    ext.w.b $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    xori $a3, $a3, 56
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    andi $a4, $a2, 24
+; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB76_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
-; LA64-NEXT:    and $a7, $a5, $a4
+; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
-; LA64-NEXT:    sll.w $a7, $a7, $a3
-; LA64-NEXT:    sra.w $a7, $a7, $a3
+; LA64-NEXT:    sll.w $a7, $a7, $a4
+; LA64-NEXT:    sra.w $a7, $a7, $a4
 ; LA64-NEXT:    bge $a1, $a7, .LBB76_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB76_1 Depth=1
 ; LA64-NEXT:    xor $a6, $a5, $a1
-; LA64-NEXT:    and $a6, $a6, $a4
+; LA64-NEXT:    and $a6, $a6, $a3
 ; LA64-NEXT:    xor $a6, $a5, $a6
 ; LA64-NEXT:  .LBB76_3: # in Loop: Header=BB76_1 Depth=1
 ; LA64-NEXT:    sc.w $a6, $a0, 0
@@ -1584,15 +1584,15 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_min_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a3, $a2, 24
+; LA64-NEXT:    ori $a4, $zero, 48
+; LA64-NEXT:    sub.d $a3, $a4, $a3
 ; LA64-NEXT:    lu12i.w $a4, 15
 ; LA64-NEXT:    ori $a4, $a4, 4095
 ; LA64-NEXT:    sll.w $a4, $a4, $a2
 ; LA64-NEXT:    ext.w.h $a1, $a1
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
-; LA64-NEXT:    ori $a5, $zero, 48
-; LA64-NEXT:    sub.d $a3, $a5, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB77_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
index f2f459ecaa2ec..19b4f3ed2160b 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -6,11 +6,11 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -26,11 +26,11 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -50,10 +50,10 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -66,10 +66,10 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -81,9 +81,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -96,9 +96,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -110,12 +110,12 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -131,12 +131,12 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -156,11 +156,11 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -173,11 +173,11 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -189,10 +189,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -205,10 +205,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -261,11 +261,11 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -281,11 +281,11 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -305,12 +305,12 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -326,12 +326,12 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -392,11 +392,11 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -412,11 +412,11 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -436,12 +436,12 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -457,12 +457,12 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -525,11 +525,11 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -546,11 +546,11 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -571,12 +571,12 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -593,12 +593,12 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -673,12 +673,12 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -691,12 +691,12 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -708,13 +708,13 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -727,13 +727,13 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -786,9 +786,9 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -801,9 +801,9 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -815,9 +815,9 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -830,9 +830,9 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -885,9 +885,9 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -900,9 +900,9 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i8_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -914,9 +914,9 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -929,9 +929,9 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i16_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -984,11 +984,11 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -1004,11 +1004,11 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -1028,10 +1028,10 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -1044,10 +1044,10 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -1059,9 +1059,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -1074,9 +1074,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -1088,12 +1088,12 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -1109,12 +1109,12 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -1134,11 +1134,11 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -1151,11 +1151,11 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -1167,10 +1167,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -1183,10 +1183,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -1239,11 +1239,11 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -1259,11 +1259,11 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -1283,12 +1283,12 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -1304,12 +1304,12 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -1370,11 +1370,11 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -1390,11 +1390,11 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -1414,12 +1414,12 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -1435,12 +1435,12 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -1503,11 +1503,11 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -1524,11 +1524,11 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -1549,12 +1549,12 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -1571,12 +1571,12 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -1651,12 +1651,12 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -1669,12 +1669,12 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1686,13 +1686,13 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -1705,13 +1705,13 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1764,9 +1764,9 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -1779,9 +1779,9 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1793,9 +1793,9 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -1808,9 +1808,9 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1863,9 +1863,9 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -1878,9 +1878,9 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i8_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1892,9 +1892,9 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_release:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -1907,9 +1907,9 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i16_release:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -1962,11 +1962,11 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -1982,11 +1982,11 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -2006,10 +2006,10 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -2022,10 +2022,10 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -2037,9 +2037,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -2052,9 +2052,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -2066,12 +2066,12 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -2087,12 +2087,12 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -2112,11 +2112,11 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -2129,11 +2129,11 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -2145,10 +2145,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -2161,10 +2161,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -2217,11 +2217,11 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB72_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -2237,11 +2237,11 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB72_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -2261,12 +2261,12 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB73_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -2282,12 +2282,12 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB73_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -2348,11 +2348,11 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB76_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -2368,11 +2368,11 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB76_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -2392,12 +2392,12 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB77_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -2413,12 +2413,12 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB77_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -2481,11 +2481,11 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB80_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -2502,11 +2502,11 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB80_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -2527,12 +2527,12 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -2549,12 +2549,12 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -2629,12 +2629,12 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB84_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -2647,12 +2647,12 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2664,13 +2664,13 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB85_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -2683,13 +2683,13 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2742,9 +2742,9 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -2757,9 +2757,9 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2771,9 +2771,9 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB89_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -2786,9 +2786,9 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2841,9 +2841,9 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB92_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -2856,9 +2856,9 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i8_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2870,9 +2870,9 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_acq_rel:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB93_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -2885,9 +2885,9 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i16_acq_rel:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -2940,11 +2940,11 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -2960,11 +2960,11 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -2984,10 +2984,10 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -3000,10 +3000,10 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -3015,9 +3015,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -3030,9 +3030,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -3044,12 +3044,12 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -3065,12 +3065,12 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -3090,11 +3090,11 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB100_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -3107,11 +3107,11 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -3123,10 +3123,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB101_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -3139,10 +3139,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -3195,11 +3195,11 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB104_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -3215,11 +3215,11 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB104_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -3239,12 +3239,12 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB105_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -3260,12 +3260,12 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB105_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -3326,11 +3326,11 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB108_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -3346,11 +3346,11 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB108_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -3370,12 +3370,12 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB109_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -3391,12 +3391,12 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB109_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -3459,11 +3459,11 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -3480,11 +3480,11 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -3505,12 +3505,12 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -3527,12 +3527,12 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -3607,12 +3607,12 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -3625,12 +3625,12 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3642,13 +3642,13 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -3661,13 +3661,13 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3720,9 +3720,9 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB120_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -3735,9 +3735,9 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3749,9 +3749,9 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -3764,9 +3764,9 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3819,9 +3819,9 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB124_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -3834,9 +3834,9 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i8_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3848,9 +3848,9 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_seq_cst:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB125_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -3863,9 +3863,9 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i16_seq_cst:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -3918,11 +3918,11 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -3938,11 +3938,11 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -3962,10 +3962,10 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB129_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -3978,10 +3978,10 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -3993,9 +3993,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a2, $zero, 255
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB130_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -4008,9 +4008,9 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a2, $zero, 255
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -4022,12 +4022,12 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB131_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
@@ -4043,12 +4043,12 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB131_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
@@ -4068,11 +4068,11 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_0_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    nor $a2, $a2, $zero
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB132_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
@@ -4085,11 +4085,11 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_0_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
 ; LA64-NEXT:    nor $a2, $a2, $zero
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -4101,10 +4101,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a1, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a2, 15
 ; LA32-NEXT:    ori $a2, $a2, 4095
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB133_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
@@ -4117,10 +4117,10 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
 ; LA64-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a1, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 4095
 ; LA64-NEXT:    sll.w $a2, $a2, $a1
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a2, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a1
 ; LA64-NEXT:    ret
@@ -4173,11 +4173,11 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB136_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -4193,11 +4193,11 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB136_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -4217,12 +4217,12 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB137_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
@@ -4238,12 +4238,12 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_add_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB137_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
@@ -4304,11 +4304,11 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB140_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -4324,11 +4324,11 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB140_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -4348,12 +4348,12 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB141_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
@@ -4369,12 +4369,12 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_sub_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB141_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
@@ -4437,11 +4437,11 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB144_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -4458,11 +4458,11 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB144_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -4483,12 +4483,12 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB145_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
@@ -4505,12 +4505,12 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB145_1: # =>This Inner Loop Header: Depth=1
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
@@ -4585,12 +4585,12 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    ori $a3, $zero, 255
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB148_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -4603,12 +4603,12 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    ori $a3, $zero, 255
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4620,13 +4620,13 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    lu12i.w $a3, 15
 ; LA32-NEXT:    ori $a3, $a3, 4095
 ; LA32-NEXT:    sll.w $a3, $a3, $a2
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    orn $a1, $a1, $a3
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB149_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
@@ -4639,13 +4639,13 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_and_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    lu12i.w $a3, 15
 ; LA64-NEXT:    ori $a3, $a3, 4095
 ; LA64-NEXT:    sll.w $a3, $a3, $a2
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
 ; LA64-NEXT:    orn $a1, $a1, $a3
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amand_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4698,9 +4698,9 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -4713,9 +4713,9 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4727,9 +4727,9 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
@@ -4742,9 +4742,9 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_or_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4797,9 +4797,9 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i8_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    andi $a1, $a1, 255
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB156_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -4812,9 +4812,9 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i8_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    andi $a1, $a1, 255
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
@@ -4826,9 +4826,9 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i16_monotonic:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    slli.w $a2, $a0, 3
-; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB157_1: # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
@@ -4841,9 +4841,9 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; LA64-LABEL: atomicrmw_xor_i16_monotonic:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    slli.d $a2, $a0, 3
-; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
 ; LA64-NEXT:    sll.w $a1, $a1, $a2
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:    amxor_db.w $a3, $a1, $a0
 ; LA64-NEXT:    srl.w $a0, $a3, $a2
 ; LA64-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
index ef117f9748871..46c6470123a57 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
@@ -232,11 +232,11 @@ define double @convert_u32_to_double(i32 %a) nounwind {
 ; LA32-NEXT:    lu12i.w $a1, 275200
 ; LA32-NEXT:    st.w $a1, $sp, 12
 ; LA32-NEXT:    st.w $a0, $sp, 8
-; LA32-NEXT:    fld.d $fa0, $sp, 8
 ; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI12_0)
 ; LA32-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI12_0)
-; LA32-NEXT:    fld.d $fa1, $a0, 0
-; LA32-NEXT:    fsub.d $fa0, $fa0, $fa1
+; LA32-NEXT:    fld.d $fa0, $a0, 0
+; LA32-NEXT:    fld.d $fa1, $sp, 8
+; LA32-NEXT:    fsub.d $fa0, $fa1, $fa0
 ; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
index b01b84ba385ec..e9244cf347f70 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
@@ -504,11 +504,11 @@ define float @convert_u32_to_float(i32 %a) nounwind {
 ; LA32D-NEXT:    lu12i.w $a1, 275200
 ; LA32D-NEXT:    st.w $a1, $sp, 12
 ; LA32D-NEXT:    st.w $a0, $sp, 8
-; LA32D-NEXT:    fld.d $fa0, $sp, 8
 ; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI14_0)
 ; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI14_0)
-; LA32D-NEXT:    fld.d $fa1, $a0, 0
-; LA32D-NEXT:    fsub.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    fld.d $fa0, $a0, 0
+; LA32D-NEXT:    fld.d $fa1, $sp, 8
+; LA32D-NEXT:    fsub.d $fa0, $fa1, $fa0
 ; LA32D-NEXT:    fcvt.s.d $fa0, $fa0
 ; LA32D-NEXT:    addi.w $sp, $sp, 16
 ; LA32D-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
index 26f44adc61358..946233c7e95f8 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
@@ -107,32 +107,28 @@ define i32 @load_store_global_array(i32 %a) nounwind {
 define i64 @ld_b(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: ld_b:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.b $a2, $a0, 1
 ; LA32NOPIC-NEXT:    ld.b $zero, $a0, 0
-; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
-; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ld.b $a0, $a0, 1
+; LA32NOPIC-NEXT:    srai.w $a1, $a0, 31
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: ld_b:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.b $a2, $a0, 1
 ; LA32PIC-NEXT:    ld.b $zero, $a0, 0
-; LA32PIC-NEXT:    srai.w $a1, $a2, 31
-; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ld.b $a0, $a0, 1
+; LA32PIC-NEXT:    srai.w $a1, $a0, 31
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: ld_b:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.b $a1, $a0, 1
 ; LA64NOPIC-NEXT:    ld.b $zero, $a0, 0
-; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ld.b $a0, $a0, 1
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: ld_b:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.b $a1, $a0, 1
 ; LA64PIC-NEXT:    ld.b $zero, $a0, 0
-; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ld.b $a0, $a0, 1
 ; LA64PIC-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 1
   %2 = load i8, ptr %1
@@ -144,32 +140,28 @@ define i64 @ld_b(ptr %a) nounwind {
 define i64 @ld_h(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: ld_h:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.h $a2, $a0, 4
 ; LA32NOPIC-NEXT:    ld.h $zero, $a0, 0
-; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
-; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ld.h $a0, $a0, 4
+; LA32NOPIC-NEXT:    srai.w $a1, $a0, 31
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: ld_h:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.h $a2, $a0, 4
 ; LA32PIC-NEXT:    ld.h $zero, $a0, 0
-; LA32PIC-NEXT:    srai.w $a1, $a2, 31
-; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ld.h $a0, $a0, 4
+; LA32PIC-NEXT:    srai.w $a1, $a0, 31
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: ld_h:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.h $a1, $a0, 4
 ; LA64NOPIC-NEXT:    ld.h $zero, $a0, 0
-; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ld.h $a0, $a0, 4
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: ld_h:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.h $a1, $a0, 4
 ; LA64PIC-NEXT:    ld.h $zero, $a0, 0
-; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ld.h $a0, $a0, 4
 ; LA64PIC-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 2
   %2 = load i16, ptr %1
@@ -181,32 +173,28 @@ define i64 @ld_h(ptr %a) nounwind {
 define i64 @ld_w(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: ld_w:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.w $a2, $a0, 12
 ; LA32NOPIC-NEXT:    ld.w $zero, $a0, 0
-; LA32NOPIC-NEXT:    srai.w $a1, $a2, 31
-; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 12
+; LA32NOPIC-NEXT:    srai.w $a1, $a0, 31
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: ld_w:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.w $a2, $a0, 12
 ; LA32PIC-NEXT:    ld.w $zero, $a0, 0
-; LA32PIC-NEXT:    srai.w $a1, $a2, 31
-; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ld.w $a0, $a0, 12
+; LA32PIC-NEXT:    srai.w $a1, $a0, 31
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: ld_w:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.w $a1, $a0, 12
 ; LA64NOPIC-NEXT:    ld.w $zero, $a0, 0
-; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ld.w $a0, $a0, 12
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: ld_w:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.w $a1, $a0, 12
 ; LA64PIC-NEXT:    ld.w $zero, $a0, 0
-; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ld.w $a0, $a0, 12
 ; LA64PIC-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 3
   %2 = load i32, ptr %1
@@ -218,34 +206,30 @@ define i64 @ld_w(ptr %a) nounwind {
 define i64 @ld_d(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: ld_d:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.w $a1, $a0, 28
-; LA32NOPIC-NEXT:    ld.w $a2, $a0, 24
 ; LA32NOPIC-NEXT:    ld.w $zero, $a0, 4
 ; LA32NOPIC-NEXT:    ld.w $zero, $a0, 0
-; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ld.w $a1, $a0, 28
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, 24
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: ld_d:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.w $a1, $a0, 28
-; LA32PIC-NEXT:    ld.w $a2, $a0, 24
 ; LA32PIC-NEXT:    ld.w $zero, $a0, 4
 ; LA32PIC-NEXT:    ld.w $zero, $a0, 0
-; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ld.w $a1, $a0, 28
+; LA32PIC-NEXT:    ld.w $a0, $a0, 24
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: ld_d:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.d $a1, $a0, 24
 ; LA64NOPIC-NEXT:    ld.d $zero, $a0, 0
-; LA64NOPIC-NEXT:    move $a0, $a1
+; LA64NOPIC-NEXT:    ld.d $a0, $a0, 24
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: ld_d:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.d $a1, $a0, 24
 ; LA64PIC-NEXT:    ld.d $zero, $a0, 0
-; LA64PIC-NEXT:    move $a0, $a1
+; LA64PIC-NEXT:    ld.d $a0, $a0, 24
 ; LA64PIC-NEXT:    ret
   %1 = getelementptr i64, ptr %a, i64 3
   %2 = load i64, ptr %1
@@ -256,32 +240,32 @@ define i64 @ld_d(ptr %a) nounwind {
 define i64 @ld_bu(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: ld_bu:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 4
-; LA32NOPIC-NEXT:    ld.bu $a0, $a0, 0
-; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
-; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 0
+; LA32NOPIC-NEXT:    ld.bu $a2, $a0, 4
+; LA32NOPIC-NEXT:    add.w $a0, $a2, $a1
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a2
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: ld_bu:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.bu $a1, $a0, 4
-; LA32PIC-NEXT:    ld.bu $a0, $a0, 0
-; LA32PIC-NEXT:    add.w $a0, $a1, $a0
-; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ld.bu $a1, $a0, 0
+; LA32PIC-NEXT:    ld.bu $a2, $a0, 4
+; LA32PIC-NEXT:    add.w $a0, $a2, $a1
+; LA32PIC-NEXT:    sltu $a1, $a0, $a2
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: ld_bu:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 4
-; LA64NOPIC-NEXT:    ld.bu $a0, $a0, 0
-; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 0
+; LA64NOPIC-NEXT:    ld.bu $a0, $a0, 4
+; LA64NOPIC-NEXT:    add.d $a0, $a0, $a1
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: ld_bu:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.bu $a1, $a0, 4
-; LA64PIC-NEXT:    ld.bu $a0, $a0, 0
-; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ld.bu $a1, $a0, 0
+; LA64PIC-NEXT:    ld.bu $a0, $a0, 4
+; LA64PIC-NEXT:    add.d $a0, $a0, $a1
 ; LA64PIC-NEXT:    ret
   %1 = getelementptr i8, ptr %a, i64 4
   %2 = load i8, ptr %1
@@ -295,32 +279,32 @@ define i64 @ld_bu(ptr %a) nounwind {
 define i64 @ld_hu(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: ld_hu:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.hu $a1, $a0, 10
-; LA32NOPIC-NEXT:    ld.hu $a0, $a0, 0
-; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
-; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ld.hu $a1, $a0, 0
+; LA32NOPIC-NEXT:    ld.hu $a2, $a0, 10
+; LA32NOPIC-NEXT:    add.w $a0, $a2, $a1
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a2
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: ld_hu:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.hu $a1, $a0, 10
-; LA32PIC-NEXT:    ld.hu $a0, $a0, 0
-; LA32PIC-NEXT:    add.w $a0, $a1, $a0
-; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ld.hu $a1, $a0, 0
+; LA32PIC-NEXT:    ld.hu $a2, $a0, 10
+; LA32PIC-NEXT:    add.w $a0, $a2, $a1
+; LA32PIC-NEXT:    sltu $a1, $a0, $a2
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: ld_hu:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.hu $a1, $a0, 10
-; LA64NOPIC-NEXT:    ld.hu $a0, $a0, 0
-; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ld.hu $a1, $a0, 0
+; LA64NOPIC-NEXT:    ld.hu $a0, $a0, 10
+; LA64NOPIC-NEXT:    add.d $a0, $a0, $a1
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: ld_hu:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.hu $a1, $a0, 10
-; LA64PIC-NEXT:    ld.hu $a0, $a0, 0
-; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ld.hu $a1, $a0, 0
+; LA64PIC-NEXT:    ld.hu $a0, $a0, 10
+; LA64PIC-NEXT:    add.d $a0, $a0, $a1
 ; LA64PIC-NEXT:    ret
   %1 = getelementptr i16, ptr %a, i64 5
   %2 = load i16, ptr %1
@@ -334,32 +318,32 @@ define i64 @ld_hu(ptr %a) nounwind {
 define i64 @ld_wu(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: ld_wu:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.w $a1, $a0, 20
-; LA32NOPIC-NEXT:    ld.w $a0, $a0, 0
-; LA32NOPIC-NEXT:    add.w $a0, $a1, $a0
-; LA32NOPIC-NEXT:    sltu $a1, $a0, $a1
+; LA32NOPIC-NEXT:    ld.w $a1, $a0, 0
+; LA32NOPIC-NEXT:    ld.w $a2, $a0, 20
+; LA32NOPIC-NEXT:    add.w $a0, $a2, $a1
+; LA32NOPIC-NEXT:    sltu $a1, $a0, $a2
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: ld_wu:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.w $a1, $a0, 20
-; LA32PIC-NEXT:    ld.w $a0, $a0, 0
-; LA32PIC-NEXT:    add.w $a0, $a1, $a0
-; LA32PIC-NEXT:    sltu $a1, $a0, $a1
+; LA32PIC-NEXT:    ld.w $a1, $a0, 0
+; LA32PIC-NEXT:    ld.w $a2, $a0, 20
+; LA32PIC-NEXT:    add.w $a0, $a2, $a1
+; LA32PIC-NEXT:    sltu $a1, $a0, $a2
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: ld_wu:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.wu $a1, $a0, 20
-; LA64NOPIC-NEXT:    ld.wu $a0, $a0, 0
-; LA64NOPIC-NEXT:    add.d $a0, $a1, $a0
+; LA64NOPIC-NEXT:    ld.wu $a1, $a0, 0
+; LA64NOPIC-NEXT:    ld.wu $a0, $a0, 20
+; LA64NOPIC-NEXT:    add.d $a0, $a0, $a1
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: ld_wu:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.wu $a1, $a0, 20
-; LA64PIC-NEXT:    ld.wu $a0, $a0, 0
-; LA64PIC-NEXT:    add.d $a0, $a1, $a0
+; LA64PIC-NEXT:    ld.wu $a1, $a0, 0
+; LA64PIC-NEXT:    ld.wu $a0, $a0, 20
+; LA64PIC-NEXT:    add.d $a0, $a0, $a1
 ; LA64PIC-NEXT:    ret
   %1 = getelementptr i32, ptr %a, i64 5
   %2 = load i32, ptr %1
@@ -496,8 +480,8 @@ define i64 @ldx_d(ptr %a, i64 %idx) nounwind {
 ; LA32NOPIC:       # %bb.0:
 ; LA32NOPIC-NEXT:    alsl.w $a1, $a1, $a0, 3
 ; LA32NOPIC-NEXT:    ld.w $a2, $a1, 0
-; LA32NOPIC-NEXT:    ld.w $a1, $a1, 4
 ; LA32NOPIC-NEXT:    ld.w $zero, $a0, 0
+; LA32NOPIC-NEXT:    ld.w $a1, $a1, 4
 ; LA32NOPIC-NEXT:    ld.w $zero, $a0, 4
 ; LA32NOPIC-NEXT:    move $a0, $a2
 ; LA32NOPIC-NEXT:    ret
@@ -506,8 +490,8 @@ define i64 @ldx_d(ptr %a, i64 %idx) nounwind {
 ; LA32PIC:       # %bb.0:
 ; LA32PIC-NEXT:    alsl.w $a1, $a1, $a0, 3
 ; LA32PIC-NEXT:    ld.w $a2, $a1, 0
-; LA32PIC-NEXT:    ld.w $a1, $a1, 4
 ; LA32PIC-NEXT:    ld.w $zero, $a0, 0
+; LA32PIC-NEXT:    ld.w $a1, $a1, 4
 ; LA32PIC-NEXT:    ld.w $zero, $a0, 4
 ; LA32PIC-NEXT:    move $a0, $a2
 ; LA32PIC-NEXT:    ret
@@ -665,8 +649,8 @@ define i64 @ldx_wu(ptr %a, i64 %idx) nounwind {
 define void @st_b(ptr %a, i8 %b) nounwind {
 ; ALL-LABEL: st_b:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    st.b $a1, $a0, 0
 ; ALL-NEXT:    st.b $a1, $a0, 6
+; ALL-NEXT:    st.b $a1, $a0, 0
 ; ALL-NEXT:    ret
   store i8 %b, ptr %a
   %1 = getelementptr i8, ptr %a, i64 6
@@ -677,8 +661,8 @@ define void @st_b(ptr %a, i8 %b) nounwind {
 define void @st_h(ptr %a, i16 %b) nounwind {
 ; ALL-LABEL: st_h:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    st.h $a1, $a0, 0
 ; ALL-NEXT:    st.h $a1, $a0, 14
+; ALL-NEXT:    st.h $a1, $a0, 0
 ; ALL-NEXT:    ret
   store i16 %b, ptr %a
   %1 = getelementptr i16, ptr %a, i64 7
@@ -689,8 +673,8 @@ define void @st_h(ptr %a, i16 %b) nounwind {
 define void @st_w(ptr %a, i32 %b) nounwind {
 ; ALL-LABEL: st_w:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    st.w $a1, $a0, 0
 ; ALL-NEXT:    st.w $a1, $a0, 28
+; ALL-NEXT:    st.w $a1, $a0, 0
 ; ALL-NEXT:    ret
   store i32 %b, ptr %a
   %1 = getelementptr i32, ptr %a, i64 7
@@ -701,30 +685,30 @@ define void @st_w(ptr %a, i32 %b) nounwind {
 define void @st_d(ptr %a, i64 %b) nounwind {
 ; LA32NOPIC-LABEL: st_d:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    st.w $a2, $a0, 4
-; LA32NOPIC-NEXT:    st.w $a1, $a0, 0
 ; LA32NOPIC-NEXT:    st.w $a2, $a0, 68
 ; LA32NOPIC-NEXT:    st.w $a1, $a0, 64
+; LA32NOPIC-NEXT:    st.w $a2, $a0, 4
+; LA32NOPIC-NEXT:    st.w $a1, $a0, 0
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: st_d:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    st.w $a2, $a0, 4
-; LA32PIC-NEXT:    st.w $a1, $a0, 0
 ; LA32PIC-NEXT:    st.w $a2, $a0, 68
 ; LA32PIC-NEXT:    st.w $a1, $a0, 64
+; LA32PIC-NEXT:    st.w $a2, $a0, 4
+; LA32PIC-NEXT:    st.w $a1, $a0, 0
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: st_d:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    st.d $a1, $a0, 0
 ; LA64NOPIC-NEXT:    st.d $a1, $a0, 64
+; LA64NOPIC-NEXT:    st.d $a1, $a0, 0
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: st_d:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    st.d $a1, $a0, 0
 ; LA64PIC-NEXT:    st.d $a1, $a0, 64
+; LA64PIC-NEXT:    st.d $a1, $a0, 0
 ; LA64PIC-NEXT:    ret
   store i64 %b, ptr %a
   %1 = getelementptr i64, ptr %a, i64 8
@@ -852,40 +836,38 @@ define void @stx_d(ptr %dst, i64 %idx, i64 %val) nounwind {
 define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: load_sext_zext_anyext_i1:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 1
-; LA32NOPIC-NEXT:    ld.bu $a3, $a0, 2
-; LA32NOPIC-NEXT:    sub.w $a2, $a3, $a1
 ; LA32NOPIC-NEXT:    ld.b $zero, $a0, 0
-; LA32NOPIC-NEXT:    sltu $a0, $a3, $a1
-; LA32NOPIC-NEXT:    sub.w $a1, $zero, $a0
-; LA32NOPIC-NEXT:    move $a0, $a2
+; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32NOPIC-NEXT:    ld.bu $a2, $a0, 2
+; LA32NOPIC-NEXT:    sub.w $a0, $a2, $a1
+; LA32NOPIC-NEXT:    sltu $a1, $a2, $a1
+; LA32NOPIC-NEXT:    sub.w $a1, $zero, $a1
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: load_sext_zext_anyext_i1:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.bu $a1, $a0, 1
-; LA32PIC-NEXT:    ld.bu $a3, $a0, 2
-; LA32PIC-NEXT:    sub.w $a2, $a3, $a1
 ; LA32PIC-NEXT:    ld.b $zero, $a0, 0
-; LA32PIC-NEXT:    sltu $a0, $a3, $a1
-; LA32PIC-NEXT:    sub.w $a1, $zero, $a0
-; LA32PIC-NEXT:    move $a0, $a2
+; LA32PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32PIC-NEXT:    ld.bu $a2, $a0, 2
+; LA32PIC-NEXT:    sub.w $a0, $a2, $a1
+; LA32PIC-NEXT:    sltu $a1, $a2, $a1
+; LA32PIC-NEXT:    sub.w $a1, $zero, $a1
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: load_sext_zext_anyext_i1:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 1
-; LA64NOPIC-NEXT:    ld.bu $a2, $a0, 2
 ; LA64NOPIC-NEXT:    ld.b $zero, $a0, 0
-; LA64NOPIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64NOPIC-NEXT:    ld.bu $a0, $a0, 2
+; LA64NOPIC-NEXT:    sub.d $a0, $a0, $a1
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: load_sext_zext_anyext_i1:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.bu $a1, $a0, 1
-; LA64PIC-NEXT:    ld.bu $a2, $a0, 2
 ; LA64PIC-NEXT:    ld.b $zero, $a0, 0
-; LA64PIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64PIC-NEXT:    ld.bu $a0, $a0, 2
+; LA64PIC-NEXT:    sub.d $a0, $a0, $a1
 ; LA64PIC-NEXT:    ret
   ;; sextload i1
   %1 = getelementptr i1, ptr %a, i64 1
@@ -904,34 +886,34 @@ define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
 define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 ; LA32NOPIC-LABEL: load_sext_zext_anyext_i1_i16:
 ; LA32NOPIC:       # %bb.0:
-; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 1
-; LA32NOPIC-NEXT:    ld.bu $a2, $a0, 2
 ; LA32NOPIC-NEXT:    ld.b $zero, $a0, 0
-; LA32NOPIC-NEXT:    sub.w $a0, $a2, $a1
+; LA32NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32NOPIC-NEXT:    ld.bu $a0, $a0, 2
+; LA32NOPIC-NEXT:    sub.w $a0, $a0, $a1
 ; LA32NOPIC-NEXT:    ret
 ;
 ; LA32PIC-LABEL: load_sext_zext_anyext_i1_i16:
 ; LA32PIC:       # %bb.0:
-; LA32PIC-NEXT:    ld.bu $a1, $a0, 1
-; LA32PIC-NEXT:    ld.bu $a2, $a0, 2
 ; LA32PIC-NEXT:    ld.b $zero, $a0, 0
-; LA32PIC-NEXT:    sub.w $a0, $a2, $a1
+; LA32PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA32PIC-NEXT:    ld.bu $a0, $a0, 2
+; LA32PIC-NEXT:    sub.w $a0, $a0, $a1
 ; LA32PIC-NEXT:    ret
 ;
 ; LA64NOPIC-LABEL: load_sext_zext_anyext_i1_i16:
 ; LA64NOPIC:       # %bb.0:
-; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 1
-; LA64NOPIC-NEXT:    ld.bu $a2, $a0, 2
 ; LA64NOPIC-NEXT:    ld.b $zero, $a0, 0
-; LA64NOPIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64NOPIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64NOPIC-NEXT:    ld.bu $a0, $a0, 2
+; LA64NOPIC-NEXT:    sub.d $a0, $a0, $a1
 ; LA64NOPIC-NEXT:    ret
 ;
 ; LA64PIC-LABEL: load_sext_zext_anyext_i1_i16:
 ; LA64PIC:       # %bb.0:
-; LA64PIC-NEXT:    ld.bu $a1, $a0, 1
-; LA64PIC-NEXT:    ld.bu $a2, $a0, 2
 ; LA64PIC-NEXT:    ld.b $zero, $a0, 0
-; LA64PIC-NEXT:    sub.d $a0, $a2, $a1
+; LA64PIC-NEXT:    ld.bu $a1, $a0, 1
+; LA64PIC-NEXT:    ld.bu $a0, $a0, 2
+; LA64PIC-NEXT:    sub.d $a0, $a0, $a1
 ; LA64PIC-NEXT:    ret
   ;; sextload i1
   %1 = getelementptr i1, ptr %a, i64 1
@@ -953,10 +935,10 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
 ; LA32NOPIC-NEXT:    lu12i.w $a3, -136485
 ; LA32NOPIC-NEXT:    ori $a4, $a3, 3823
 ; LA32NOPIC-NEXT:    ld.w $a2, $a4, 0
-; LA32NOPIC-NEXT:    ori $a5, $a3, 3827
-; LA32NOPIC-NEXT:    ld.w $a3, $a5, 0
 ; LA32NOPIC-NEXT:    st.w $a0, $a4, 0
-; LA32NOPIC-NEXT:    st.w $a1, $a5, 0
+; LA32NOPIC-NEXT:    ori $a0, $a3, 3827
+; LA32NOPIC-NEXT:    ld.w $a3, $a0, 0
+; LA32NOPIC-NEXT:    st.w $a1, $a0, 0
 ; LA32NOPIC-NEXT:    move $a0, $a2
 ; LA32NOPIC-NEXT:    move $a1, $a3
 ; LA32NOPIC-NEXT:    ret
@@ -966,10 +948,10 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
 ; LA32PIC-NEXT:    lu12i.w $a3, -136485
 ; LA32PIC-NEXT:    ori $a4, $a3, 3823
 ; LA32PIC-NEXT:    ld.w $a2, $a4, 0
-; LA32PIC-NEXT:    ori $a5, $a3, 3827
-; LA32PIC-NEXT:    ld.w $a3, $a5, 0
 ; LA32PIC-NEXT:    st.w $a0, $a4, 0
-; LA32PIC-NEXT:    st.w $a1, $a5, 0
+; LA32PIC-NEXT:    ori $a0, $a3, 3827
+; LA32PIC-NEXT:    ld.w $a3, $a0, 0
+; LA32PIC-NEXT:    st.w $a1, $a0, 0
 ; LA32PIC-NEXT:    move $a0, $a2
 ; LA32PIC-NEXT:    move $a1, $a3
 ; LA32PIC-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
index 58cc0e7d6484a..d074ba8affc8c 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
@@ -160,44 +160,44 @@ define i32 @mulh_wu(i32 %a, i32 %b) {
 define i64 @mulh_d(i64 %a, i64 %b) {
 ; LA32-LABEL: mulh_d:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srai.w $a5, $a1, 31
-; LA32-NEXT:    srai.w $a6, $a3, 31
 ; LA32-NEXT:    mulh.wu $a4, $a0, $a2
-; LA32-NEXT:    mul.w $a7, $a1, $a2
-; LA32-NEXT:    add.w $a4, $a7, $a4
-; LA32-NEXT:    sltu $a7, $a4, $a7
-; LA32-NEXT:    mulh.wu $t0, $a1, $a2
-; LA32-NEXT:    add.w $a7, $t0, $a7
-; LA32-NEXT:    mul.w $t0, $a0, $a3
-; LA32-NEXT:    add.w $a4, $t0, $a4
-; LA32-NEXT:    sltu $a4, $a4, $t0
-; LA32-NEXT:    mulh.wu $t0, $a0, $a3
-; LA32-NEXT:    add.w $a4, $t0, $a4
-; LA32-NEXT:    add.w $t0, $a7, $a4
-; LA32-NEXT:    mul.w $t1, $a1, $a3
-; LA32-NEXT:    add.w $t2, $t1, $t0
-; LA32-NEXT:    mul.w $t3, $a2, $a5
-; LA32-NEXT:    mul.w $t4, $a6, $a0
-; LA32-NEXT:    add.w $t5, $t4, $t3
-; LA32-NEXT:    add.w $a4, $t2, $t5
-; LA32-NEXT:    sltu $t6, $a4, $t2
-; LA32-NEXT:    sltu $t1, $t2, $t1
+; LA32-NEXT:    mul.w $a5, $a1, $a2
+; LA32-NEXT:    add.w $a4, $a5, $a4
+; LA32-NEXT:    sltu $a5, $a4, $a5
+; LA32-NEXT:    mulh.wu $a6, $a1, $a2
+; LA32-NEXT:    add.w $a5, $a6, $a5
+; LA32-NEXT:    mul.w $a6, $a0, $a3
+; LA32-NEXT:    add.w $a4, $a6, $a4
+; LA32-NEXT:    sltu $a4, $a4, $a6
+; LA32-NEXT:    mulh.wu $a6, $a0, $a3
+; LA32-NEXT:    add.w $a4, $a6, $a4
+; LA32-NEXT:    add.w $a6, $a5, $a4
+; LA32-NEXT:    mul.w $a7, $a1, $a3
+; LA32-NEXT:    add.w $t0, $a7, $a6
+; LA32-NEXT:    srai.w $t1, $a1, 31
+; LA32-NEXT:    mul.w $t2, $a2, $t1
+; LA32-NEXT:    srai.w $t3, $a3, 31
+; LA32-NEXT:    mul.w $t4, $t3, $a0
+; LA32-NEXT:    add.w $t5, $t4, $t2
+; LA32-NEXT:    add.w $a4, $t0, $t5
+; LA32-NEXT:    sltu $t6, $a4, $t0
 ; LA32-NEXT:    sltu $a7, $t0, $a7
-; LA32-NEXT:    mulh.wu $t0, $a1, $a3
-; LA32-NEXT:    add.w $a7, $t0, $a7
-; LA32-NEXT:    add.w $a7, $a7, $t1
-; LA32-NEXT:    mulh.wu $a2, $a2, $a5
-; LA32-NEXT:    add.w $a2, $a2, $t3
-; LA32-NEXT:    mul.w $a3, $a3, $a5
+; LA32-NEXT:    sltu $a5, $a6, $a5
+; LA32-NEXT:    mulh.wu $a6, $a1, $a3
+; LA32-NEXT:    add.w $a5, $a6, $a5
+; LA32-NEXT:    add.w $a5, $a5, $a7
+; LA32-NEXT:    mulh.wu $a2, $a2, $t1
+; LA32-NEXT:    add.w $a2, $a2, $t2
+; LA32-NEXT:    mul.w $a3, $a3, $t1
 ; LA32-NEXT:    add.w $a2, $a2, $a3
-; LA32-NEXT:    mul.w $a1, $a6, $a1
-; LA32-NEXT:    mulh.wu $a0, $a6, $a0
+; LA32-NEXT:    mul.w $a1, $t3, $a1
+; LA32-NEXT:    mulh.wu $a0, $t3, $a0
 ; LA32-NEXT:    add.w $a0, $a0, $a1
 ; LA32-NEXT:    add.w $a0, $a0, $t4
 ; LA32-NEXT:    add.w $a0, $a0, $a2
 ; LA32-NEXT:    sltu $a1, $t5, $t4
 ; LA32-NEXT:    add.w $a0, $a0, $a1
-; LA32-NEXT:    add.w $a0, $a7, $a0
+; LA32-NEXT:    add.w $a0, $a5, $a0
 ; LA32-NEXT:    add.w $a1, $a0, $t6
 ; LA32-NEXT:    move $a0, $a4
 ; LA32-NEXT:    ret
@@ -1495,15 +1495,15 @@ define i64 @mul_i64_65280_twice(i64 %a, i64 %b) {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    lu12i.w $a4, 15
 ; LA32-NEXT:    ori $a4, $a4, 3840
-; LA32-NEXT:    mul.w $a1, $a1, $a4
-; LA32-NEXT:    mulh.wu $a5, $a0, $a4
-; LA32-NEXT:    add.w $a1, $a5, $a1
-; LA32-NEXT:    mul.w $a0, $a0, $a4
 ; LA32-NEXT:    mul.w $a3, $a3, $a4
 ; LA32-NEXT:    mulh.wu $a5, $a2, $a4
 ; LA32-NEXT:    add.w $a3, $a5, $a3
-; LA32-NEXT:    mul.w $a2, $a2, $a4
+; LA32-NEXT:    mul.w $a1, $a1, $a4
+; LA32-NEXT:    mulh.wu $a5, $a0, $a4
+; LA32-NEXT:    add.w $a1, $a5, $a1
 ; LA32-NEXT:    xor $a1, $a1, $a3
+; LA32-NEXT:    mul.w $a2, $a2, $a4
+; LA32-NEXT:    mul.w $a0, $a0, $a4
 ; LA32-NEXT:    xor $a0, $a0, $a2
 ; LA32-NEXT:    ret
 ;
@@ -1511,8 +1511,8 @@ define i64 @mul_i64_65280_twice(i64 %a, i64 %b) {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    lu12i.w $a2, 15
 ; LA64-NEXT:    ori $a2, $a2, 3840
-; LA64-NEXT:    mul.d $a0, $a0, $a2
 ; LA64-NEXT:    mul.d $a1, $a1, $a2
+; LA64-NEXT:    mul.d $a0, $a0, $a2
 ; LA64-NEXT:    xor $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %c = mul i64 %a, 65280
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 3a0fd0758cb32..e5dcf8e264ffc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -237,28 +237,28 @@ define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a5, 4
 ; CHECK-NEXT:    ld.b $a1, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a6, 5
-; CHECK-NEXT:    ld.b $a2, $sp, 8
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a7, 6
-; CHECK-NEXT:    ld.b $a3, $sp, 16
+; CHECK-NEXT:    ld.b $a2, $sp, 8
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 7
-; CHECK-NEXT:    ld.b $a1, $sp, 24
+; CHECK-NEXT:    ld.b $a1, $sp, 16
+; CHECK-NEXT:    ld.b $a3, $sp, 24
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 8
 ; CHECK-NEXT:    ld.b $a2, $sp, 32
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 9
-; CHECK-NEXT:    ld.b $a3, $sp, 40
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 10
-; CHECK-NEXT:    ld.b $a1, $sp, 48
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 9
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 10
+; CHECK-NEXT:    ld.b $a1, $sp, 40
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 11
-; CHECK-NEXT:    ld.b $a2, $sp, 56
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 12
-; CHECK-NEXT:    ld.b $a3, $sp, 64
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 13
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 14
-; CHECK-NEXT:    ld.b $a1, $sp, 72
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 15
+; CHECK-NEXT:    ld.b $a2, $sp, 48
+; CHECK-NEXT:    ld.b $a3, $sp, 56
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 12
+; CHECK-NEXT:    ld.b $a1, $sp, 64
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 13
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 14
+; CHECK-NEXT:    ld.b $a2, $sp, 72
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 15
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 0
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 0
 ; CHECK-NEXT:    ld.b $a1, $sp, 80
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
@@ -377,50 +377,50 @@ entry:
 define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
 ; CHECK-LABEL: buildvector_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.h $t0, $sp, 8
-; CHECK-NEXT:    ld.h $t1, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a3, 2
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a4, 3
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a5, 4
+; CHECK-NEXT:    ld.h $a1, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a6, 5
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a7, 6
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $t1, 7
-; CHECK-NEXT:    ld.h $a1, $sp, 16
+; CHECK-NEXT:    ld.h $a2, $sp, 8
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 7
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $t0, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 0
+; CHECK-NEXT:    ld.h $a1, $sp, 16
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a2, $sp, 24
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT:    ld.h $a1, $sp, 24
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 32
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 2
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 2
+; CHECK-NEXT:    ld.h $a1, $sp, 32
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a2, $sp, 40
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT:    ld.h $a1, $sp, 40
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 48
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 4
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 4
+; CHECK-NEXT:    ld.h $a1, $sp, 48
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a2, $sp, 56
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT:    ld.h $a1, $sp, 56
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 64
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 6
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 6
+; CHECK-NEXT:    ld.h $a1, $sp, 64
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
 ; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
@@ -452,15 +452,15 @@ entry:
 define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
 ; CHECK-LABEL: buildvector_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.w $t0, $sp, 0
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a1, 0
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a2, 1
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a3, 2
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a4, 3
+; CHECK-NEXT:    ld.w $a1, $sp, 0
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a5, 4
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a6, 5
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a7, 6
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $t0, 7
+; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a1, 7
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll
index 0f3df3d573b65..d10b3050589a6 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll
@@ -9,29 +9,29 @@
 define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -48,29 +48,29 @@ entry:
 define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -87,19 +87,19 @@ entry:
 define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
@@ -107,10 +107,10 @@ define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfadd.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
@@ -129,10 +129,10 @@ entry:
 define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -173,11 +173,11 @@ entry:
 define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -217,19 +217,19 @@ entry:
 define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
@@ -237,10 +237,10 @@ define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
@@ -260,29 +260,29 @@ entry:
 define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -301,30 +301,30 @@ entry:
 define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.d $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -342,28 +342,28 @@ entry:
 define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr0, $xr0, 63
-; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.d $xr1, $xr1, 63
+; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.d $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll
index 6fd14d93a751e..20e66c1b6fb91 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll
@@ -9,29 +9,29 @@
 define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -48,29 +48,29 @@ entry:
 define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -87,19 +87,19 @@ entry:
 define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
@@ -107,10 +107,10 @@ define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfadd.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
@@ -129,10 +129,10 @@ entry:
 define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -173,11 +173,11 @@ entry:
 define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -217,19 +217,19 @@ entry:
 define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
@@ -237,10 +237,10 @@ define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr0, $xr2
 ; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
@@ -260,29 +260,29 @@ entry:
 define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -301,30 +301,30 @@ entry:
 define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvfsub.s $xr0, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -342,28 +342,28 @@ entry:
 define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-FAST-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-ON-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
-; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr0, $xr0, 31
-; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT:    xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a2, 0
+; CONTRACT-OFF-NEXT:    xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr1, $xr2, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmadd.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: xvfnmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-FAST-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-FAST-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-FAST-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-FAST-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: xvfnmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-ON-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-ON-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-ON-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-ON-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-ON-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: xvfnmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    xvld $xr0, $a1, 0
+; CONTRACT-OFF-NEXT:    xvld $xr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    xvld $xr1, $a2, 0
-; CONTRACT-OFF-NEXT:    xvld $xr2, $a3, 0
-; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CONTRACT-OFF-NEXT:    xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT:    xvfnmsub.s $xr0, $xr2, $xr1, $xr0
 ; CONTRACT-OFF-NEXT:    xvst $xr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
index 136f34bafb32a..8e4d0dc6f1c38 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll
@@ -4,9 +4,9 @@
 define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvadd.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvadd.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvadd.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvadd.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
index b06d1bea4ef62..98c87cadeeb5a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
@@ -4,9 +4,9 @@
 define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @and_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvand.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll
index 4dd2cee7a2ed5..fcbf0f1400fe6 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll
@@ -4,9 +4,9 @@
 define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsra.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsra.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsra.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsra.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsra.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsra.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsra.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsra.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll
index b3eb328e8d446..365bb305fc5aa 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll
@@ -4,9 +4,9 @@
 define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfadd.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfadd.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfadd.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll
index 4f56dd29c1b25..ef67dbc100c04 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll
@@ -35,9 +35,9 @@ define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -51,9 +51,9 @@ define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -68,9 +68,9 @@ define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cueq.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cueq.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -84,9 +84,9 @@ define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cueq.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cueq.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -101,9 +101,9 @@ define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.ceq.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -117,9 +117,9 @@ define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.ceq.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -134,9 +134,9 @@ define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -150,9 +150,9 @@ define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -167,9 +167,9 @@ define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cule.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cule.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -183,9 +183,9 @@ define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cule.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cule.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -200,9 +200,9 @@ define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cle.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -216,9 +216,9 @@ define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cle.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -233,9 +233,9 @@ define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -249,9 +249,9 @@ define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -266,9 +266,9 @@ define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cult.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cult.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -282,9 +282,9 @@ define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cult.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cult.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -299,9 +299,9 @@ define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.clt.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -315,9 +315,9 @@ define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.clt.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -332,9 +332,9 @@ define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -348,9 +348,9 @@ define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -365,9 +365,9 @@ define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cune.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cune.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -381,9 +381,9 @@ define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cune.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cune.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -398,9 +398,9 @@ define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cne.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -414,9 +414,9 @@ define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cne.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -431,9 +431,9 @@ define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cor.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cor.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -447,9 +447,9 @@ define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cor.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cor.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
@@ -464,9 +464,9 @@ define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8f32_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cun.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cun.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x float>, ptr %a0
@@ -480,9 +480,9 @@ define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f64_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfcmp.cun.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfcmp.cun.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x double>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
index 63d8c222ae54f..6004565b0b784 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
@@ -4,9 +4,9 @@
 define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfdiv.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfdiv.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfdiv.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfdiv.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
index f777151cdb0ac..a48dca8d28470 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
@@ -4,9 +4,9 @@
 define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfmul.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfmul.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfmul.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfmul.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
index 201ba5f5df66f..6164aa5a55c7e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
@@ -4,9 +4,9 @@
 define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfsub.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfsub.s $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvfsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvfsub.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
index d15c4133855f4..6693fe0f6ec7c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
@@ -19,9 +19,9 @@ define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -49,9 +49,9 @@ define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -79,9 +79,9 @@ define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -109,9 +109,9 @@ define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -140,9 +140,9 @@ define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -170,9 +170,9 @@ define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -200,9 +200,9 @@ define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -230,9 +230,9 @@ define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -261,9 +261,9 @@ define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.bu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -291,9 +291,9 @@ define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.hu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -321,9 +321,9 @@ define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.wu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -351,9 +351,9 @@ define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsle.du $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsle.du $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -382,9 +382,9 @@ define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -412,9 +412,9 @@ define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -442,9 +442,9 @@ define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -472,9 +472,9 @@ define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -503,9 +503,9 @@ define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.bu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %a0
@@ -533,9 +533,9 @@ define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.hu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -563,9 +563,9 @@ define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.wu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -593,9 +593,9 @@ define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvslt.du $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvslt.du $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
@@ -610,9 +610,9 @@ define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v32i8_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
@@ -627,9 +627,9 @@ define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i16_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvrepli.b $xr1, -1
 ; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
@@ -645,9 +645,9 @@ define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i32_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvrepli.b $xr1, -1
 ; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
@@ -663,9 +663,9 @@ define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i64_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvseq.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvseq.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvrepli.b $xr1, -1
 ; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
index 5b992b5e38de5..24be69d8032a8 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
@@ -4,9 +4,9 @@
 define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsrl.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsrl.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsrl.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsrl.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsrl.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsrl.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsrl.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsrl.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
index 4745e7003cb1c..dcb893caa2555 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
@@ -4,9 +4,9 @@
 define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmul.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmul.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1)  nounwind {
 ; CHECK-LABEL: mul_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmul.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmul.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmul.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmul.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmul.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmul.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
index f32b8897bebce..f37cbf1cefedc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
@@ -4,9 +4,9 @@
 define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
index 879caa5a6700d..e3635a5f14a2b 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
@@ -4,9 +4,9 @@
 define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
index 56c69171c9d44..8a02c7e3ac975 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
@@ -4,9 +4,9 @@
 define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsll.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsll.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsll.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsll.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @shl_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsll.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsll.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsll.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsll.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
index 5102abac83d80..bcfff16514770 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
@@ -4,9 +4,9 @@
 define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsub.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsub.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsub.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsub.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsub.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsub.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvsub.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll
index 43f558f3cdf37..e78084c7186d3 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll
@@ -4,9 +4,9 @@
 define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.bu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.hu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.wu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvdiv.du $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvdiv.du $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
index e062e10b21d9f..c2fb1462b7a25 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
@@ -4,9 +4,9 @@
 define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvxor.v $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll
index db3cc7f38774d..aac711a4a371c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll
@@ -4,9 +4,9 @@
 define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.b $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -24,9 +24,9 @@ entry:
 define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v32i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.bu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -44,9 +44,9 @@ entry:
 define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.h $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -64,9 +64,9 @@ entry:
 define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.hu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -84,9 +84,9 @@ entry:
 define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.w $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -104,9 +104,9 @@ entry:
 define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v8i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.wu $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -124,9 +124,9 @@ entry:
 define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.d $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -144,9 +144,9 @@ entry:
 define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v4i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    xvmuh.du $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvld $xr0, $a2, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvmuh.du $xr0, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
index 7b4c7ced4b5f3..609e7ce3a6f2a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
@@ -69,12 +69,12 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:    addi.d $a1, $a1, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:    xvld $xr2, $a1, 0
-; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    xvld $xr0, $a3, 0
+; CHECK-NEXT:    xvld $xr1, $a1, 0
+; CHECK-NEXT:    xvld $xr2, $a2, 0
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr2, $xr1, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index c04d7ca889f7e..84adb5f6b2ccd 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -230,31 +230,31 @@ entry:
 define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
 ; CHECK-LABEL: buildvector_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.b $t0, $sp, 64
-; CHECK-NEXT:    ld.b $t1, $sp, 56
-; CHECK-NEXT:    ld.b $t2, $sp, 48
-; CHECK-NEXT:    ld.b $t3, $sp, 40
-; CHECK-NEXT:    ld.b $t4, $sp, 32
-; CHECK-NEXT:    ld.b $t5, $sp, 24
-; CHECK-NEXT:    ld.b $t6, $sp, 16
-; CHECK-NEXT:    ld.b $t7, $sp, 8
-; CHECK-NEXT:    ld.b $t8, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 1
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 2
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a4, 3
+; CHECK-NEXT:    ld.b $a1, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a5, 4
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a6, 5
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a7, 6
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t8, 7
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t7, 8
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t6, 9
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t5, 10
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t4, 11
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t3, 12
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t2, 13
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t1, 14
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $t0, 15
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 7
+; CHECK-NEXT:    ld.b $a1, $sp, 8
+; CHECK-NEXT:    ld.b $a2, $sp, 16
+; CHECK-NEXT:    ld.b $a3, $sp, 24
+; CHECK-NEXT:    ld.b $a4, $sp, 32
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 8
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 9
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 10
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a4, 11
+; CHECK-NEXT:    ld.b $a1, $sp, 40
+; CHECK-NEXT:    ld.b $a2, $sp, 48
+; CHECK-NEXT:    ld.b $a3, $sp, 56
+; CHECK-NEXT:    ld.b $a4, $sp, 64
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 12
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 13
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a4, 15
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -281,15 +281,15 @@ entry:
 define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
 ; CHECK-LABEL: buildvector_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.h $t0, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a3, 2
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a4, 3
+; CHECK-NEXT:    ld.h $a1, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a5, 4
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a6, 5
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a7, 6
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $t0, 7
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 7
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll
index c83c563952d4f..29ff13cc9e3b0 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll
@@ -9,29 +9,29 @@
 define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -48,29 +48,29 @@ entry:
 define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -87,19 +87,19 @@ entry:
 define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
@@ -107,10 +107,10 @@ define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfadd.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
@@ -129,10 +129,10 @@ entry:
 define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -173,11 +173,11 @@ entry:
 define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -217,19 +217,19 @@ entry:
 define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
@@ -237,10 +237,10 @@ define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
@@ -260,29 +260,29 @@ entry:
 define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -301,30 +301,30 @@ entry:
 define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.d $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -342,28 +342,28 @@ entry:
 define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmadd_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-ON-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmsub_d:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-ON-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.d $vr0, $vr0, 63
-; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.d $vr1, $vr1, 63
+; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_d_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_d_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_d_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.d $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll
index 1f316d5b1c8a4..9e0f05eac9449 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll
@@ -9,29 +9,29 @@
 define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -48,29 +48,29 @@ entry:
 define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -87,19 +87,19 @@ entry:
 define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
@@ -107,10 +107,10 @@ define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfadd.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
@@ -129,10 +129,10 @@ entry:
 define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -173,11 +173,11 @@ entry:
 define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
@@ -217,19 +217,19 @@ entry:
 define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
@@ -237,10 +237,10 @@ define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr0, $vr2
 ; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
@@ -260,29 +260,29 @@ entry:
 define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -301,30 +301,30 @@ entry:
 define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-ON-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a2, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
 ; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CONTRACT-OFF-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vfsub.s $vr0, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
@@ -342,28 +342,28 @@ entry:
 define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -379,28 +379,28 @@ entry:
 define void @contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -416,28 +416,28 @@ entry:
 define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -454,28 +454,28 @@ entry:
 define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -494,31 +494,31 @@ entry:
 define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmadd_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-ON-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -536,28 +536,28 @@ entry:
 define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -575,28 +575,28 @@ entry:
 define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -614,31 +614,31 @@ entry:
 define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-FAST-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-FAST-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: not_contract_vfnmsub_s:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-ON-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-ON-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
-; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vbitrevi.w $vr0, $vr0, 31
-; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
+; CONTRACT-OFF-NEXT:    vld $vr1, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr2, $a2, 0
+; CONTRACT-OFF-NEXT:    vbitrevi.w $vr1, $vr1, 31
+; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr1, $vr2, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -655,28 +655,28 @@ entry:
 define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -692,28 +692,28 @@ entry:
 define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -729,28 +729,28 @@ entry:
 define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmadd_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmadd_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmadd_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmadd.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
@@ -767,28 +767,28 @@ entry:
 define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
 ; CONTRACT-FAST-LABEL: vfnmsub_s_contract:
 ; CONTRACT-FAST:       # %bb.0: # %entry
-; CONTRACT-FAST-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-FAST-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-FAST-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-FAST-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-FAST-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-FAST-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-FAST-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-FAST-NEXT:    ret
 ;
 ; CONTRACT-ON-LABEL: vfnmsub_s_contract:
 ; CONTRACT-ON:       # %bb.0: # %entry
-; CONTRACT-ON-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-ON-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-ON-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-ON-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-ON-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-ON-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-ON-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-ON-NEXT:    ret
 ;
 ; CONTRACT-OFF-LABEL: vfnmsub_s_contract:
 ; CONTRACT-OFF:       # %bb.0: # %entry
-; CONTRACT-OFF-NEXT:    vld $vr0, $a1, 0
+; CONTRACT-OFF-NEXT:    vld $vr0, $a3, 0
 ; CONTRACT-OFF-NEXT:    vld $vr1, $a2, 0
-; CONTRACT-OFF-NEXT:    vld $vr2, $a3, 0
-; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CONTRACT-OFF-NEXT:    vld $vr2, $a1, 0
+; CONTRACT-OFF-NEXT:    vfnmsub.s $vr0, $vr2, $vr1, $vr0
 ; CONTRACT-OFF-NEXT:    vst $vr0, $a0, 0
 ; CONTRACT-OFF-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
index 485bd1df8d66e..2a7c37c2ae346 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll
@@ -4,9 +4,9 @@
 define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vadd.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vadd.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vadd.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: add_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vadd.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
index d3e4efb1b1c27..523255159a811 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
@@ -4,9 +4,9 @@
 define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: and_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vand.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
index 2a31074470983..fbc570d77ba80 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
@@ -4,9 +4,9 @@
 define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsra.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsra.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsra.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsra.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsra.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsra.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: ashr_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsra.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsra.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll
index 989ad10a44ffc..1fa1f611c4a36 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll
@@ -4,9 +4,9 @@
 define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfadd.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfadd.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fadd_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfadd.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll
index 95e46a4e71dab..53fbf0b2f86fe 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll
@@ -35,9 +35,9 @@ define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -51,9 +51,9 @@ define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_oeq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -68,9 +68,9 @@ define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cueq.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cueq.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -84,9 +84,9 @@ define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ueq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cueq.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cueq.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -101,9 +101,9 @@ define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.ceq.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -117,9 +117,9 @@ define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.ceq.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -134,9 +134,9 @@ define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -150,9 +150,9 @@ define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ole:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -167,9 +167,9 @@ define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cule.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cule.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -183,9 +183,9 @@ define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cule.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cule.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -200,9 +200,9 @@ define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cle.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -216,9 +216,9 @@ define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_le:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cle.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -233,9 +233,9 @@ define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -249,9 +249,9 @@ define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_olt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -266,9 +266,9 @@ define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cult.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cult.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -282,9 +282,9 @@ define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cult.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cult.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -299,9 +299,9 @@ define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.clt.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -315,9 +315,9 @@ define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_lt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.clt.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -332,9 +332,9 @@ define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -348,9 +348,9 @@ define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -365,9 +365,9 @@ define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cune.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cune.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -381,9 +381,9 @@ define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_une:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cune.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cune.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -398,9 +398,9 @@ define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cne.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -414,9 +414,9 @@ define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cne.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -431,9 +431,9 @@ define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cor.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cor.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -447,9 +447,9 @@ define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_ord:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cor.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cor.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
@@ -464,9 +464,9 @@ define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4f32_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cun.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cun.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x float>, ptr %a0
@@ -480,9 +480,9 @@ define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2f64_fcmp_uno:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfcmp.cun.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfcmp.cun.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x double>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
index 3b9642e31b02d..5f1ee9e4d212e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
@@ -4,9 +4,9 @@
 define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfdiv.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfdiv.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fdiv_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfdiv.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfdiv.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
index f604a8962958d..e7fb527f7805e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
@@ -4,9 +4,9 @@
 define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfmul.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fmul_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfmul.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll
index 02350c0763bae..df98182321dab 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll
@@ -4,9 +4,9 @@
 define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfsub.s $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: fsub_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vfsub.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
index 04b4831f1188c..448f3fa6c6e0e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
@@ -19,9 +19,9 @@ define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -49,9 +49,9 @@ define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -79,9 +79,9 @@ define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -109,9 +109,9 @@ define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_eq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -140,9 +140,9 @@ define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -170,9 +170,9 @@ define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -200,9 +200,9 @@ define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -230,9 +230,9 @@ define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_sle:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -261,9 +261,9 @@ define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.bu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -291,9 +291,9 @@ define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.hu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -321,9 +321,9 @@ define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.wu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -351,9 +351,9 @@ define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_ule:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsle.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsle.du $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -382,9 +382,9 @@ define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -412,9 +412,9 @@ define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -442,9 +442,9 @@ define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -472,9 +472,9 @@ define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_slt:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -503,9 +503,9 @@ define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.bu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %a0
@@ -533,9 +533,9 @@ define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.hu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -563,9 +563,9 @@ define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.wu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -593,9 +593,9 @@ define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
 define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_ult:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vslt.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vslt.du $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
@@ -610,9 +610,9 @@ define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v16i8_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
@@ -627,9 +627,9 @@ define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v8i16_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vrepli.b $vr1, -1
 ; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
@@ -645,9 +645,9 @@ define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v4i32_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vrepli.b $vr1, -1
 ; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
@@ -663,9 +663,9 @@ define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: v2i64_icmp_ne:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vseq.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vseq.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vrepli.b $vr1, -1
 ; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll
index 2693310b4f508..dada52f93060e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll
@@ -4,9 +4,9 @@
 define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsrl.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsrl.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsrl.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsrl.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsrl.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsrl.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: lshr_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsrl.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsrl.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
index f66cae6a18027..d0be9cb7e3c8b 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
@@ -4,9 +4,9 @@
 define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmul.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmul.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1)  nounwind {
 ; CHECK-LABEL: mul_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmul.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmul.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @mul_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mul_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmul.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
index 89702e60c01f5..f124512acce73 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
@@ -4,9 +4,9 @@
 define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: or_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
index cdff58defdaea..b68f73a749135 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
@@ -4,9 +4,9 @@
 define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
index 4b34c04f3374b..fa0aebaf28b3c 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
@@ -4,9 +4,9 @@
 define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsll.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsll.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsll.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsll.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsll.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsll.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: shl_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsll.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsll.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
index 2813d9c97e680..25b4623a47d1f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
@@ -4,9 +4,9 @@
 define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsub.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsub.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsub.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsub.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsub.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsub.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: sub_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vsub.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
index 32dac67d36a81..abb60b91dd488 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
@@ -4,9 +4,9 @@
 define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.bu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.hu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.wu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: udiv_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vdiv.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vdiv.du $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
index 482cecb1d7522..ce3e49c990ffb 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
@@ -4,9 +4,9 @@
 define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -20,9 +20,9 @@ entry:
 define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -36,9 +36,9 @@ entry:
 define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -52,9 +52,9 @@ entry:
 define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: xor_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vxor.v $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
index b0ca556eeff36..e1388f00e355f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
@@ -4,9 +4,9 @@
 define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -24,9 +24,9 @@ entry:
 define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.bu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -44,9 +44,9 @@ entry:
 define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.h $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -64,9 +64,9 @@ entry:
 define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.hu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -84,9 +84,9 @@ entry:
 define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -104,9 +104,9 @@ entry:
 define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.wu $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -124,9 +124,9 @@ entry:
 define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhs_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.d $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -144,9 +144,9 @@ entry:
 define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: mulhu_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    vmuh.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    vld $vr0, $a2, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vmuh.du $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
index 48ef3c14a4bf5..64af56a2acaef 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -69,12 +69,12 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:    addi.d $a1, $a1, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:    vld $vr2, $a1, 0
-; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    vld $vr0, $a3, 0
+; CHECK-NEXT:    vld $vr1, $a1, 0
+; CHECK-NEXT:    vld $vr2, $a2, 0
+; CHECK-NEXT:    vbitsel.v $vr0, $vr2, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
index b2d46f5c088ba..584a3091a625a 100644
--- a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
@@ -43,39 +43,40 @@ define signext i32 @rotr_32(i32 signext %x, i32 signext %y) nounwind {
 define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotl_64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    sll.w $a3, $a1, $a2
-; LA32-NEXT:    xori $a4, $a2, 31
-; LA32-NEXT:    srli.w $a5, $a0, 1
-; LA32-NEXT:    srl.w $a4, $a5, $a4
-; LA32-NEXT:    or $a3, $a3, $a4
-; LA32-NEXT:    addi.w $a4, $a2, -32
-; LA32-NEXT:    slti $a5, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a6, $a0, $a4
-; LA32-NEXT:    masknez $a5, $a6, $a5
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a5, $a0, $a2
-; LA32-NEXT:    srai.w $a4, $a4, 31
-; LA32-NEXT:    and $a4, $a4, $a5
-; LA32-NEXT:    sub.w $a5, $zero, $a2
-; LA32-NEXT:    srl.w $a6, $a1, $a5
-; LA32-NEXT:    ori $a7, $zero, 32
+; LA32-NEXT:    sub.w $a4, $zero, $a2
+; LA32-NEXT:    srl.w $a5, $a1, $a4
+; LA32-NEXT:    ori $a3, $zero, 32
+; LA32-NEXT:    sub.w $a6, $a3, $a2
+; LA32-NEXT:    srai.w $a3, $a6, 31
+; LA32-NEXT:    and $a3, $a3, $a5
+; LA32-NEXT:    sll.w $a7, $a1, $a2
+; LA32-NEXT:    xori $t0, $a2, 31
+; LA32-NEXT:    srli.w $t1, $a0, 1
+; LA32-NEXT:    srl.w $t0, $t1, $t0
+; LA32-NEXT:    or $a7, $a7, $t0
+; LA32-NEXT:    addi.w $t0, $a2, -32
+; LA32-NEXT:    slti $t1, $t0, 0
+; LA32-NEXT:    maskeqz $a7, $a7, $t1
+; LA32-NEXT:    sll.w $t2, $a0, $t0
+; LA32-NEXT:    masknez $t1, $t2, $t1
+; LA32-NEXT:    or $a7, $a7, $t1
+; LA32-NEXT:    or $a3, $a7, $a3
+; LA32-NEXT:    slti $a6, $a6, 0
+; LA32-NEXT:    masknez $a5, $a5, $a6
+; LA32-NEXT:    srl.w $a4, $a0, $a4
+; LA32-NEXT:    ori $a7, $zero, 64
 ; LA32-NEXT:    sub.w $a7, $a7, $a2
-; LA32-NEXT:    slti $t0, $a7, 0
-; LA32-NEXT:    masknez $t1, $a6, $t0
-; LA32-NEXT:    srl.w $a0, $a0, $a5
-; LA32-NEXT:    ori $a5, $zero, 64
-; LA32-NEXT:    sub.w $a2, $a5, $a2
-; LA32-NEXT:    xori $a2, $a2, 31
+; LA32-NEXT:    xori $a7, $a7, 31
 ; LA32-NEXT:    slli.w $a1, $a1, 1
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    sll.w $a1, $a1, $a7
+; LA32-NEXT:    or $a1, $a4, $a1
+; LA32-NEXT:    maskeqz $a1, $a1, $a6
+; LA32-NEXT:    or $a1, $a1, $a5
+; LA32-NEXT:    sll.w $a0, $a0, $a2
+; LA32-NEXT:    srai.w $a2, $t0, 31
+; LA32-NEXT:    and $a0, $a2, $a0
 ; LA32-NEXT:    or $a0, $a0, $a1
-; LA32-NEXT:    maskeqz $a0, $a0, $t0
-; LA32-NEXT:    or $a0, $a0, $t1
-; LA32-NEXT:    srai.w $a1, $a7, 31
-; LA32-NEXT:    and $a1, $a1, $a6
-; LA32-NEXT:    or $a1, $a3, $a1
-; LA32-NEXT:    or $a0, $a4, $a0
+; LA32-NEXT:    move $a1, $a3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotl_64:
@@ -94,39 +95,40 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 define i64 @rotr_64(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotr_64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srl.w $a3, $a0, $a2
-; LA32-NEXT:    xori $a4, $a2, 31
-; LA32-NEXT:    slli.w $a5, $a1, 1
-; LA32-NEXT:    sll.w $a4, $a5, $a4
-; LA32-NEXT:    or $a3, $a3, $a4
-; LA32-NEXT:    addi.w $a4, $a2, -32
-; LA32-NEXT:    slti $a5, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a6, $a1, $a4
-; LA32-NEXT:    masknez $a5, $a6, $a5
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a5, $a1, $a2
-; LA32-NEXT:    srai.w $a4, $a4, 31
-; LA32-NEXT:    and $a4, $a4, $a5
-; LA32-NEXT:    sub.w $a5, $zero, $a2
-; LA32-NEXT:    sll.w $a6, $a0, $a5
-; LA32-NEXT:    ori $a7, $zero, 32
+; LA32-NEXT:    sub.w $a4, $zero, $a2
+; LA32-NEXT:    sll.w $a5, $a0, $a4
+; LA32-NEXT:    ori $a3, $zero, 32
+; LA32-NEXT:    sub.w $a6, $a3, $a2
+; LA32-NEXT:    srai.w $a3, $a6, 31
+; LA32-NEXT:    and $a3, $a3, $a5
+; LA32-NEXT:    srl.w $a7, $a0, $a2
+; LA32-NEXT:    xori $t0, $a2, 31
+; LA32-NEXT:    slli.w $t1, $a1, 1
+; LA32-NEXT:    sll.w $t0, $t1, $t0
+; LA32-NEXT:    or $a7, $a7, $t0
+; LA32-NEXT:    addi.w $t0, $a2, -32
+; LA32-NEXT:    slti $t1, $t0, 0
+; LA32-NEXT:    maskeqz $a7, $a7, $t1
+; LA32-NEXT:    srl.w $t2, $a1, $t0
+; LA32-NEXT:    masknez $t1, $t2, $t1
+; LA32-NEXT:    or $a7, $a7, $t1
+; LA32-NEXT:    or $a3, $a7, $a3
+; LA32-NEXT:    slti $a6, $a6, 0
+; LA32-NEXT:    masknez $a5, $a5, $a6
+; LA32-NEXT:    sll.w $a4, $a1, $a4
+; LA32-NEXT:    ori $a7, $zero, 64
 ; LA32-NEXT:    sub.w $a7, $a7, $a2
-; LA32-NEXT:    slti $t0, $a7, 0
-; LA32-NEXT:    masknez $t1, $a6, $t0
-; LA32-NEXT:    sll.w $a1, $a1, $a5
-; LA32-NEXT:    ori $a5, $zero, 64
-; LA32-NEXT:    sub.w $a2, $a5, $a2
-; LA32-NEXT:    xori $a2, $a2, 31
+; LA32-NEXT:    xori $a7, $a7, 31
 ; LA32-NEXT:    srli.w $a0, $a0, 1
-; LA32-NEXT:    srl.w $a0, $a0, $a2
-; LA32-NEXT:    or $a0, $a1, $a0
-; LA32-NEXT:    maskeqz $a0, $a0, $t0
-; LA32-NEXT:    or $a1, $a0, $t1
-; LA32-NEXT:    srai.w $a0, $a7, 31
-; LA32-NEXT:    and $a0, $a0, $a6
-; LA32-NEXT:    or $a0, $a3, $a0
-; LA32-NEXT:    or $a1, $a4, $a1
+; LA32-NEXT:    srl.w $a0, $a0, $a7
+; LA32-NEXT:    or $a0, $a4, $a0
+; LA32-NEXT:    maskeqz $a0, $a0, $a6
+; LA32-NEXT:    or $a0, $a0, $a5
+; LA32-NEXT:    srl.w $a1, $a1, $a2
+; LA32-NEXT:    srai.w $a2, $t0, 31
+; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    move $a0, $a3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotr_64:
@@ -264,38 +266,39 @@ define signext i32 @rotr_32_mask_or_64_or_32(i32 signext %x, i32 signext %y) nou
 define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotl_64_mask:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    sll.w $a3, $a1, $a2
-; LA32-NEXT:    xori $a4, $a2, 31
-; LA32-NEXT:    srli.w $a5, $a0, 1
-; LA32-NEXT:    srl.w $a4, $a5, $a4
-; LA32-NEXT:    or $a3, $a3, $a4
-; LA32-NEXT:    addi.w $a4, $a2, -32
-; LA32-NEXT:    slti $a5, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a6, $a0, $a4
-; LA32-NEXT:    masknez $a5, $a6, $a5
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a5, $a0, $a2
-; LA32-NEXT:    srai.w $a4, $a4, 31
-; LA32-NEXT:    and $a4, $a4, $a5
-; LA32-NEXT:    sub.w $a2, $zero, $a2
-; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    sub.w $a4, $zero, $a2
+; LA32-NEXT:    srl.w $a3, $a1, $a4
+; LA32-NEXT:    andi $a5, $a4, 63
 ; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    srai.w $a7, $a6, 31
+; LA32-NEXT:    and $a3, $a7, $a3
+; LA32-NEXT:    sll.w $a7, $a1, $a2
+; LA32-NEXT:    xori $t0, $a2, 31
+; LA32-NEXT:    srli.w $t1, $a0, 1
+; LA32-NEXT:    srl.w $t0, $t1, $t0
+; LA32-NEXT:    or $a7, $a7, $t0
+; LA32-NEXT:    addi.w $t0, $a2, -32
+; LA32-NEXT:    slti $t1, $t0, 0
+; LA32-NEXT:    maskeqz $a7, $a7, $t1
+; LA32-NEXT:    sll.w $t2, $a0, $t0
+; LA32-NEXT:    masknez $t1, $t2, $t1
+; LA32-NEXT:    or $a7, $a7, $t1
+; LA32-NEXT:    or $a3, $a7, $a3
 ; LA32-NEXT:    srl.w $a7, $a1, $a6
-; LA32-NEXT:    slti $t0, $a6, 0
-; LA32-NEXT:    masknez $a7, $a7, $t0
-; LA32-NEXT:    srl.w $a0, $a0, $a2
+; LA32-NEXT:    slti $a6, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $a6
+; LA32-NEXT:    srl.w $a4, $a0, $a4
 ; LA32-NEXT:    xori $a5, $a5, 31
-; LA32-NEXT:    slli.w $t1, $a1, 1
-; LA32-NEXT:    sll.w $a5, $t1, $a5
-; LA32-NEXT:    or $a0, $a0, $a5
-; LA32-NEXT:    maskeqz $a0, $a0, $t0
-; LA32-NEXT:    or $a0, $a0, $a7
-; LA32-NEXT:    srl.w $a1, $a1, $a2
-; LA32-NEXT:    srai.w $a2, $a6, 31
-; LA32-NEXT:    and $a1, $a2, $a1
-; LA32-NEXT:    or $a1, $a3, $a1
-; LA32-NEXT:    or $a0, $a4, $a0
+; LA32-NEXT:    slli.w $a1, $a1, 1
+; LA32-NEXT:    sll.w $a1, $a1, $a5
+; LA32-NEXT:    or $a1, $a4, $a1
+; LA32-NEXT:    maskeqz $a1, $a1, $a6
+; LA32-NEXT:    or $a1, $a1, $a7
+; LA32-NEXT:    sll.w $a0, $a0, $a2
+; LA32-NEXT:    srai.w $a2, $t0, 31
+; LA32-NEXT:    and $a0, $a2, $a0
+; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    move $a1, $a3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotl_64_mask:
@@ -314,39 +317,40 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
 define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotl_64_mask_and_127_and_63:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    sll.w $a3, $a1, $a2
-; LA32-NEXT:    srli.w $a4, $a0, 1
-; LA32-NEXT:    andi $a5, $a2, 127
-; LA32-NEXT:    xori $a6, $a5, 31
-; LA32-NEXT:    srl.w $a4, $a4, $a6
-; LA32-NEXT:    or $a3, $a3, $a4
-; LA32-NEXT:    addi.w $a4, $a5, -32
-; LA32-NEXT:    slti $a5, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a6, $a0, $a4
-; LA32-NEXT:    masknez $a5, $a6, $a5
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    sll.w $a5, $a0, $a2
-; LA32-NEXT:    srai.w $a4, $a4, 31
-; LA32-NEXT:    and $a4, $a4, $a5
-; LA32-NEXT:    sub.w $a2, $zero, $a2
-; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    sub.w $a4, $zero, $a2
+; LA32-NEXT:    srl.w $a3, $a1, $a4
+; LA32-NEXT:    andi $a5, $a4, 63
 ; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    srai.w $a7, $a6, 31
+; LA32-NEXT:    and $a3, $a7, $a3
+; LA32-NEXT:    sll.w $a7, $a1, $a2
+; LA32-NEXT:    srli.w $t0, $a0, 1
+; LA32-NEXT:    andi $t1, $a2, 127
+; LA32-NEXT:    xori $t2, $t1, 31
+; LA32-NEXT:    srl.w $t0, $t0, $t2
+; LA32-NEXT:    or $a7, $a7, $t0
+; LA32-NEXT:    addi.w $t0, $t1, -32
+; LA32-NEXT:    slti $t1, $t0, 0
+; LA32-NEXT:    maskeqz $a7, $a7, $t1
+; LA32-NEXT:    sll.w $t2, $a0, $t0
+; LA32-NEXT:    masknez $t1, $t2, $t1
+; LA32-NEXT:    or $a7, $a7, $t1
+; LA32-NEXT:    or $a3, $a7, $a3
 ; LA32-NEXT:    srl.w $a7, $a1, $a6
-; LA32-NEXT:    slti $t0, $a6, 0
-; LA32-NEXT:    masknez $a7, $a7, $t0
-; LA32-NEXT:    srl.w $a0, $a0, $a2
+; LA32-NEXT:    slti $a6, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $a6
+; LA32-NEXT:    srl.w $a4, $a0, $a4
 ; LA32-NEXT:    xori $a5, $a5, 31
-; LA32-NEXT:    slli.w $t1, $a1, 1
-; LA32-NEXT:    sll.w $a5, $t1, $a5
-; LA32-NEXT:    or $a0, $a0, $a5
-; LA32-NEXT:    maskeqz $a0, $a0, $t0
-; LA32-NEXT:    or $a0, $a0, $a7
-; LA32-NEXT:    srl.w $a1, $a1, $a2
-; LA32-NEXT:    srai.w $a2, $a6, 31
-; LA32-NEXT:    and $a1, $a2, $a1
-; LA32-NEXT:    or $a1, $a3, $a1
-; LA32-NEXT:    or $a0, $a4, $a0
+; LA32-NEXT:    slli.w $a1, $a1, 1
+; LA32-NEXT:    sll.w $a1, $a1, $a5
+; LA32-NEXT:    or $a1, $a4, $a1
+; LA32-NEXT:    maskeqz $a1, $a1, $a6
+; LA32-NEXT:    or $a1, $a1, $a7
+; LA32-NEXT:    sll.w $a0, $a0, $a2
+; LA32-NEXT:    srai.w $a2, $t0, 31
+; LA32-NEXT:    and $a0, $a2, $a0
+; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    move $a1, $a3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotl_64_mask_and_127_and_63:
@@ -387,38 +391,39 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
 define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotr_64_mask:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srl.w $a3, $a0, $a2
-; LA32-NEXT:    xori $a4, $a2, 31
-; LA32-NEXT:    slli.w $a5, $a1, 1
-; LA32-NEXT:    sll.w $a4, $a5, $a4
-; LA32-NEXT:    or $a3, $a3, $a4
-; LA32-NEXT:    addi.w $a4, $a2, -32
-; LA32-NEXT:    slti $a5, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a6, $a1, $a4
-; LA32-NEXT:    masknez $a5, $a6, $a5
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a5, $a1, $a2
-; LA32-NEXT:    srai.w $a4, $a4, 31
-; LA32-NEXT:    and $a4, $a4, $a5
-; LA32-NEXT:    sub.w $a2, $zero, $a2
-; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    sub.w $a4, $zero, $a2
+; LA32-NEXT:    sll.w $a3, $a0, $a4
+; LA32-NEXT:    andi $a5, $a4, 63
 ; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    srai.w $a7, $a6, 31
+; LA32-NEXT:    and $a3, $a7, $a3
+; LA32-NEXT:    srl.w $a7, $a0, $a2
+; LA32-NEXT:    xori $t0, $a2, 31
+; LA32-NEXT:    slli.w $t1, $a1, 1
+; LA32-NEXT:    sll.w $t0, $t1, $t0
+; LA32-NEXT:    or $a7, $a7, $t0
+; LA32-NEXT:    addi.w $t0, $a2, -32
+; LA32-NEXT:    slti $t1, $t0, 0
+; LA32-NEXT:    maskeqz $a7, $a7, $t1
+; LA32-NEXT:    srl.w $t2, $a1, $t0
+; LA32-NEXT:    masknez $t1, $t2, $t1
+; LA32-NEXT:    or $a7, $a7, $t1
+; LA32-NEXT:    or $a3, $a7, $a3
 ; LA32-NEXT:    sll.w $a7, $a0, $a6
-; LA32-NEXT:    slti $t0, $a6, 0
-; LA32-NEXT:    masknez $a7, $a7, $t0
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    slti $a6, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $a6
+; LA32-NEXT:    sll.w $a4, $a1, $a4
 ; LA32-NEXT:    xori $a5, $a5, 31
-; LA32-NEXT:    srli.w $t1, $a0, 1
-; LA32-NEXT:    srl.w $a5, $t1, $a5
-; LA32-NEXT:    or $a1, $a1, $a5
-; LA32-NEXT:    maskeqz $a1, $a1, $t0
-; LA32-NEXT:    or $a1, $a1, $a7
-; LA32-NEXT:    sll.w $a0, $a0, $a2
-; LA32-NEXT:    srai.w $a2, $a6, 31
-; LA32-NEXT:    and $a0, $a2, $a0
-; LA32-NEXT:    or $a0, $a3, $a0
-; LA32-NEXT:    or $a1, $a4, $a1
+; LA32-NEXT:    srli.w $a0, $a0, 1
+; LA32-NEXT:    srl.w $a0, $a0, $a5
+; LA32-NEXT:    or $a0, $a4, $a0
+; LA32-NEXT:    maskeqz $a0, $a0, $a6
+; LA32-NEXT:    or $a0, $a0, $a7
+; LA32-NEXT:    srl.w $a1, $a1, $a2
+; LA32-NEXT:    srai.w $a2, $t0, 31
+; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    move $a0, $a3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotr_64_mask:
@@ -436,39 +441,40 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
 define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
 ; LA32-LABEL: rotr_64_mask_and_127_and_63:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srl.w $a3, $a0, $a2
-; LA32-NEXT:    slli.w $a4, $a1, 1
-; LA32-NEXT:    andi $a5, $a2, 127
-; LA32-NEXT:    xori $a6, $a5, 31
-; LA32-NEXT:    sll.w $a4, $a4, $a6
-; LA32-NEXT:    or $a3, $a3, $a4
-; LA32-NEXT:    addi.w $a4, $a5, -32
-; LA32-NEXT:    slti $a5, $a4, 0
-; LA32-NEXT:    maskeqz $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a6, $a1, $a4
-; LA32-NEXT:    masknez $a5, $a6, $a5
-; LA32-NEXT:    or $a3, $a3, $a5
-; LA32-NEXT:    srl.w $a5, $a1, $a2
-; LA32-NEXT:    srai.w $a4, $a4, 31
-; LA32-NEXT:    and $a4, $a4, $a5
-; LA32-NEXT:    sub.w $a2, $zero, $a2
-; LA32-NEXT:    andi $a5, $a2, 63
+; LA32-NEXT:    sub.w $a4, $zero, $a2
+; LA32-NEXT:    sll.w $a3, $a0, $a4
+; LA32-NEXT:    andi $a5, $a4, 63
 ; LA32-NEXT:    addi.w $a6, $a5, -32
+; LA32-NEXT:    srai.w $a7, $a6, 31
+; LA32-NEXT:    and $a3, $a7, $a3
+; LA32-NEXT:    srl.w $a7, $a0, $a2
+; LA32-NEXT:    slli.w $t0, $a1, 1
+; LA32-NEXT:    andi $t1, $a2, 127
+; LA32-NEXT:    xori $t2, $t1, 31
+; LA32-NEXT:    sll.w $t0, $t0, $t2
+; LA32-NEXT:    or $a7, $a7, $t0
+; LA32-NEXT:    addi.w $t0, $t1, -32
+; LA32-NEXT:    slti $t1, $t0, 0
+; LA32-NEXT:    maskeqz $a7, $a7, $t1
+; LA32-NEXT:    srl.w $t2, $a1, $t0
+; LA32-NEXT:    masknez $t1, $t2, $t1
+; LA32-NEXT:    or $a7, $a7, $t1
+; LA32-NEXT:    or $a3, $a7, $a3
 ; LA32-NEXT:    sll.w $a7, $a0, $a6
-; LA32-NEXT:    slti $t0, $a6, 0
-; LA32-NEXT:    masknez $a7, $a7, $t0
-; LA32-NEXT:    sll.w $a1, $a1, $a2
+; LA32-NEXT:    slti $a6, $a6, 0
+; LA32-NEXT:    masknez $a7, $a7, $a6
+; LA32-NEXT:    sll.w $a4, $a1, $a4
 ; LA32-NEXT:    xori $a5, $a5, 31
-; LA32-NEXT:    srli.w $t1, $a0, 1
-; LA32-NEXT:    srl.w $a5, $t1, $a5
-; LA32-NEXT:    or $a1, $a1, $a5
-; LA32-NEXT:    maskeqz $a1, $a1, $t0
-; LA32-NEXT:    or $a1, $a1, $a7
-; LA32-NEXT:    sll.w $a0, $a0, $a2
-; LA32-NEXT:    srai.w $a2, $a6, 31
-; LA32-NEXT:    and $a0, $a2, $a0
-; LA32-NEXT:    or $a0, $a3, $a0
-; LA32-NEXT:    or $a1, $a4, $a1
+; LA32-NEXT:    srli.w $a0, $a0, 1
+; LA32-NEXT:    srl.w $a0, $a0, $a5
+; LA32-NEXT:    or $a0, $a4, $a0
+; LA32-NEXT:    maskeqz $a0, $a0, $a6
+; LA32-NEXT:    or $a0, $a0, $a7
+; LA32-NEXT:    srl.w $a1, $a1, $a2
+; LA32-NEXT:    srai.w $a2, $t0, 31
+; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    move $a0, $a3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotr_64_mask_and_127_and_63:
diff --git a/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll b/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll
index f95f1fb7df794..717e15e51feb8 100644
--- a/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll
+++ b/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll
@@ -214,10 +214,10 @@ define i64 @sub_clamp_zero_i64(i64 signext %x, i64 signext %y) {
 ; LA32-NEXT:    sltu $a4, $a0, $a2
 ; LA32-NEXT:    sub.w $a1, $a1, $a3
 ; LA32-NEXT:    sub.w $a1, $a1, $a4
+; LA32-NEXT:    srai.w $a3, $a1, 31
+; LA32-NEXT:    andn $a1, $a1, $a3
 ; LA32-NEXT:    sub.w $a0, $a0, $a2
-; LA32-NEXT:    srai.w $a2, $a1, 31
-; LA32-NEXT:    andn $a1, $a1, $a2
-; LA32-NEXT:    andn $a0, $a0, $a2
+; LA32-NEXT:    andn $a0, $a0, $a3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: sub_clamp_zero_i64:
diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
index 2bb39395c1d1b..e4848b79b59b8 100644
--- a/llvm/test/CodeGen/LoongArch/sextw-removal.ll
+++ b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
@@ -11,14 +11,15 @@ define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
 ; CHECK-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; CHECK-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; CHECK-NEXT:    move $fp, $a1
-; CHECK-NEXT:    sra.w $s0, $a0, $a1
+; CHECK-NEXT:    sra.w $a0, $a0, $a1
 ; CHECK-NEXT:    .p2align 4, , 16
 ; CHECK-NEXT:  .LBB0_1: # %bb2
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    move $a0, $s0
+; CHECK-NEXT:    sll.w $s0, $a0, $fp
 ; CHECK-NEXT:    bl %plt(bar)
-; CHECK-NEXT:    sll.w $s0, $s0, $fp
-; CHECK-NEXT:    bnez $a0, .LBB0_1
+; CHECK-NEXT:    move $a1, $a0
+; CHECK-NEXT:    move $a0, $s0
+; CHECK-NEXT:    bnez $a1, .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %bb7
 ; CHECK-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; CHECK-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -33,13 +34,14 @@ define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
 ; NORMV-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; NORMV-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; NORMV-NEXT:    move $fp, $a1
-; NORMV-NEXT:    sra.w $s0, $a0, $a1
+; NORMV-NEXT:    sra.w $a1, $a0, $a1
 ; NORMV-NEXT:    .p2align 4, , 16
 ; NORMV-NEXT:  .LBB0_1: # %bb2
 ; NORMV-NEXT:    # =>This Inner Loop Header: Depth=1
-; NORMV-NEXT:    addi.w $a0, $s0, 0
+; NORMV-NEXT:    sll.w $s0, $a1, $fp
+; NORMV-NEXT:    addi.w $a0, $a1, 0
 ; NORMV-NEXT:    bl %plt(bar)
-; NORMV-NEXT:    sll.w $s0, $s0, $fp
+; NORMV-NEXT:    move $a1, $s0
 ; NORMV-NEXT:    bnez $a0, .LBB0_1
 ; NORMV-NEXT:  # %bb.2: # %bb7
 ; NORMV-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
@@ -251,19 +253,19 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind {
 ; CHECK-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; CHECK-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; CHECK-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT:    sra.w $fp, $a0, $a1
+; CHECK-NEXT:    sra.w $a0, $a0, $a1
 ; CHECK-NEXT:    .p2align 4, , 16
 ; CHECK-NEXT:  .LBB5_1: # %bb2
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    addi.w $a0, $fp, 0
+; CHECK-NEXT:    addi.w $a0, $a0, 0
 ; CHECK-NEXT:    bl %plt(baz)
-; CHECK-NEXT:    move $s0, $a0
-; CHECK-NEXT:    bl %plt(__fixsfsi)
 ; CHECK-NEXT:    move $fp, $a0
-; CHECK-NEXT:    move $a0, $s0
 ; CHECK-NEXT:    move $a1, $zero
 ; CHECK-NEXT:    bl %plt(__nesf2)
-; CHECK-NEXT:    bnez $a0, .LBB5_1
+; CHECK-NEXT:    move $s0, $a0
+; CHECK-NEXT:    move $a0, $fp
+; CHECK-NEXT:    bl %plt(__fixsfsi)
+; CHECK-NEXT:    bnez $s0, .LBB5_1
 ; CHECK-NEXT:  # %bb.2: # %bb7
 ; CHECK-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; CHECK-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -277,19 +279,19 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind {
 ; NORMV-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; NORMV-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; NORMV-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
-; NORMV-NEXT:    sra.w $fp, $a0, $a1
+; NORMV-NEXT:    sra.w $a0, $a0, $a1
 ; NORMV-NEXT:    .p2align 4, , 16
 ; NORMV-NEXT:  .LBB5_1: # %bb2
 ; NORMV-NEXT:    # =>This Inner Loop Header: Depth=1
-; NORMV-NEXT:    addi.w $a0, $fp, 0
+; NORMV-NEXT:    addi.w $a0, $a0, 0
 ; NORMV-NEXT:    bl %plt(baz)
-; NORMV-NEXT:    move $s0, $a0
-; NORMV-NEXT:    bl %plt(__fixsfsi)
 ; NORMV-NEXT:    move $fp, $a0
-; NORMV-NEXT:    move $a0, $s0
 ; NORMV-NEXT:    move $a1, $zero
 ; NORMV-NEXT:    bl %plt(__nesf2)
-; NORMV-NEXT:    bnez $a0, .LBB5_1
+; NORMV-NEXT:    move $s0, $a0
+; NORMV-NEXT:    move $a0, $fp
+; NORMV-NEXT:    bl %plt(__fixsfsi)
+; NORMV-NEXT:    bnez $s0, .LBB5_1
 ; NORMV-NEXT:  # %bb.2: # %bb7
 ; NORMV-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; NORMV-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -1078,49 +1080,49 @@ define signext i32 @bug(i32 signext %x) {
 ; CHECK-NEXT:  # %bb.1: # %if.end
 ; CHECK-NEXT:    bstrpick.d $a1, $a0, 31, 16
 ; CHECK-NEXT:    sltui $a1, $a1, 1
-; CHECK-NEXT:    slli.d $a2, $a0, 16
-; CHECK-NEXT:    masknez $a0, $a0, $a1
-; CHECK-NEXT:    maskeqz $a2, $a2, $a1
-; CHECK-NEXT:    or $a0, $a2, $a0
 ; CHECK-NEXT:    ori $a2, $zero, 32
 ; CHECK-NEXT:    masknez $a2, $a2, $a1
 ; CHECK-NEXT:    ori $a3, $zero, 16
-; CHECK-NEXT:    maskeqz $a1, $a3, $a1
-; CHECK-NEXT:    or $a1, $a1, $a2
-; CHECK-NEXT:    bstrpick.d $a2, $a0, 31, 24
-; CHECK-NEXT:    sltui $a2, $a2, 1
-; CHECK-NEXT:    slli.d $a3, $a0, 8
-; CHECK-NEXT:    addi.d $a4, $a1, -8
-; CHECK-NEXT:    masknez $a0, $a0, $a2
-; CHECK-NEXT:    maskeqz $a3, $a3, $a2
-; CHECK-NEXT:    or $a0, $a3, $a0
-; CHECK-NEXT:    masknez $a1, $a1, $a2
-; CHECK-NEXT:    maskeqz $a2, $a4, $a2
-; CHECK-NEXT:    or $a1, $a2, $a1
-; CHECK-NEXT:    bstrpick.d $a2, $a0, 31, 28
-; CHECK-NEXT:    sltui $a2, $a2, 1
-; CHECK-NEXT:    slli.d $a3, $a0, 4
-; CHECK-NEXT:    addi.d $a4, $a1, -4
-; CHECK-NEXT:    masknez $a0, $a0, $a2
-; CHECK-NEXT:    maskeqz $a3, $a3, $a2
-; CHECK-NEXT:    or $a0, $a3, $a0
-; CHECK-NEXT:    masknez $a1, $a1, $a2
-; CHECK-NEXT:    maskeqz $a2, $a4, $a2
-; CHECK-NEXT:    or $a1, $a2, $a1
-; CHECK-NEXT:    bstrpick.d $a2, $a0, 31, 30
-; CHECK-NEXT:    sltui $a2, $a2, 1
-; CHECK-NEXT:    slli.d $a3, $a0, 2
-; CHECK-NEXT:    addi.d $a4, $a1, -2
-; CHECK-NEXT:    masknez $a0, $a0, $a2
-; CHECK-NEXT:    maskeqz $a3, $a3, $a2
-; CHECK-NEXT:    or $a0, $a3, $a0
+; CHECK-NEXT:    maskeqz $a3, $a3, $a1
+; CHECK-NEXT:    or $a2, $a3, $a2
+; CHECK-NEXT:    masknez $a3, $a0, $a1
+; CHECK-NEXT:    slli.d $a0, $a0, 16
+; CHECK-NEXT:    maskeqz $a0, $a0, $a1
+; CHECK-NEXT:    or $a0, $a0, $a3
+; CHECK-NEXT:    bstrpick.d $a1, $a0, 31, 24
+; CHECK-NEXT:    sltui $a1, $a1, 1
+; CHECK-NEXT:    masknez $a3, $a2, $a1
+; CHECK-NEXT:    addi.d $a2, $a2, -8
+; CHECK-NEXT:    maskeqz $a2, $a2, $a1
+; CHECK-NEXT:    or $a2, $a2, $a3
+; CHECK-NEXT:    masknez $a3, $a0, $a1
+; CHECK-NEXT:    slli.d $a0, $a0, 8
+; CHECK-NEXT:    maskeqz $a0, $a0, $a1
+; CHECK-NEXT:    or $a0, $a0, $a3
+; CHECK-NEXT:    bstrpick.d $a1, $a0, 31, 28
+; CHECK-NEXT:    sltui $a1, $a1, 1
+; CHECK-NEXT:    masknez $a3, $a2, $a1
+; CHECK-NEXT:    addi.d $a2, $a2, -4
+; CHECK-NEXT:    maskeqz $a2, $a2, $a1
+; CHECK-NEXT:    or $a2, $a2, $a3
+; CHECK-NEXT:    masknez $a3, $a0, $a1
+; CHECK-NEXT:    slli.d $a0, $a0, 4
+; CHECK-NEXT:    maskeqz $a0, $a0, $a1
+; CHECK-NEXT:    or $a0, $a0, $a3
+; CHECK-NEXT:    bstrpick.d $a1, $a0, 31, 30
+; CHECK-NEXT:    sltui $a1, $a1, 1
+; CHECK-NEXT:    masknez $a3, $a2, $a1
+; CHECK-NEXT:    addi.d $a2, $a2, -2
+; CHECK-NEXT:    maskeqz $a2, $a2, $a1
+; CHECK-NEXT:    or $a2, $a2, $a3
+; CHECK-NEXT:    masknez $a3, $a0, $a1
+; CHECK-NEXT:    slli.d $a0, $a0, 2
+; CHECK-NEXT:    maskeqz $a0, $a0, $a1
+; CHECK-NEXT:    or $a0, $a0, $a3
 ; CHECK-NEXT:    addi.w $a0, $a0, 0
-; CHECK-NEXT:    masknez $a1, $a1, $a2
-; CHECK-NEXT:    maskeqz $a2, $a4, $a2
-; CHECK-NEXT:    or $a1, $a2, $a1
 ; CHECK-NEXT:    nor $a0, $a0, $zero
 ; CHECK-NEXT:    srli.d $a0, $a0, 31
-; CHECK-NEXT:    add.w $a0, $a1, $a0
+; CHECK-NEXT:    add.w $a0, $a2, $a0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB18_2:
 ; CHECK-NEXT:    move $a0, $zero
@@ -1132,49 +1134,49 @@ define signext i32 @bug(i32 signext %x) {
 ; NORMV-NEXT:  # %bb.1: # %if.end
 ; NORMV-NEXT:    bstrpick.d $a1, $a0, 31, 16
 ; NORMV-NEXT:    sltui $a1, $a1, 1
-; NORMV-NEXT:    slli.d $a2, $a0, 16
-; NORMV-NEXT:    masknez $a0, $a0, $a1
-; NORMV-NEXT:    maskeqz $a2, $a2, $a1
-; NORMV-NEXT:    or $a0, $a2, $a0
 ; NORMV-NEXT:    ori $a2, $zero, 32
 ; NORMV-NEXT:    masknez $a2, $a2, $a1
 ; NORMV-NEXT:    ori $a3, $zero, 16
-; NORMV-NEXT:    maskeqz $a1, $a3, $a1
-; NORMV-NEXT:    or $a1, $a1, $a2
-; NORMV-NEXT:    bstrpick.d $a2, $a0, 31, 24
-; NORMV-NEXT:    sltui $a2, $a2, 1
-; NORMV-NEXT:    slli.d $a3, $a0, 8
-; NORMV-NEXT:    addi.d $a4, $a1, -8
-; NORMV-NEXT:    masknez $a0, $a0, $a2
-; NORMV-NEXT:    maskeqz $a3, $a3, $a2
-; NORMV-NEXT:    or $a0, $a3, $a0
-; NORMV-NEXT:    masknez $a1, $a1, $a2
-; NORMV-NEXT:    maskeqz $a2, $a4, $a2
-; NORMV-NEXT:    or $a1, $a2, $a1
-; NORMV-NEXT:    bstrpick.d $a2, $a0, 31, 28
-; NORMV-NEXT:    sltui $a2, $a2, 1
-; NORMV-NEXT:    slli.d $a3, $a0, 4
-; NORMV-NEXT:    addi.d $a4, $a1, -4
-; NORMV-NEXT:    masknez $a0, $a0, $a2
-; NORMV-NEXT:    maskeqz $a3, $a3, $a2
-; NORMV-NEXT:    or $a0, $a3, $a0
-; NORMV-NEXT:    masknez $a1, $a1, $a2
-; NORMV-NEXT:    maskeqz $a2, $a4, $a2
-; NORMV-NEXT:    or $a1, $a2, $a1
-; NORMV-NEXT:    bstrpick.d $a2, $a0, 31, 30
-; NORMV-NEXT:    sltui $a2, $a2, 1
-; NORMV-NEXT:    slli.d $a3, $a0, 2
-; NORMV-NEXT:    addi.d $a4, $a1, -2
-; NORMV-NEXT:    masknez $a0, $a0, $a2
-; NORMV-NEXT:    maskeqz $a3, $a3, $a2
-; NORMV-NEXT:    or $a0, $a3, $a0
+; NORMV-NEXT:    maskeqz $a3, $a3, $a1
+; NORMV-NEXT:    or $a2, $a3, $a2
+; NORMV-NEXT:    masknez $a3, $a0, $a1
+; NORMV-NEXT:    slli.d $a0, $a0, 16
+; NORMV-NEXT:    maskeqz $a0, $a0, $a1
+; NORMV-NEXT:    or $a0, $a0, $a3
+; NORMV-NEXT:    bstrpick.d $a1, $a0, 31, 24
+; NORMV-NEXT:    sltui $a1, $a1, 1
+; NORMV-NEXT:    masknez $a3, $a2, $a1
+; NORMV-NEXT:    addi.d $a2, $a2, -8
+; NORMV-NEXT:    maskeqz $a2, $a2, $a1
+; NORMV-NEXT:    or $a2, $a2, $a3
+; NORMV-NEXT:    masknez $a3, $a0, $a1
+; NORMV-NEXT:    slli.d $a0, $a0, 8
+; NORMV-NEXT:    maskeqz $a0, $a0, $a1
+; NORMV-NEXT:    or $a0, $a0, $a3
+; NORMV-NEXT:    bstrpick.d $a1, $a0, 31, 28
+; NORMV-NEXT:    sltui $a1, $a1, 1
+; NORMV-NEXT:    masknez $a3, $a2, $a1
+; NORMV-NEXT:    addi.d $a2, $a2, -4
+; NORMV-NEXT:    maskeqz $a2, $a2, $a1
+; NORMV-NEXT:    or $a2, $a2, $a3
+; NORMV-NEXT:    masknez $a3, $a0, $a1
+; NORMV-NEXT:    slli.d $a0, $a0, 4
+; NORMV-NEXT:    maskeqz $a0, $a0, $a1
+; NORMV-NEXT:    or $a0, $a0, $a3
+; NORMV-NEXT:    bstrpick.d $a1, $a0, 31, 30
+; NORMV-NEXT:    sltui $a1, $a1, 1
+; NORMV-NEXT:    masknez $a3, $a2, $a1
+; NORMV-NEXT:    addi.d $a2, $a2, -2
+; NORMV-NEXT:    maskeqz $a2, $a2, $a1
+; NORMV-NEXT:    or $a2, $a2, $a3
+; NORMV-NEXT:    masknez $a3, $a0, $a1
+; NORMV-NEXT:    slli.d $a0, $a0, 2
+; NORMV-NEXT:    maskeqz $a0, $a0, $a1
+; NORMV-NEXT:    or $a0, $a0, $a3
 ; NORMV-NEXT:    addi.w $a0, $a0, 0
-; NORMV-NEXT:    masknez $a1, $a1, $a2
-; NORMV-NEXT:    maskeqz $a2, $a4, $a2
-; NORMV-NEXT:    or $a1, $a2, $a1
 ; NORMV-NEXT:    nor $a0, $a0, $zero
 ; NORMV-NEXT:    srli.d $a0, $a0, 31
-; NORMV-NEXT:    add.d $a0, $a1, $a0
+; NORMV-NEXT:    add.d $a0, $a2, $a0
 ; NORMV-NEXT:    addi.w $a0, $a0, 0
 ; NORMV-NEXT:    ret
 ; NORMV-NEXT:  .LBB18_2:
@@ -1223,14 +1225,14 @@ define void @test16(i32 signext %arg, i32 signext %arg1) nounwind {
 ; CHECK-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; CHECK-NEXT:    move $fp, $a1
 ; CHECK-NEXT:    bl %plt(bar)
-; CHECK-NEXT:    move $s0, $a0
 ; CHECK-NEXT:    .p2align 4, , 16
 ; CHECK-NEXT:  .LBB19_1: # %bb2
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    move $a0, $s0
+; CHECK-NEXT:    sll.w $s0, $a0, $fp
 ; CHECK-NEXT:    bl %plt(bar)
-; CHECK-NEXT:    sll.w $s0, $s0, $fp
-; CHECK-NEXT:    bnez $a0, .LBB19_1
+; CHECK-NEXT:    move $a1, $a0
+; CHECK-NEXT:    move $a0, $s0
+; CHECK-NEXT:    bnez $a1, .LBB19_1
 ; CHECK-NEXT:  # %bb.2: # %bb7
 ; CHECK-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; CHECK-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -1246,14 +1248,15 @@ define void @test16(i32 signext %arg, i32 signext %arg1) nounwind {
 ; NORMV-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; NORMV-NEXT:    move $fp, $a1
 ; NORMV-NEXT:    bl %plt(bar)
-; NORMV-NEXT:    move $s0, $a0
 ; NORMV-NEXT:    .p2align 4, , 16
 ; NORMV-NEXT:  .LBB19_1: # %bb2
 ; NORMV-NEXT:    # =>This Inner Loop Header: Depth=1
-; NORMV-NEXT:    addi.w $a0, $s0, 0
+; NORMV-NEXT:    sll.w $s0, $a0, $fp
+; NORMV-NEXT:    addi.w $a0, $a0, 0
 ; NORMV-NEXT:    bl %plt(bar)
-; NORMV-NEXT:    sll.w $s0, $s0, $fp
-; NORMV-NEXT:    bnez $a0, .LBB19_1
+; NORMV-NEXT:    move $a1, $a0
+; NORMV-NEXT:    move $a0, $s0
+; NORMV-NEXT:    bnez $a1, .LBB19_1
 ; NORMV-NEXT:  # %bb.2: # %bb7
 ; NORMV-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; NORMV-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -1284,14 +1287,14 @@ define void @test17(i32 signext %arg, i32 signext %arg1) nounwind {
 ; CHECK-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; CHECK-NEXT:    move $fp, $a1
 ; CHECK-NEXT:    bl %plt(bat)
-; CHECK-NEXT:    move $s0, $a0
 ; CHECK-NEXT:    .p2align 4, , 16
 ; CHECK-NEXT:  .LBB20_1: # %bb2
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    move $a0, $s0
+; CHECK-NEXT:    sll.w $s0, $a0, $fp
 ; CHECK-NEXT:    bl %plt(bar)
-; CHECK-NEXT:    sll.w $s0, $s0, $fp
-; CHECK-NEXT:    bnez $a0, .LBB20_1
+; CHECK-NEXT:    move $a1, $a0
+; CHECK-NEXT:    move $a0, $s0
+; CHECK-NEXT:    bnez $a1, .LBB20_1
 ; CHECK-NEXT:  # %bb.2: # %bb7
 ; CHECK-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; CHECK-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -1307,14 +1310,15 @@ define void @test17(i32 signext %arg, i32 signext %arg1) nounwind {
 ; NORMV-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; NORMV-NEXT:    move $fp, $a1
 ; NORMV-NEXT:    bl %plt(bat)
-; NORMV-NEXT:    move $s0, $a0
 ; NORMV-NEXT:    .p2align 4, , 16
 ; NORMV-NEXT:  .LBB20_1: # %bb2
 ; NORMV-NEXT:    # =>This Inner Loop Header: Depth=1
-; NORMV-NEXT:    addi.w $a0, $s0, 0
+; NORMV-NEXT:    sll.w $s0, $a0, $fp
+; NORMV-NEXT:    addi.w $a0, $a0, 0
 ; NORMV-NEXT:    bl %plt(bar)
-; NORMV-NEXT:    sll.w $s0, $s0, $fp
-; NORMV-NEXT:    bnez $a0, .LBB20_1
+; NORMV-NEXT:    move $a1, $a0
+; NORMV-NEXT:    move $a0, $s0
+; NORMV-NEXT:    bnez $a1, .LBB20_1
 ; NORMV-NEXT:  # %bb.2: # %bb7
 ; NORMV-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; NORMV-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
index 739680e6141dc..ddd0bc5e5c140 100644
--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
@@ -5,64 +5,63 @@
 define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
 ; LA32-LABEL: smuloi64:
 ; LA32:       # %bb.0:
+; LA32-NEXT:    mul.w $a5, $a0, $a2
+; LA32-NEXT:    st.w $a5, $a4, 0
 ; LA32-NEXT:    mulh.wu $a5, $a0, $a2
 ; LA32-NEXT:    mul.w $a6, $a1, $a2
 ; LA32-NEXT:    add.w $a5, $a6, $a5
-; LA32-NEXT:    sltu $a6, $a5, $a6
-; LA32-NEXT:    mulh.wu $a7, $a1, $a2
-; LA32-NEXT:    add.w $a6, $a7, $a6
 ; LA32-NEXT:    mul.w $a7, $a0, $a3
-; LA32-NEXT:    add.w $a5, $a7, $a5
-; LA32-NEXT:    sltu $a7, $a5, $a7
-; LA32-NEXT:    mulh.wu $t0, $a0, $a3
-; LA32-NEXT:    add.w $a7, $t0, $a7
-; LA32-NEXT:    add.w $a7, $a6, $a7
-; LA32-NEXT:    mul.w $t0, $a1, $a3
-; LA32-NEXT:    add.w $t1, $t0, $a7
+; LA32-NEXT:    add.w $t0, $a7, $a5
+; LA32-NEXT:    sltu $a5, $a5, $a6
+; LA32-NEXT:    mulh.wu $a6, $a1, $a2
+; LA32-NEXT:    add.w $a5, $a6, $a5
+; LA32-NEXT:    sltu $a6, $t0, $a7
+; LA32-NEXT:    mulh.wu $a7, $a0, $a3
+; LA32-NEXT:    add.w $a6, $a7, $a6
+; LA32-NEXT:    add.w $a6, $a5, $a6
+; LA32-NEXT:    mul.w $a7, $a1, $a3
+; LA32-NEXT:    add.w $t1, $a7, $a6
 ; LA32-NEXT:    srai.w $t2, $a1, 31
 ; LA32-NEXT:    mul.w $t3, $a2, $t2
 ; LA32-NEXT:    srai.w $t4, $a3, 31
 ; LA32-NEXT:    mul.w $t5, $t4, $a0
 ; LA32-NEXT:    add.w $t6, $t5, $t3
 ; LA32-NEXT:    add.w $t7, $t1, $t6
-; LA32-NEXT:    sltu $t8, $t7, $t1
-; LA32-NEXT:    sltu $t0, $t1, $t0
-; LA32-NEXT:    sltu $a6, $a7, $a6
-; LA32-NEXT:    mulh.wu $a7, $a1, $a3
-; LA32-NEXT:    add.w $a6, $a7, $a6
-; LA32-NEXT:    add.w $a6, $a6, $t0
-; LA32-NEXT:    mulh.wu $a7, $a2, $t2
-; LA32-NEXT:    add.w $a7, $a7, $t3
+; LA32-NEXT:    sltu $a7, $t1, $a7
+; LA32-NEXT:    sltu $t1, $t7, $t1
+; LA32-NEXT:    sltu $a5, $a6, $a5
+; LA32-NEXT:    mulh.wu $a6, $a1, $a3
+; LA32-NEXT:    add.w $a5, $a6, $a5
+; LA32-NEXT:    add.w $a5, $a5, $a7
+; LA32-NEXT:    mulh.wu $a2, $a2, $t2
+; LA32-NEXT:    add.w $a2, $a2, $t3
 ; LA32-NEXT:    mul.w $a3, $a3, $t2
-; LA32-NEXT:    add.w $a3, $a7, $a3
+; LA32-NEXT:    add.w $a2, $a2, $a3
 ; LA32-NEXT:    mul.w $a1, $t4, $a1
-; LA32-NEXT:    mulh.wu $a7, $t4, $a0
-; LA32-NEXT:    add.w $a1, $a7, $a1
-; LA32-NEXT:    add.w $a1, $a1, $t5
-; LA32-NEXT:    add.w $a1, $a1, $a3
-; LA32-NEXT:    sltu $a3, $t6, $t5
-; LA32-NEXT:    add.w $a1, $a1, $a3
-; LA32-NEXT:    add.w $a1, $a6, $a1
-; LA32-NEXT:    add.w $a1, $a1, $t8
-; LA32-NEXT:    srai.w $a3, $a5, 31
-; LA32-NEXT:    xor $a1, $a1, $a3
-; LA32-NEXT:    xor $a3, $t7, $a3
-; LA32-NEXT:    or $a1, $a3, $a1
-; LA32-NEXT:    sltu $a1, $zero, $a1
-; LA32-NEXT:    mul.w $a0, $a0, $a2
-; LA32-NEXT:    st.w $a0, $a4, 0
-; LA32-NEXT:    st.w $a5, $a4, 4
-; LA32-NEXT:    move $a0, $a1
+; LA32-NEXT:    mulh.wu $a0, $t4, $a0
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    add.w $a0, $a0, $t5
+; LA32-NEXT:    add.w $a0, $a0, $a2
+; LA32-NEXT:    sltu $a1, $t6, $t5
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    add.w $a0, $a5, $a0
+; LA32-NEXT:    add.w $a0, $a0, $t1
+; LA32-NEXT:    srai.w $a1, $t0, 31
+; LA32-NEXT:    xor $a0, $a0, $a1
+; LA32-NEXT:    xor $a1, $t7, $a1
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    sltu $a0, $zero, $a0
+; LA32-NEXT:    st.w $t0, $a4, 4
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: smuloi64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    mulh.d $a3, $a0, $a1
-; LA64-NEXT:    mul.d $a1, $a0, $a1
-; LA64-NEXT:    srai.d $a0, $a1, 63
-; LA64-NEXT:    xor $a0, $a3, $a0
+; LA64-NEXT:    mul.d $a3, $a0, $a1
+; LA64-NEXT:    mulh.d $a0, $a0, $a1
+; LA64-NEXT:    srai.d $a1, $a3, 63
+; LA64-NEXT:    xor $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $zero, $a0
-; LA64-NEXT:    st.d $a1, $a2, 0
+; LA64-NEXT:    st.d $a3, $a2, 0
 ; LA64-NEXT:    ret
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
@@ -98,241 +97,247 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ; LA32-NEXT:    .cfi_offset 29, -36
 ; LA32-NEXT:    .cfi_offset 30, -40
 ; LA32-NEXT:    .cfi_offset 31, -44
+; LA32-NEXT:    ld.w $t6, $a1, 0
+; LA32-NEXT:    ld.w $t4, $a0, 0
+; LA32-NEXT:    mul.w $a3, $t4, $t6
+; LA32-NEXT:    st.w $a3, $a2, 0
+; LA32-NEXT:    ld.w $a3, $a0, 4
+; LA32-NEXT:    ld.w $a5, $a1, 4
+; LA32-NEXT:    mulh.wu $a4, $t4, $t6
+; LA32-NEXT:    mul.w $a6, $a3, $t6
+; LA32-NEXT:    add.w $a4, $a6, $a4
+; LA32-NEXT:    mul.w $a7, $t4, $a5
+; LA32-NEXT:    add.w $t0, $a7, $a4
+; LA32-NEXT:    st.w $t0, $a2, 4
 ; LA32-NEXT:    st.w $a2, $sp, 48 # 4-byte Folded Spill
-; LA32-NEXT:    ld.w $t0, $a1, 12
-; LA32-NEXT:    ld.w $t1, $a1, 8
-; LA32-NEXT:    ld.w $a5, $a0, 12
-; LA32-NEXT:    ld.w $a7, $a1, 0
-; LA32-NEXT:    ld.w $a3, $a0, 0
-; LA32-NEXT:    ld.w $a6, $a0, 4
+; LA32-NEXT:    sltu $a4, $a4, $a6
+; LA32-NEXT:    mulh.wu $a6, $a3, $t6
+; LA32-NEXT:    add.w $t1, $a6, $a4
+; LA32-NEXT:    sltu $a4, $t0, $a7
+; LA32-NEXT:    mulh.wu $a6, $t4, $a5
+; LA32-NEXT:    add.w $a4, $a6, $a4
+; LA32-NEXT:    add.w $a7, $t1, $a4
 ; LA32-NEXT:    ld.w $a4, $a0, 8
-; LA32-NEXT:    ld.w $t3, $a1, 4
-; LA32-NEXT:    mulh.wu $a0, $a3, $a7
-; LA32-NEXT:    mul.w $a1, $a6, $a7
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    sltu $a1, $a0, $a1
-; LA32-NEXT:    mulh.wu $t2, $a6, $a7
-; LA32-NEXT:    add.w $a1, $t2, $a1
-; LA32-NEXT:    mul.w $t2, $a3, $t3
-; LA32-NEXT:    add.w $a0, $t2, $a0
+; LA32-NEXT:    mul.w $t0, $a3, $a5
+; LA32-NEXT:    ld.w $a6, $a1, 8
+; LA32-NEXT:    add.w $t2, $t0, $a7
+; LA32-NEXT:    mul.w $t3, $a4, $t6
+; LA32-NEXT:    add.w $t5, $t3, $t2
+; LA32-NEXT:    mul.w $t7, $t4, $a6
+; LA32-NEXT:    add.w $t8, $t7, $t5
+; LA32-NEXT:    st.w $t8, $a2, 8
+; LA32-NEXT:    sltu $t0, $t2, $t0
+; LA32-NEXT:    sltu $a7, $a7, $t1
+; LA32-NEXT:    move $a2, $a5
+; LA32-NEXT:    mulh.wu $t1, $a3, $a5
+; LA32-NEXT:    ld.w $a5, $a0, 12
+; LA32-NEXT:    add.w $a7, $t1, $a7
+; LA32-NEXT:    add.w $a7, $a7, $t0
+; LA32-NEXT:    mulh.wu $t0, $a4, $t6
+; LA32-NEXT:    mul.w $t1, $a5, $t6
+; LA32-NEXT:    add.w $t2, $t1, $t0
+; LA32-NEXT:    mul.w $fp, $a4, $a2
+; LA32-NEXT:    add.w $s0, $fp, $t2
+; LA32-NEXT:    add.w $a7, $s0, $a7
+; LA32-NEXT:    sltu $t3, $t5, $t3
+; LA32-NEXT:    add.w $a7, $a7, $t3
+; LA32-NEXT:    ld.w $t0, $a1, 12
+; LA32-NEXT:    mulh.wu $a1, $t4, $a6
+; LA32-NEXT:    mul.w $t5, $a3, $a6
+; LA32-NEXT:    add.w $s1, $t5, $a1
+; LA32-NEXT:    mul.w $s2, $t4, $t0
+; LA32-NEXT:    add.w $s3, $s2, $s1
+; LA32-NEXT:    add.w $a1, $s3, $a7
+; LA32-NEXT:    sltu $t7, $t8, $t7
+; LA32-NEXT:    add.w $a0, $a1, $t7
+; LA32-NEXT:    sltu $t8, $a7, $s0
+; LA32-NEXT:    xor $a7, $a7, $s0
+; LA32-NEXT:    sltui $a7, $a7, 1
+; LA32-NEXT:    masknez $t8, $t8, $a7
+; LA32-NEXT:    maskeqz $a7, $t3, $a7
+; LA32-NEXT:    or $a7, $a7, $t8
+; LA32-NEXT:    sltu $t1, $t2, $t1
+; LA32-NEXT:    mulh.wu $t2, $a5, $t6
+; LA32-NEXT:    add.w $t8, $t2, $t1
+; LA32-NEXT:    sltu $t1, $s0, $fp
+; LA32-NEXT:    mulh.wu $t2, $a4, $a2
+; LA32-NEXT:    add.w $t1, $t2, $t1
+; LA32-NEXT:    add.w $fp, $t8, $t1
+; LA32-NEXT:    mul.w $s0, $a5, $a2
+; LA32-NEXT:    add.w $s4, $s0, $fp
+; LA32-NEXT:    add.w $a7, $s4, $a7
 ; LA32-NEXT:    st.w $a0, $sp, 44 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $t2, $a0, $t2
-; LA32-NEXT:    mulh.wu $t4, $a3, $t3
-; LA32-NEXT:    add.w $t2, $t4, $t2
-; LA32-NEXT:    add.w $t2, $a1, $t2
-; LA32-NEXT:    mul.w $t4, $a6, $t3
-; LA32-NEXT:    add.w $t5, $t4, $t2
-; LA32-NEXT:    sltu $t4, $t5, $t4
-; LA32-NEXT:    sltu $a1, $t2, $a1
-; LA32-NEXT:    mulh.wu $t2, $a6, $t3
-; LA32-NEXT:    add.w $a1, $t2, $a1
-; LA32-NEXT:    add.w $a1, $a1, $t4
-; LA32-NEXT:    mulh.wu $t2, $a4, $a7
-; LA32-NEXT:    mul.w $t4, $a5, $a7
-; LA32-NEXT:    add.w $t2, $t4, $t2
-; LA32-NEXT:    mul.w $t6, $a4, $t3
-; LA32-NEXT:    add.w $t7, $t6, $t2
-; LA32-NEXT:    add.w $a1, $t7, $a1
-; LA32-NEXT:    mul.w $t8, $a4, $a7
-; LA32-NEXT:    add.w $t5, $t8, $t5
-; LA32-NEXT:    sltu $t8, $t5, $t8
-; LA32-NEXT:    add.w $a1, $a1, $t8
-; LA32-NEXT:    sltu $fp, $a1, $t7
-; LA32-NEXT:    xor $s0, $a1, $t7
-; LA32-NEXT:    sltui $s0, $s0, 1
-; LA32-NEXT:    masknez $fp, $fp, $s0
-; LA32-NEXT:    maskeqz $t8, $t8, $s0
-; LA32-NEXT:    or $t8, $t8, $fp
-; LA32-NEXT:    sltu $t2, $t2, $t4
-; LA32-NEXT:    mulh.wu $t4, $a5, $a7
-; LA32-NEXT:    add.w $t4, $t4, $t2
-; LA32-NEXT:    sltu $t2, $t7, $t6
-; LA32-NEXT:    mulh.wu $t6, $a4, $t3
-; LA32-NEXT:    add.w $t2, $t6, $t2
-; LA32-NEXT:    add.w $fp, $t4, $t2
-; LA32-NEXT:    mul.w $t6, $a5, $t3
-; LA32-NEXT:    add.w $s0, $t6, $fp
-; LA32-NEXT:    add.w $s1, $s0, $t8
-; LA32-NEXT:    mulh.wu $t2, $a3, $t1
-; LA32-NEXT:    mul.w $t7, $a6, $t1
-; LA32-NEXT:    add.w $t8, $t7, $t2
+; LA32-NEXT:    sltu $t1, $a0, $s3
+; LA32-NEXT:    xor $t2, $a0, $s3
+; LA32-NEXT:    sltui $t2, $t2, 1
+; LA32-NEXT:    masknez $t1, $t1, $t2
+; LA32-NEXT:    maskeqz $t2, $t7, $t2
+; LA32-NEXT:    or $t1, $t2, $t1
+; LA32-NEXT:    sltu $t2, $s1, $t5
+; LA32-NEXT:    mulh.wu $t3, $a3, $a6
+; LA32-NEXT:    add.w $t7, $t3, $t2
+; LA32-NEXT:    sltu $t2, $s3, $s2
+; LA32-NEXT:    mulh.wu $t3, $t4, $t0
+; LA32-NEXT:    add.w $t2, $t3, $t2
+; LA32-NEXT:    add.w $s1, $t7, $t2
 ; LA32-NEXT:    mul.w $s2, $a3, $t0
-; LA32-NEXT:    add.w $s3, $s2, $t8
-; LA32-NEXT:    add.w $t2, $s3, $a1
-; LA32-NEXT:    mul.w $s4, $a3, $t1
-; LA32-NEXT:    add.w $a0, $s4, $t5
-; LA32-NEXT:    st.w $a0, $sp, 40 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $t5, $a0, $s4
-; LA32-NEXT:    add.w $a0, $t2, $t5
+; LA32-NEXT:    add.w $s3, $s2, $s1
+; LA32-NEXT:    add.w $s5, $s3, $t1
+; LA32-NEXT:    add.w $s6, $a7, $s5
+; LA32-NEXT:    mul.w $s7, $a4, $a6
+; LA32-NEXT:    add.w $s8, $s7, $s6
+; LA32-NEXT:    srai.w $t5, $a5, 31
+; LA32-NEXT:    mul.w $a0, $t6, $t5
+; LA32-NEXT:    srai.w $t2, $t0, 31
+; LA32-NEXT:    mul.w $t1, $t4, $t2
+; LA32-NEXT:    add.w $ra, $t1, $a0
+; LA32-NEXT:    move $t3, $a0
+; LA32-NEXT:    sltu $s6, $s6, $a7
+; LA32-NEXT:    sltu $a0, $a7, $s4
+; LA32-NEXT:    sltu $s0, $s4, $s0
+; LA32-NEXT:    add.w $a1, $s8, $ra
+; LA32-NEXT:    st.w $a1, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $t8, $fp, $t8
+; LA32-NEXT:    mulh.wu $fp, $a5, $a2
+; LA32-NEXT:    move $a7, $a5
+; LA32-NEXT:    st.w $a5, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $a2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    add.w $t8, $fp, $t8
+; LA32-NEXT:    sltu $s4, $a1, $s8
+; LA32-NEXT:    add.w $t8, $t8, $s0
+; LA32-NEXT:    add.w $a5, $t8, $a0
+; LA32-NEXT:    sltu $t8, $s5, $s3
+; LA32-NEXT:    sltu $fp, $s3, $s2
+; LA32-NEXT:    sltu $t7, $s1, $t7
+; LA32-NEXT:    mulh.wu $s0, $a3, $t0
+; LA32-NEXT:    move $a0, $a3
+; LA32-NEXT:    st.w $a3, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT:    add.w $t7, $s0, $t7
+; LA32-NEXT:    add.w $t7, $t7, $fp
+; LA32-NEXT:    add.w $t7, $t7, $t8
+; LA32-NEXT:    add.w $t7, $a5, $t7
+; LA32-NEXT:    add.w $s1, $t7, $s6
+; LA32-NEXT:    st.w $a4, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT:    mulh.wu $t7, $a4, $a6
+; LA32-NEXT:    mul.w $a3, $a7, $a6
+; LA32-NEXT:    add.w $s2, $a3, $t7
+; LA32-NEXT:    mul.w $s0, $a4, $t0
+; LA32-NEXT:    add.w $t8, $s0, $s2
+; LA32-NEXT:    add.w $t7, $t8, $s1
+; LA32-NEXT:    sltu $a7, $s8, $s7
+; LA32-NEXT:    add.w $a4, $t7, $a7
+; LA32-NEXT:    mulh.wu $s5, $t6, $t5
+; LA32-NEXT:    mul.w $fp, $a2, $t5
+; LA32-NEXT:    add.w $a2, $fp, $s5
+; LA32-NEXT:    st.w $a2, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    mulh.wu $s3, $t4, $t2
+; LA32-NEXT:    move $a1, $t1
+; LA32-NEXT:    st.w $t1, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $t1, $ra, $t1
+; LA32-NEXT:    add.w $s8, $t3, $a2
+; LA32-NEXT:    move $s7, $t3
+; LA32-NEXT:    add.w $t3, $s3, $a1
+; LA32-NEXT:    mul.w $ra, $a0, $t2
+; LA32-NEXT:    add.w $t7, $t3, $ra
+; LA32-NEXT:    add.w $t4, $t7, $s8
+; LA32-NEXT:    add.w $a1, $t4, $t1
+; LA32-NEXT:    add.w $t4, $a4, $a1
+; LA32-NEXT:    add.w $a0, $t4, $s4
 ; LA32-NEXT:    st.w $a0, $sp, 36 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $s4, $a0, $s3
-; LA32-NEXT:    xor $s5, $a0, $s3
-; LA32-NEXT:    sltui $s5, $s5, 1
-; LA32-NEXT:    masknez $s4, $s4, $s5
-; LA32-NEXT:    maskeqz $t5, $t5, $s5
-; LA32-NEXT:    or $t5, $t5, $s4
-; LA32-NEXT:    sltu $t7, $t8, $t7
-; LA32-NEXT:    mulh.wu $t8, $a6, $t1
-; LA32-NEXT:    add.w $s4, $t8, $t7
-; LA32-NEXT:    sltu $t7, $s3, $s2
-; LA32-NEXT:    mulh.wu $t8, $a3, $t0
-; LA32-NEXT:    add.w $t7, $t8, $t7
-; LA32-NEXT:    add.w $s2, $s4, $t7
-; LA32-NEXT:    mul.w $s3, $a6, $t0
-; LA32-NEXT:    add.w $s6, $s3, $s2
-; LA32-NEXT:    add.w $s7, $s6, $t5
-; LA32-NEXT:    add.w $s5, $s1, $s7
-; LA32-NEXT:    mul.w $s8, $a4, $t1
-; LA32-NEXT:    add.w $ra, $s8, $s5
-; LA32-NEXT:    srai.w $t8, $a5, 31
-; LA32-NEXT:    mul.w $t7, $a7, $t8
-; LA32-NEXT:    st.w $a7, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    srai.w $t5, $t0, 31
-; LA32-NEXT:    sltu $s5, $s5, $s1
-; LA32-NEXT:    sltu $s1, $s1, $s0
-; LA32-NEXT:    sltu $s0, $s0, $t6
-; LA32-NEXT:    mul.w $t2, $a3, $t5
-; LA32-NEXT:    st.w $a3, $sp, 24 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $t4, $fp, $t4
-; LA32-NEXT:    mulh.wu $fp, $a5, $t3
-; LA32-NEXT:    st.w $a5, $sp, 0 # 4-byte Folded Spill
-; LA32-NEXT:    add.w $t4, $fp, $t4
-; LA32-NEXT:    add.w $fp, $t2, $t7
-; LA32-NEXT:    add.w $s0, $t4, $s0
-; LA32-NEXT:    add.w $a0, $ra, $fp
-; LA32-NEXT:    st.w $a0, $sp, 32 # 4-byte Folded Spill
-; LA32-NEXT:    add.w $a2, $s0, $s1
-; LA32-NEXT:    sltu $s0, $a0, $ra
-; LA32-NEXT:    sltu $s1, $s7, $s6
-; LA32-NEXT:    sltu $s3, $s6, $s3
-; LA32-NEXT:    sltu $s2, $s2, $s4
-; LA32-NEXT:    move $s6, $a6
-; LA32-NEXT:    st.w $a6, $sp, 16 # 4-byte Folded Spill
-; LA32-NEXT:    mulh.wu $s4, $a6, $t0
-; LA32-NEXT:    add.w $s2, $s4, $s2
-; LA32-NEXT:    add.w $s2, $s2, $s3
-; LA32-NEXT:    add.w $s1, $s2, $s1
-; LA32-NEXT:    add.w $s1, $a2, $s1
-; LA32-NEXT:    add.w $s7, $s1, $s5
-; LA32-NEXT:    move $a0, $a4
-; LA32-NEXT:    st.w $a4, $sp, 4 # 4-byte Folded Spill
-; LA32-NEXT:    mulh.wu $s1, $a4, $t1
-; LA32-NEXT:    mul.w $a5, $a5, $t1
-; LA32-NEXT:    add.w $a4, $a5, $s1
-; LA32-NEXT:    mul.w $a6, $a0, $t0
-; LA32-NEXT:    add.w $a1, $a6, $a4
-; LA32-NEXT:    sltu $ra, $ra, $s8
-; LA32-NEXT:    add.w $s1, $a1, $s7
-; LA32-NEXT:    add.w $s8, $s1, $ra
-; LA32-NEXT:    move $a0, $t2
-; LA32-NEXT:    st.w $t2, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT:    sltu $t6, $fp, $t2
-; LA32-NEXT:    mulh.wu $t2, $a7, $t8
-; LA32-NEXT:    mul.w $s4, $t3, $t8
-; LA32-NEXT:    add.w $a7, $s4, $t2
-; LA32-NEXT:    st.w $a7, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    add.w $s3, $t7, $a7
-; LA32-NEXT:    mulh.wu $a7, $a3, $t5
-; LA32-NEXT:    add.w $t4, $a7, $a0
-; LA32-NEXT:    mul.w $s2, $s6, $t5
-; LA32-NEXT:    add.w $s1, $t4, $s2
-; LA32-NEXT:    add.w $fp, $s1, $s3
-; LA32-NEXT:    add.w $a0, $fp, $t6
-; LA32-NEXT:    add.w $fp, $s8, $a0
-; LA32-NEXT:    add.w $a3, $fp, $s0
-; LA32-NEXT:    st.w $a3, $sp, 20 # 4-byte Folded Spill
-; LA32-NEXT:    xor $fp, $a3, $s8
-; LA32-NEXT:    sltui $fp, $fp, 1
-; LA32-NEXT:    sltu $s6, $a3, $s8
-; LA32-NEXT:    masknez $s6, $s6, $fp
-; LA32-NEXT:    maskeqz $fp, $s0, $fp
-; LA32-NEXT:    or $s6, $fp, $s6
-; LA32-NEXT:    sltu $fp, $s7, $a2
-; LA32-NEXT:    xor $a2, $s7, $a2
+; LA32-NEXT:    xor $t6, $a0, $a4
+; LA32-NEXT:    sltui $t6, $t6, 1
+; LA32-NEXT:    sltu $t4, $a0, $a4
+; LA32-NEXT:    masknez $t4, $t4, $t6
+; LA32-NEXT:    maskeqz $t6, $s4, $t6
+; LA32-NEXT:    or $t6, $t6, $t4
+; LA32-NEXT:    sltu $t4, $s1, $a5
+; LA32-NEXT:    xor $a2, $s1, $a5
 ; LA32-NEXT:    sltui $a2, $a2, 1
-; LA32-NEXT:    masknez $fp, $fp, $a2
-; LA32-NEXT:    maskeqz $a2, $s5, $a2
-; LA32-NEXT:    or $s0, $a2, $fp
-; LA32-NEXT:    sltu $a2, $a4, $a5
-; LA32-NEXT:    ld.w $a5, $sp, 0 # 4-byte Folded Reload
-; LA32-NEXT:    mulh.wu $a3, $a5, $t1
+; LA32-NEXT:    masknez $t4, $t4, $a2
+; LA32-NEXT:    maskeqz $a2, $s6, $a2
+; LA32-NEXT:    or $s4, $a2, $t4
+; LA32-NEXT:    sltu $a2, $s2, $a3
+; LA32-NEXT:    ld.w $a5, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    mulh.wu $a3, $a5, $a6
 ; LA32-NEXT:    add.w $a2, $a3, $a2
-; LA32-NEXT:    sltu $a3, $a1, $a6
-; LA32-NEXT:    ld.w $fp, $sp, 4 # 4-byte Folded Reload
-; LA32-NEXT:    mulh.wu $a4, $fp, $t0
-; LA32-NEXT:    add.w $a3, $a4, $a3
-; LA32-NEXT:    sltu $a4, $s8, $a1
-; LA32-NEXT:    xor $a1, $s8, $a1
-; LA32-NEXT:    sltui $a1, $a1, 1
-; LA32-NEXT:    masknez $a4, $a4, $a1
-; LA32-NEXT:    maskeqz $a1, $ra, $a1
-; LA32-NEXT:    or $a1, $a1, $a4
-; LA32-NEXT:    sltu $a4, $a0, $s1
-; LA32-NEXT:    xor $a0, $a0, $s1
+; LA32-NEXT:    sltu $a3, $t8, $s0
+; LA32-NEXT:    ld.w $s0, $sp, 16 # 4-byte Folded Reload
+; LA32-NEXT:    mulh.wu $t4, $s0, $t0
+; LA32-NEXT:    add.w $a3, $t4, $a3
+; LA32-NEXT:    sltu $t4, $a4, $t8
+; LA32-NEXT:    xor $a0, $a4, $t8
 ; LA32-NEXT:    sltui $a0, $a0, 1
-; LA32-NEXT:    masknez $a4, $a4, $a0
-; LA32-NEXT:    maskeqz $a0, $t6, $a0
-; LA32-NEXT:    or $s5, $a0, $a4
-; LA32-NEXT:    sltu $a0, $s3, $t7
-; LA32-NEXT:    add.w $a0, $t2, $a0
-; LA32-NEXT:    ld.w $t2, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT:    sltu $a4, $t4, $t2
-; LA32-NEXT:    add.w $s7, $a7, $a4
+; LA32-NEXT:    masknez $t4, $t4, $a0
+; LA32-NEXT:    maskeqz $a0, $a7, $a0
+; LA32-NEXT:    or $a0, $a0, $t4
+; LA32-NEXT:    sltu $a7, $a1, $t7
+; LA32-NEXT:    xor $a1, $a1, $t7
+; LA32-NEXT:    sltui $a1, $a1, 1
+; LA32-NEXT:    masknez $a7, $a7, $a1
+; LA32-NEXT:    maskeqz $a1, $t1, $a1
+; LA32-NEXT:    or $s1, $a1, $a7
+; LA32-NEXT:    move $t1, $s7
+; LA32-NEXT:    sltu $a1, $s8, $s7
+; LA32-NEXT:    add.w $a1, $s5, $a1
 ; LA32-NEXT:    add.w $a3, $a2, $a3
+; LA32-NEXT:    ld.w $s7, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT:    sltu $a4, $t3, $s7
+; LA32-NEXT:    add.w $s5, $s3, $a4
 ; LA32-NEXT:    sltu $a2, $a3, $a2
 ; LA32-NEXT:    mulh.wu $a4, $a5, $t0
 ; LA32-NEXT:    add.w $a2, $a4, $a2
 ; LA32-NEXT:    mul.w $a4, $a5, $t0
-; LA32-NEXT:    move $a6, $a5
+; LA32-NEXT:    move $a7, $a5
 ; LA32-NEXT:    add.w $a3, $a4, $a3
 ; LA32-NEXT:    sltu $a4, $a3, $a4
 ; LA32-NEXT:    add.w $a2, $a2, $a4
-; LA32-NEXT:    add.w $a4, $a3, $s0
+; LA32-NEXT:    add.w $a4, $a3, $s4
 ; LA32-NEXT:    sltu $a3, $a4, $a3
 ; LA32-NEXT:    add.w $a2, $a2, $a3
-; LA32-NEXT:    add.w $s8, $a4, $a1
-; LA32-NEXT:    sltu $a1, $s8, $a4
-; LA32-NEXT:    add.w $ra, $a2, $a1
-; LA32-NEXT:    ld.w $a1, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    sltu $a1, $a1, $s4
-; LA32-NEXT:    mulh.wu $a2, $t3, $t8
-; LA32-NEXT:    add.w $a1, $a2, $a1
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    sltu $a1, $a0, $a1
-; LA32-NEXT:    add.w $a1, $a2, $a1
-; LA32-NEXT:    add.w $a0, $s4, $a0
-; LA32-NEXT:    sltu $a2, $a0, $s4
-; LA32-NEXT:    add.w $a1, $a1, $a2
-; LA32-NEXT:    mul.w $a2, $t8, $t1
-; LA32-NEXT:    mul.w $a3, $t8, $t0
-; LA32-NEXT:    mulh.wu $a4, $t8, $t1
+; LA32-NEXT:    add.w $s3, $a4, $a0
+; LA32-NEXT:    sltu $a0, $s3, $a4
+; LA32-NEXT:    add.w $s4, $a2, $a0
+; LA32-NEXT:    ld.w $a0, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    sltu $a0, $a0, $fp
+; LA32-NEXT:    ld.w $a2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    mulh.wu $a2, $a2, $t5
+; LA32-NEXT:    add.w $a0, $a2, $a0
+; LA32-NEXT:    add.w $a1, $a0, $a1
+; LA32-NEXT:    sltu $a0, $a1, $a0
+; LA32-NEXT:    add.w $a0, $a2, $a0
+; LA32-NEXT:    add.w $a1, $fp, $a1
+; LA32-NEXT:    sltu $a2, $a1, $fp
+; LA32-NEXT:    add.w $a0, $a0, $a2
+; LA32-NEXT:    mul.w $a2, $t5, $a6
+; LA32-NEXT:    mul.w $a3, $t5, $t0
+; LA32-NEXT:    mulh.wu $a4, $t5, $a6
 ; LA32-NEXT:    add.w $a3, $a4, $a3
 ; LA32-NEXT:    add.w $a3, $a3, $a2
-; LA32-NEXT:    add.w $a3, $s3, $a3
-; LA32-NEXT:    add.w $a2, $t7, $a2
-; LA32-NEXT:    sltu $a4, $a2, $t7
+; LA32-NEXT:    add.w $a3, $s8, $a3
+; LA32-NEXT:    add.w $a2, $t1, $a2
+; LA32-NEXT:    sltu $a4, $a2, $t1
 ; LA32-NEXT:    add.w $a3, $a3, $a4
-; LA32-NEXT:    add.w $a1, $a1, $a3
-; LA32-NEXT:    add.w $a2, $a0, $a2
-; LA32-NEXT:    sltu $a0, $a2, $a0
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    sltu $a1, $s1, $s2
-; LA32-NEXT:    ld.w $a3, $sp, 16 # 4-byte Folded Reload
-; LA32-NEXT:    mulh.wu $a3, $t5, $a3
+; LA32-NEXT:    add.w $a0, $a0, $a3
+; LA32-NEXT:    add.w $a2, $a1, $a2
+; LA32-NEXT:    sltu $a1, $a2, $a1
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    sltu $a1, $t7, $ra
+; LA32-NEXT:    ld.w $a3, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT:    mulh.wu $a3, $t2, $a3
 ; LA32-NEXT:    add.w $a1, $a3, $a1
-; LA32-NEXT:    add.w $a1, $s7, $a1
-; LA32-NEXT:    sltu $a4, $a1, $s7
+; LA32-NEXT:    add.w $a1, $s5, $a1
+; LA32-NEXT:    sltu $a4, $a1, $s5
 ; LA32-NEXT:    add.w $a3, $a3, $a4
-; LA32-NEXT:    add.w $a1, $s2, $a1
-; LA32-NEXT:    sltu $a4, $a1, $s2
+; LA32-NEXT:    add.w $a1, $ra, $a1
+; LA32-NEXT:    sltu $a4, $a1, $ra
 ; LA32-NEXT:    add.w $a3, $a3, $a4
-; LA32-NEXT:    mul.w $a4, $fp, $t5
-; LA32-NEXT:    mulh.wu $a5, $fp, $t5
-; LA32-NEXT:    mul.w $a6, $a6, $t5
+; LA32-NEXT:    mul.w $a4, $s0, $t2
+; LA32-NEXT:    mulh.wu $a5, $s0, $t2
+; LA32-NEXT:    mul.w $a6, $a7, $t2
 ; LA32-NEXT:    add.w $a5, $a5, $a4
 ; LA32-NEXT:    add.w $a5, $a5, $a6
-; LA32-NEXT:    add.w $a5, $a5, $s1
-; LA32-NEXT:    add.w $a6, $a4, $t2
+; LA32-NEXT:    add.w $a5, $a5, $t7
+; LA32-NEXT:    add.w $a6, $a4, $s7
 ; LA32-NEXT:    sltu $a4, $a6, $a4
 ; LA32-NEXT:    add.w $a4, $a5, $a4
 ; LA32-NEXT:    add.w $a3, $a3, $a4
@@ -343,38 +348,30 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ; LA32-NEXT:    add.w $a1, $a4, $a2
 ; LA32-NEXT:    sltu $a2, $a1, $a4
 ; LA32-NEXT:    add.w $a0, $a0, $a2
-; LA32-NEXT:    add.w $a2, $a1, $s5
+; LA32-NEXT:    add.w $a2, $a1, $s1
 ; LA32-NEXT:    sltu $a1, $a2, $a1
 ; LA32-NEXT:    add.w $a0, $a0, $a1
-; LA32-NEXT:    add.w $a0, $ra, $a0
-; LA32-NEXT:    add.w $a1, $s8, $a2
-; LA32-NEXT:    sltu $a2, $a1, $s8
+; LA32-NEXT:    add.w $a0, $s4, $a0
+; LA32-NEXT:    add.w $a1, $s3, $a2
+; LA32-NEXT:    sltu $a2, $a1, $s3
 ; LA32-NEXT:    add.w $a0, $a0, $a2
-; LA32-NEXT:    add.w $a2, $a1, $s6
+; LA32-NEXT:    add.w $a2, $a1, $t6
 ; LA32-NEXT:    sltu $a1, $a2, $a1
 ; LA32-NEXT:    add.w $a0, $a0, $a1
-; LA32-NEXT:    ld.w $a4, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $a4, $sp, 44 # 4-byte Folded Reload
 ; LA32-NEXT:    srai.w $a1, $a4, 31
 ; LA32-NEXT:    xor $a0, $a0, $a1
-; LA32-NEXT:    ld.w $a3, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $a3, $sp, 36 # 4-byte Folded Reload
 ; LA32-NEXT:    xor $a3, $a3, $a1
 ; LA32-NEXT:    or $a0, $a3, $a0
 ; LA32-NEXT:    xor $a2, $a2, $a1
-; LA32-NEXT:    ld.w $a3, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $a3, $sp, 40 # 4-byte Folded Reload
 ; LA32-NEXT:    xor $a1, $a3, $a1
 ; LA32-NEXT:    or $a1, $a1, $a2
 ; LA32-NEXT:    or $a0, $a1, $a0
-; LA32-NEXT:    ld.w $a1, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:    ld.w $a2, $sp, 24 # 4-byte Folded Reload
-; LA32-NEXT:    mul.w $a1, $a2, $a1
-; LA32-NEXT:    ld.w $a2, $sp, 48 # 4-byte Folded Reload
-; LA32-NEXT:    st.w $a1, $a2, 0
-; LA32-NEXT:    ld.w $a1, $sp, 44 # 4-byte Folded Reload
-; LA32-NEXT:    st.w $a1, $a2, 4
-; LA32-NEXT:    ld.w $a1, $sp, 40 # 4-byte Folded Reload
-; LA32-NEXT:    st.w $a1, $a2, 8
 ; LA32-NEXT:    sltu $a0, $zero, $a0
-; LA32-NEXT:    st.w $a4, $a2, 12
+; LA32-NEXT:    ld.w $a1, $sp, 48 # 4-byte Folded Reload
+; LA32-NEXT:    st.w $a4, $a1, 12
 ; LA32-NEXT:    ld.w $s8, $sp, 52 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s7, $sp, 56 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s6, $sp, 60 # 4-byte Folded Reload
@@ -391,54 +388,53 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ;
 ; LA64-LABEL: smuloi128:
 ; LA64:       # %bb.0:
+; LA64-NEXT:    mul.d $a5, $a0, $a2
+; LA64-NEXT:    st.d $a5, $a4, 0
 ; LA64-NEXT:    mulh.du $a5, $a0, $a2
 ; LA64-NEXT:    mul.d $a6, $a1, $a2
 ; LA64-NEXT:    add.d $a5, $a6, $a5
-; LA64-NEXT:    sltu $a6, $a5, $a6
-; LA64-NEXT:    mulh.du $a7, $a1, $a2
-; LA64-NEXT:    add.d $a6, $a7, $a6
 ; LA64-NEXT:    mul.d $a7, $a0, $a3
-; LA64-NEXT:    add.d $a5, $a7, $a5
-; LA64-NEXT:    sltu $a7, $a5, $a7
-; LA64-NEXT:    mulh.du $t0, $a0, $a3
-; LA64-NEXT:    add.d $a7, $t0, $a7
-; LA64-NEXT:    add.d $a7, $a6, $a7
-; LA64-NEXT:    mul.d $t0, $a1, $a3
-; LA64-NEXT:    add.d $t1, $t0, $a7
+; LA64-NEXT:    add.d $t0, $a7, $a5
+; LA64-NEXT:    sltu $a5, $a5, $a6
+; LA64-NEXT:    mulh.du $a6, $a1, $a2
+; LA64-NEXT:    add.d $a5, $a6, $a5
+; LA64-NEXT:    sltu $a6, $t0, $a7
+; LA64-NEXT:    mulh.du $a7, $a0, $a3
+; LA64-NEXT:    add.d $a6, $a7, $a6
+; LA64-NEXT:    add.d $a6, $a5, $a6
+; LA64-NEXT:    mul.d $a7, $a1, $a3
+; LA64-NEXT:    add.d $t1, $a7, $a6
 ; LA64-NEXT:    srai.d $t2, $a1, 63
 ; LA64-NEXT:    mul.d $t3, $a2, $t2
 ; LA64-NEXT:    srai.d $t4, $a3, 63
 ; LA64-NEXT:    mul.d $t5, $t4, $a0
 ; LA64-NEXT:    add.d $t6, $t5, $t3
 ; LA64-NEXT:    add.d $t7, $t1, $t6
-; LA64-NEXT:    sltu $t8, $t7, $t1
-; LA64-NEXT:    sltu $t0, $t1, $t0
-; LA64-NEXT:    sltu $a6, $a7, $a6
-; LA64-NEXT:    mulh.du $a7, $a1, $a3
-; LA64-NEXT:    add.d $a6, $a7, $a6
-; LA64-NEXT:    add.d $a6, $a6, $t0
-; LA64-NEXT:    mulh.du $a7, $a2, $t2
-; LA64-NEXT:    add.d $a7, $a7, $t3
+; LA64-NEXT:    sltu $a7, $t1, $a7
+; LA64-NEXT:    sltu $t1, $t7, $t1
+; LA64-NEXT:    sltu $a5, $a6, $a5
+; LA64-NEXT:    mulh.du $a6, $a1, $a3
+; LA64-NEXT:    add.d $a5, $a6, $a5
+; LA64-NEXT:    add.d $a5, $a5, $a7
+; LA64-NEXT:    mulh.du $a2, $a2, $t2
+; LA64-NEXT:    add.d $a2, $a2, $t3
 ; LA64-NEXT:    mul.d $a3, $a3, $t2
-; LA64-NEXT:    add.d $a3, $a7, $a3
+; LA64-NEXT:    add.d $a2, $a2, $a3
 ; LA64-NEXT:    mul.d $a1, $t4, $a1
-; LA64-NEXT:    mulh.du $a7, $t4, $a0
-; LA64-NEXT:    add.d $a1, $a7, $a1
-; LA64-NEXT:    add.d $a1, $a1, $t5
-; LA64-NEXT:    add.d $a1, $a1, $a3
-; LA64-NEXT:    sltu $a3, $t6, $t5
-; LA64-NEXT:    add.d $a1, $a1, $a3
-; LA64-NEXT:    add.d $a1, $a6, $a1
-; LA64-NEXT:    add.d $a1, $a1, $t8
-; LA64-NEXT:    srai.d $a3, $a5, 63
-; LA64-NEXT:    xor $a1, $a1, $a3
-; LA64-NEXT:    xor $a3, $t7, $a3
-; LA64-NEXT:    or $a1, $a3, $a1
-; LA64-NEXT:    sltu $a1, $zero, $a1
-; LA64-NEXT:    mul.d $a0, $a0, $a2
-; LA64-NEXT:    st.d $a0, $a4, 0
-; LA64-NEXT:    st.d $a5, $a4, 8
-; LA64-NEXT:    move $a0, $a1
+; LA64-NEXT:    mulh.du $a0, $t4, $a0
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    add.d $a0, $a0, $t5
+; LA64-NEXT:    add.d $a0, $a0, $a2
+; LA64-NEXT:    sltu $a1, $t6, $t5
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    add.d $a0, $a5, $a0
+; LA64-NEXT:    add.d $a0, $a0, $t1
+; LA64-NEXT:    srai.d $a1, $t0, 63
+; LA64-NEXT:    xor $a0, $a0, $a1
+; LA64-NEXT:    xor $a1, $t7, $a1
+; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    sltu $a0, $zero, $a0
+; LA64-NEXT:    st.d $t0, $a4, 8
 ; LA64-NEXT:    ret
   %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
   %val = extractvalue {i128, i1} %t, 0
diff --git a/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll
index 4eb34bfa09acb..8728fe5d8280e 100644
--- a/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll
+++ b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll
@@ -7,13 +7,13 @@ define i32 @fptosi_i32_fp128(fp128 %X) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $sp, $sp, -32
 ; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    ld.w $a1, $a0, 12
+; LA32-NEXT:    st.w $a1, $sp, 20
+; LA32-NEXT:    ld.w $a1, $a0, 8
+; LA32-NEXT:    st.w $a1, $sp, 16
+; LA32-NEXT:    ld.w $a1, $a0, 4
+; LA32-NEXT:    st.w $a1, $sp, 12
 ; LA32-NEXT:    ld.w $a1, $a0, 0
-; LA32-NEXT:    ld.w $a2, $a0, 4
-; LA32-NEXT:    ld.w $a3, $a0, 8
-; LA32-NEXT:    ld.w $a0, $a0, 12
-; LA32-NEXT:    st.w $a0, $sp, 20
-; LA32-NEXT:    st.w $a3, $sp, 16
-; LA32-NEXT:    st.w $a2, $sp, 12
 ; LA32-NEXT:    addi.w $a0, $sp, 8
 ; LA32-NEXT:    st.w $a1, $sp, 8
 ; LA32-NEXT:    bl %plt(__fixtfsi)
@@ -82,13 +82,13 @@ define i64 @fptosi_i64_fp128(fp128 %X) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $sp, $sp, -32
 ; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    ld.w $a1, $a0, 12
+; LA32-NEXT:    st.w $a1, $sp, 12
+; LA32-NEXT:    ld.w $a1, $a0, 8
+; LA32-NEXT:    st.w $a1, $sp, 8
+; LA32-NEXT:    ld.w $a1, $a0, 4
+; LA32-NEXT:    st.w $a1, $sp, 4
 ; LA32-NEXT:    ld.w $a1, $a0, 0
-; LA32-NEXT:    ld.w $a2, $a0, 4
-; LA32-NEXT:    ld.w $a3, $a0, 8
-; LA32-NEXT:    ld.w $a0, $a0, 12
-; LA32-NEXT:    st.w $a0, $sp, 12
-; LA32-NEXT:    st.w $a3, $sp, 8
-; LA32-NEXT:    st.w $a2, $sp, 4
 ; LA32-NEXT:    addi.w $a0, $sp, 0
 ; LA32-NEXT:    st.w $a1, $sp, 0
 ; LA32-NEXT:    bl %plt(__fixtfdi)
diff --git a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
index 08534e307e4e0..09cea160f4427 100644
--- a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
+++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll
@@ -19,13 +19,13 @@ define dso_local ptr @f(i32 noundef signext %i) "frame-pointer"="all" {
 ; CHECK-NEXT:    addi.d $fp, $sp, 48
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
 ; CHECK-NEXT:    st.d $ra, $fp, -40 # 8-byte Folded Spill
-; CHECK-NEXT:    # kill: def $r5 killed $r4
-; CHECK-NEXT:    st.w $a0, $fp, -28
+; CHECK-NEXT:    move $a1, $a0
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(calls)
-; CHECK-NEXT:    addi.d $a2, $a0, %pc_lo12(calls)
-; CHECK-NEXT:    ld.w $a0, $a2, 0
-; CHECK-NEXT:    addi.d $a1, $a0, 1
-; CHECK-NEXT:    st.w $a1, $a2, 0
+; CHECK-NEXT:    addi.d $a3, $a0, %pc_lo12(calls)
+; CHECK-NEXT:    ld.w $a0, $a3, 0
+; CHECK-NEXT:    addi.d $a2, $a0, 1
+; CHECK-NEXT:    st.w $a2, $a3, 0
+; CHECK-NEXT:    st.w $a1, $fp, -28
 ; CHECK-NEXT:    bnez $a0, .LBB0_2
 ; CHECK-NEXT:    b .LBB0_1
 ; CHECK-NEXT:  .LBB0_1: # %if.then
diff --git a/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
index 8dd95befb8278..63407ad003f69 100644
--- a/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
+++ b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
@@ -8,60 +8,74 @@ declare void @foo()
 define i1 @load_store_fcc_reg(float %a, i1 %c) {
 ; LA32-LABEL: load_store_fcc_reg:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    .cfi_def_cfa_offset 16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    .cfi_def_cfa_offset 32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
 ; LA32-NEXT:    .cfi_offset 1, -4
 ; LA32-NEXT:    .cfi_offset 22, -8
 ; LA32-NEXT:    .cfi_offset 56, -16
+; LA32-NEXT:    .cfi_offset 57, -24
 ; LA32-NEXT:    move $fp, $a0
 ; LA32-NEXT:    fmov.s $fs0, $fa0
+; LA32-NEXT:    movgr2fr.w $fs1, $zero
+; LA32-NEXT:    fcmp.cult.s $fcc0, $fs1, $fa0
+; LA32-NEXT:    movcf2gr $a0, $fcc0
+; LA32-NEXT:    st.w $a0, $sp, 4
 ; LA32-NEXT:    bl %plt(foo)
-; LA32-NEXT:    movgr2fr.w $fa0, $zero
-; LA32-NEXT:    fcmp.cult.s $fcc0, $fa0, $fs0
+; LA32-NEXT:    ld.w $a0, $sp, 4
+; LA32-NEXT:    movgr2cf $fcc0, $a0
 ; LA32-NEXT:    bcnez $fcc0, .LBB0_2
 ; LA32-NEXT:  # %bb.1: # %if.then
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    b .LBB0_3
 ; LA32-NEXT:  .LBB0_2: # %if.else
-; LA32-NEXT:    fcmp.cle.s $fcc0, $fs0, $fa0
+; LA32-NEXT:    fcmp.cle.s $fcc0, $fs0, $fs1
 ; LA32-NEXT:    movcf2gr $a0, $fcc0
 ; LA32-NEXT:  .LBB0_3: # %if.then
-; LA32-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
-; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: load_store_fcc_reg:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -32
-; LA64-NEXT:    .cfi_def_cfa_offset 32
-; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    addi.d $sp, $sp, -48
+; LA64-NEXT:    .cfi_def_cfa_offset 48
+; LA64-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs1, $sp, 16 # 8-byte Folded Spill
 ; LA64-NEXT:    .cfi_offset 1, -8
 ; LA64-NEXT:    .cfi_offset 22, -16
 ; LA64-NEXT:    .cfi_offset 56, -24
+; LA64-NEXT:    .cfi_offset 57, -32
 ; LA64-NEXT:    move $fp, $a0
 ; LA64-NEXT:    fmov.s $fs0, $fa0
+; LA64-NEXT:    movgr2fr.w $fs1, $zero
+; LA64-NEXT:    fcmp.cult.s $fcc0, $fs1, $fa0
+; LA64-NEXT:    movcf2gr $a0, $fcc0
+; LA64-NEXT:    st.d $a0, $sp, 8
 ; LA64-NEXT:    bl %plt(foo)
-; LA64-NEXT:    movgr2fr.w $fa0, $zero
-; LA64-NEXT:    fcmp.cult.s $fcc0, $fa0, $fs0
+; LA64-NEXT:    ld.d $a0, $sp, 8
+; LA64-NEXT:    movgr2cf $fcc0, $a0
 ; LA64-NEXT:    bcnez $fcc0, .LBB0_2
 ; LA64-NEXT:  # %bb.1: # %if.then
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    b .LBB0_3
 ; LA64-NEXT:  .LBB0_2: # %if.else
-; LA64-NEXT:    fcmp.cle.s $fcc0, $fs0, $fa0
+; LA64-NEXT:    fcmp.cle.s $fcc0, $fs0, $fs1
 ; LA64-NEXT:    movcf2gr $a0, $fcc0
 ; LA64-NEXT:  .LBB0_3: # %if.then
-; LA64-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
-; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 32
+; LA64-NEXT:    fld.d $fs1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 48
 ; LA64-NEXT:    ret
   %cmp = fcmp ole float %a, 0.000000e+00
   call void @foo()
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index 0a401ebe5f6b2..1dfb91856698c 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -17,69 +17,69 @@ define void @test_zero(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_zero:
 ; LA32F:       # %bb.0:
 ; LA32F-NEXT:    fld.s $fa0, $a0, 12
-; LA32F-NEXT:    fld.s $fa1, $a0, 0
-; LA32F-NEXT:    fld.s $fa2, $a0, 4
-; LA32F-NEXT:    fld.s $fa3, $a0, 8
-; LA32F-NEXT:    movgr2fr.w $fa4, $zero
-; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa4
-; LA32F-NEXT:    fadd.s $fa2, $fa2, $fa4
-; LA32F-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa4
+; LA32F-NEXT:    movgr2fr.w $fa1, $zero
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA32F-NEXT:    fst.s $fa0, $a1, 12
-; LA32F-NEXT:    fst.s $fa3, $a1, 8
-; LA32F-NEXT:    fst.s $fa2, $a1, 4
-; LA32F-NEXT:    fst.s $fa1, $a1, 0
+; LA32F-NEXT:    fld.s $fa0, $a0, 8
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fst.s $fa0, $a1, 8
+; LA32F-NEXT:    fld.s $fa0, $a0, 4
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fst.s $fa0, $a1, 4
+; LA32F-NEXT:    fld.s $fa0, $a0, 0
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fst.s $fa0, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_zero:
 ; LA32D:       # %bb.0:
 ; LA32D-NEXT:    fld.s $fa0, $a0, 12
-; LA32D-NEXT:    fld.s $fa1, $a0, 0
-; LA32D-NEXT:    fld.s $fa2, $a0, 4
-; LA32D-NEXT:    fld.s $fa3, $a0, 8
-; LA32D-NEXT:    movgr2fr.w $fa4, $zero
-; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa4
-; LA32D-NEXT:    fadd.s $fa2, $fa2, $fa4
-; LA32D-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa4
+; LA32D-NEXT:    movgr2fr.w $fa1, $zero
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA32D-NEXT:    fst.s $fa0, $a1, 12
-; LA32D-NEXT:    fst.s $fa3, $a1, 8
-; LA32D-NEXT:    fst.s $fa2, $a1, 4
-; LA32D-NEXT:    fst.s $fa1, $a1, 0
+; LA32D-NEXT:    fld.s $fa0, $a0, 8
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.s $fa0, $a1, 8
+; LA32D-NEXT:    fld.s $fa0, $a0, 4
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.s $fa0, $a1, 4
+; LA32D-NEXT:    fld.s $fa0, $a0, 0
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.s $fa0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_zero:
 ; LA64F:       # %bb.0:
 ; LA64F-NEXT:    fld.s $fa0, $a0, 12
-; LA64F-NEXT:    fld.s $fa1, $a0, 0
-; LA64F-NEXT:    fld.s $fa2, $a0, 4
-; LA64F-NEXT:    fld.s $fa3, $a0, 8
-; LA64F-NEXT:    movgr2fr.w $fa4, $zero
-; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa4
-; LA64F-NEXT:    fadd.s $fa2, $fa2, $fa4
-; LA64F-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa4
+; LA64F-NEXT:    movgr2fr.w $fa1, $zero
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA64F-NEXT:    fst.s $fa0, $a1, 12
-; LA64F-NEXT:    fst.s $fa3, $a1, 8
-; LA64F-NEXT:    fst.s $fa2, $a1, 4
-; LA64F-NEXT:    fst.s $fa1, $a1, 0
+; LA64F-NEXT:    fld.s $fa0, $a0, 8
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fst.s $fa0, $a1, 8
+; LA64F-NEXT:    fld.s $fa0, $a0, 4
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fst.s $fa0, $a1, 4
+; LA64F-NEXT:    fld.s $fa0, $a0, 0
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fst.s $fa0, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_zero:
 ; LA64D:       # %bb.0:
 ; LA64D-NEXT:    fld.s $fa0, $a0, 12
-; LA64D-NEXT:    fld.s $fa1, $a0, 0
-; LA64D-NEXT:    fld.s $fa2, $a0, 4
-; LA64D-NEXT:    fld.s $fa3, $a0, 8
-; LA64D-NEXT:    movgr2fr.w $fa4, $zero
-; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa4
-; LA64D-NEXT:    fadd.s $fa2, $fa2, $fa4
-; LA64D-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa4
+; LA64D-NEXT:    movgr2fr.w $fa1, $zero
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA64D-NEXT:    fst.s $fa0, $a1, 12
-; LA64D-NEXT:    fst.s $fa3, $a1, 8
-; LA64D-NEXT:    fst.s $fa2, $a1, 4
-; LA64D-NEXT:    fst.s $fa1, $a1, 0
+; LA64D-NEXT:    fld.s $fa0, $a0, 8
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.s $fa0, $a1, 8
+; LA64D-NEXT:    fld.s $fa0, $a0, 4
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.s $fa0, $a1, 4
+; LA64D-NEXT:    fld.s $fa0, $a0, 0
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.s $fa0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f4, ptr %P
   %R = fadd %f4 %p, zeroinitializer
@@ -90,66 +90,66 @@ define void @test_zero(ptr %P, ptr %S) nounwind {
 define void @test_f2(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_f2:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    fld.s $fa0, $a0, 4
-; LA32F-NEXT:    fld.s $fa1, $a0, 0
-; LA32F-NEXT:    addi.w $a0, $zero, 1
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA32F-NEXT:    fld.s $fa2, $a2, 0
-; LA32F-NEXT:    movgr2fr.w $fa3, $a0
-; LA32F-NEXT:    ffint.s.w $fa3, $fa3
-; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa3
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA32F-NEXT:    fld.s $fa0, $a2, 0
+; LA32F-NEXT:    fld.s $fa1, $a0, 4
+; LA32F-NEXT:    fadd.s $fa0, $fa1, $fa0
 ; LA32F-NEXT:    fst.s $fa0, $a1, 4
-; LA32F-NEXT:    fst.s $fa1, $a1, 0
+; LA32F-NEXT:    fld.s $fa0, $a0, 0
+; LA32F-NEXT:    addi.w $a0, $zero, 1
+; LA32F-NEXT:    movgr2fr.w $fa1, $a0
+; LA32F-NEXT:    ffint.s.w $fa1, $fa1
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fst.s $fa0, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_f2:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    fld.s $fa0, $a0, 4
-; LA32D-NEXT:    fld.s $fa1, $a0, 0
-; LA32D-NEXT:    addi.w $a0, $zero, 1
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA32D-NEXT:    fld.s $fa2, $a2, 0
-; LA32D-NEXT:    movgr2fr.w $fa3, $a0
-; LA32D-NEXT:    ffint.s.w $fa3, $fa3
-; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa3
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA32D-NEXT:    fld.s $fa0, $a2, 0
+; LA32D-NEXT:    fld.s $fa1, $a0, 4
+; LA32D-NEXT:    fadd.s $fa0, $fa1, $fa0
 ; LA32D-NEXT:    fst.s $fa0, $a1, 4
-; LA32D-NEXT:    fst.s $fa1, $a1, 0
+; LA32D-NEXT:    fld.s $fa0, $a0, 0
+; LA32D-NEXT:    addi.w $a0, $zero, 1
+; LA32D-NEXT:    movgr2fr.w $fa1, $a0
+; LA32D-NEXT:    ffint.s.w $fa1, $fa1
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.s $fa0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_f2:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    fld.s $fa0, $a0, 4
-; LA64F-NEXT:    fld.s $fa1, $a0, 0
-; LA64F-NEXT:    addi.w $a0, $zero, 1
 ; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
 ; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA64F-NEXT:    fld.s $fa2, $a2, 0
-; LA64F-NEXT:    movgr2fr.w $fa3, $a0
-; LA64F-NEXT:    ffint.s.w $fa3, $fa3
-; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa3
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA64F-NEXT:    fld.s $fa0, $a2, 0
+; LA64F-NEXT:    fld.s $fa1, $a0, 4
+; LA64F-NEXT:    fadd.s $fa0, $fa1, $fa0
 ; LA64F-NEXT:    fst.s $fa0, $a1, 4
-; LA64F-NEXT:    fst.s $fa1, $a1, 0
+; LA64F-NEXT:    fld.s $fa0, $a0, 0
+; LA64F-NEXT:    addi.w $a0, $zero, 1
+; LA64F-NEXT:    movgr2fr.w $fa1, $a0
+; LA64F-NEXT:    ffint.s.w $fa1, $fa1
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fst.s $fa0, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_f2:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    fld.s $fa0, $a0, 4
-; LA64D-NEXT:    fld.s $fa1, $a0, 0
-; LA64D-NEXT:    addi.w $a0, $zero, 1
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI1_0)
-; LA64D-NEXT:    fld.s $fa2, $a2, 0
-; LA64D-NEXT:    movgr2fr.w $fa3, $a0
-; LA64D-NEXT:    ffint.s.w $fa3, $fa3
-; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa3
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA64D-NEXT:    fld.s $fa0, $a2, 0
+; LA64D-NEXT:    fld.s $fa1, $a0, 4
+; LA64D-NEXT:    fadd.s $fa0, $fa1, $fa0
 ; LA64D-NEXT:    fst.s $fa0, $a1, 4
-; LA64D-NEXT:    fst.s $fa1, $a1, 0
+; LA64D-NEXT:    fld.s $fa0, $a0, 0
+; LA64D-NEXT:    addi.w $a0, $zero, 1
+; LA64D-NEXT:    movgr2fr.w $fa1, $a0
+; LA64D-NEXT:    ffint.s.w $fa1, $fa1
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.s $fa0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f2, ptr %P
   %R = fadd %f2 %p, < float 1.000000e+00, float 2.000000e+00 >
@@ -160,114 +160,114 @@ define void @test_f2(ptr %P, ptr %S) nounwind {
 define void @test_f4(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_f4:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    fld.s $fa0, $a0, 12
+; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_0)
+; LA32F-NEXT:    fld.s $fa0, $a2, 0
+; LA32F-NEXT:    fld.s $fa1, $a0, 12
+; LA32F-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32F-NEXT:    fst.s $fa0, $a1, 12
+; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
+; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_1)
+; LA32F-NEXT:    fld.s $fa0, $a2, 0
 ; LA32F-NEXT:    fld.s $fa1, $a0, 8
-; LA32F-NEXT:    fld.s $fa2, $a0, 4
-; LA32F-NEXT:    fld.s $fa3, $a0, 0
+; LA32F-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32F-NEXT:    fst.s $fa0, $a1, 8
+; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
+; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_2)
+; LA32F-NEXT:    fld.s $fa0, $a2, 0
+; LA32F-NEXT:    fld.s $fa1, $a0, 4
+; LA32F-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32F-NEXT:    fst.s $fa0, $a1, 4
+; LA32F-NEXT:    fld.s $fa0, $a0, 0
 ; LA32F-NEXT:    addi.w $a0, $zero, 1
-; LA32F-NEXT:    movgr2fr.w $fa4, $a0
-; LA32F-NEXT:    ffint.s.w $fa4, $fa4
-; LA32F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA32F-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_0)
-; LA32F-NEXT:    fld.s $fa5, $a0, 0
-; LA32F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; LA32F-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_1)
-; LA32F-NEXT:    fld.s $fa6, $a0, 0
-; LA32F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
-; LA32F-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_2)
-; LA32F-NEXT:    fld.s $fa7, $a0, 0
-; LA32F-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA32F-NEXT:    fadd.s $fa2, $fa2, $fa5
-; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa6
-; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa7
-; LA32F-NEXT:    fst.s $fa0, $a1, 12
-; LA32F-NEXT:    fst.s $fa1, $a1, 8
-; LA32F-NEXT:    fst.s $fa2, $a1, 4
-; LA32F-NEXT:    fst.s $fa3, $a1, 0
+; LA32F-NEXT:    movgr2fr.w $fa1, $a0
+; LA32F-NEXT:    ffint.s.w $fa1, $fa1
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fst.s $fa0, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_f4:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    fld.s $fa0, $a0, 12
+; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_0)
+; LA32D-NEXT:    fld.s $fa0, $a2, 0
+; LA32D-NEXT:    fld.s $fa1, $a0, 12
+; LA32D-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32D-NEXT:    fst.s $fa0, $a1, 12
+; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
+; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_1)
+; LA32D-NEXT:    fld.s $fa0, $a2, 0
 ; LA32D-NEXT:    fld.s $fa1, $a0, 8
-; LA32D-NEXT:    fld.s $fa2, $a0, 4
-; LA32D-NEXT:    fld.s $fa3, $a0, 0
+; LA32D-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32D-NEXT:    fst.s $fa0, $a1, 8
+; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
+; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI2_2)
+; LA32D-NEXT:    fld.s $fa0, $a2, 0
+; LA32D-NEXT:    fld.s $fa1, $a0, 4
+; LA32D-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA32D-NEXT:    fst.s $fa0, $a1, 4
+; LA32D-NEXT:    fld.s $fa0, $a0, 0
 ; LA32D-NEXT:    addi.w $a0, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa4, $a0
-; LA32D-NEXT:    ffint.s.w $fa4, $fa4
-; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_0)
-; LA32D-NEXT:    fld.s $fa5, $a0, 0
-; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_1)
-; LA32D-NEXT:    fld.s $fa6, $a0, 0
-; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
-; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI2_2)
-; LA32D-NEXT:    fld.s $fa7, $a0, 0
-; LA32D-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA32D-NEXT:    fadd.s $fa2, $fa2, $fa5
-; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa6
-; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa7
-; LA32D-NEXT:    fst.s $fa0, $a1, 12
-; LA32D-NEXT:    fst.s $fa1, $a1, 8
-; LA32D-NEXT:    fst.s $fa2, $a1, 4
-; LA32D-NEXT:    fst.s $fa3, $a1, 0
+; LA32D-NEXT:    movgr2fr.w $fa1, $a0
+; LA32D-NEXT:    ffint.s.w $fa1, $fa1
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.s $fa0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_f4:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    fld.s $fa0, $a0, 12
+; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_0)
+; LA64F-NEXT:    fld.s $fa0, $a2, 0
+; LA64F-NEXT:    fld.s $fa1, $a0, 12
+; LA64F-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA64F-NEXT:    fst.s $fa0, $a1, 12
+; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
+; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_1)
+; LA64F-NEXT:    fld.s $fa0, $a2, 0
 ; LA64F-NEXT:    fld.s $fa1, $a0, 8
-; LA64F-NEXT:    fld.s $fa2, $a0, 4
-; LA64F-NEXT:    fld.s $fa3, $a0, 0
+; LA64F-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA64F-NEXT:    fst.s $fa0, $a1, 8
+; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
+; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_2)
+; LA64F-NEXT:    fld.s $fa0, $a2, 0
+; LA64F-NEXT:    fld.s $fa1, $a0, 4
+; LA64F-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA64F-NEXT:    fst.s $fa0, $a1, 4
+; LA64F-NEXT:    fld.s $fa0, $a0, 0
 ; LA64F-NEXT:    addi.w $a0, $zero, 1
-; LA64F-NEXT:    movgr2fr.w $fa4, $a0
-; LA64F-NEXT:    ffint.s.w $fa4, $fa4
-; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
-; LA64F-NEXT:    fld.s $fa5, $a0, 0
-; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_1)
-; LA64F-NEXT:    fld.s $fa6, $a0, 0
-; LA64F-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
-; LA64F-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_2)
-; LA64F-NEXT:    fld.s $fa7, $a0, 0
-; LA64F-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA64F-NEXT:    fadd.s $fa2, $fa2, $fa5
-; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa6
-; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa7
-; LA64F-NEXT:    fst.s $fa0, $a1, 12
-; LA64F-NEXT:    fst.s $fa1, $a1, 8
-; LA64F-NEXT:    fst.s $fa2, $a1, 4
-; LA64F-NEXT:    fst.s $fa3, $a1, 0
+; LA64F-NEXT:    movgr2fr.w $fa1, $a0
+; LA64F-NEXT:    ffint.s.w $fa1, $fa1
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fst.s $fa0, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_f4:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    fld.s $fa0, $a0, 12
+; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_0)
+; LA64D-NEXT:    fld.s $fa0, $a2, 0
+; LA64D-NEXT:    fld.s $fa1, $a0, 12
+; LA64D-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA64D-NEXT:    fst.s $fa0, $a1, 12
+; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_1)
+; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_1)
+; LA64D-NEXT:    fld.s $fa0, $a2, 0
 ; LA64D-NEXT:    fld.s $fa1, $a0, 8
-; LA64D-NEXT:    fld.s $fa2, $a0, 4
-; LA64D-NEXT:    fld.s $fa3, $a0, 0
+; LA64D-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA64D-NEXT:    fst.s $fa0, $a1, 8
+; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_2)
+; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI2_2)
+; LA64D-NEXT:    fld.s $fa0, $a2, 0
+; LA64D-NEXT:    fld.s $fa1, $a0, 4
+; LA64D-NEXT:    fadd.s $fa0, $fa1, $fa0
+; LA64D-NEXT:    fst.s $fa0, $a1, 4
+; LA64D-NEXT:    fld.s $fa0, $a0, 0
 ; LA64D-NEXT:    addi.w $a0, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa4, $a0
-; LA64D-NEXT:    ffint.s.w $fa4, $fa4
-; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
-; LA64D-NEXT:    fld.s $fa5, $a0, 0
-; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_1)
-; LA64D-NEXT:    fld.s $fa6, $a0, 0
-; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_2)
-; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_2)
-; LA64D-NEXT:    fld.s $fa7, $a0, 0
-; LA64D-NEXT:    fadd.s $fa3, $fa3, $fa4
-; LA64D-NEXT:    fadd.s $fa2, $fa2, $fa5
-; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa6
-; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa7
-; LA64D-NEXT:    fst.s $fa0, $a1, 12
-; LA64D-NEXT:    fst.s $fa1, $a1, 8
-; LA64D-NEXT:    fst.s $fa2, $a1, 4
-; LA64D-NEXT:    fst.s $fa3, $a1, 0
+; LA64D-NEXT:    movgr2fr.w $fa1, $a0
+; LA64D-NEXT:    ffint.s.w $fa1, $fa1
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.s $fa0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f4, ptr %P
   %R = fadd %f4 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
@@ -278,162 +278,162 @@ define void @test_f4(ptr %P, ptr %S) nounwind {
 define void @test_f8(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_f8:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    addi.w $a2, $zero, 1
-; LA32F-NEXT:    movgr2fr.w $fa0, $a2
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA32F-NEXT:    fld.s $fa1, $a2, 0
+; LA32F-NEXT:    fld.s $fa0, $a2, 0
+; LA32F-NEXT:    fld.s $fa1, $a0, 28
+; LA32F-NEXT:    fadd.s $fa1, $fa1, $fa0
+; LA32F-NEXT:    fst.s $fa1, $a1, 28
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_1)
-; LA32F-NEXT:    fld.s $fa2, $a2, 0
+; LA32F-NEXT:    fld.s $fa1, $a2, 0
+; LA32F-NEXT:    fld.s $fa2, $a0, 24
+; LA32F-NEXT:    fadd.s $fa2, $fa2, $fa1
+; LA32F-NEXT:    fst.s $fa2, $a1, 24
 ; LA32F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA32F-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA32F-NEXT:    fld.s $fa3, $a2, 0
-; LA32F-NEXT:    fld.s $fa4, $a0, 28
-; LA32F-NEXT:    fld.s $fa5, $a0, 24
-; LA32F-NEXT:    fld.s $fa6, $a0, 12
-; LA32F-NEXT:    fld.s $fa7, $a0, 8
-; LA32F-NEXT:    fld.s $ft0, $a0, 0
-; LA32F-NEXT:    fld.s $ft1, $a0, 16
-; LA32F-NEXT:    fld.s $ft2, $a0, 4
-; LA32F-NEXT:    ffint.s.w $fa0, $fa0
-; LA32F-NEXT:    fadd.s $ft0, $ft0, $fa0
-; LA32F-NEXT:    fadd.s $fa0, $ft1, $fa0
-; LA32F-NEXT:    fld.s $ft1, $a0, 20
-; LA32F-NEXT:    fadd.s $ft2, $ft2, $fa1
-; LA32F-NEXT:    fadd.s $fa7, $fa7, $fa2
-; LA32F-NEXT:    fadd.s $fa6, $fa6, $fa3
-; LA32F-NEXT:    fadd.s $fa1, $ft1, $fa1
-; LA32F-NEXT:    fadd.s $fa2, $fa5, $fa2
-; LA32F-NEXT:    fadd.s $fa3, $fa4, $fa3
-; LA32F-NEXT:    fst.s $fa3, $a1, 28
-; LA32F-NEXT:    fst.s $fa2, $a1, 24
-; LA32F-NEXT:    fst.s $fa1, $a1, 20
-; LA32F-NEXT:    fst.s $fa6, $a1, 12
-; LA32F-NEXT:    fst.s $fa7, $a1, 8
-; LA32F-NEXT:    fst.s $ft2, $a1, 4
+; LA32F-NEXT:    fld.s $fa2, $a2, 0
+; LA32F-NEXT:    fld.s $fa3, $a0, 20
+; LA32F-NEXT:    fadd.s $fa3, $fa3, $fa2
+; LA32F-NEXT:    fst.s $fa3, $a1, 20
+; LA32F-NEXT:    fld.s $fa3, $a0, 12
+; LA32F-NEXT:    fadd.s $fa0, $fa3, $fa0
+; LA32F-NEXT:    fst.s $fa0, $a1, 12
+; LA32F-NEXT:    fld.s $fa0, $a0, 8
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fst.s $fa0, $a1, 8
+; LA32F-NEXT:    fld.s $fa0, $a0, 4
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA32F-NEXT:    fst.s $fa0, $a1, 4
+; LA32F-NEXT:    fld.s $fa0, $a0, 16
+; LA32F-NEXT:    addi.w $a2, $zero, 1
+; LA32F-NEXT:    movgr2fr.w $fa1, $a2
+; LA32F-NEXT:    ffint.s.w $fa1, $fa1
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA32F-NEXT:    fst.s $fa0, $a1, 16
-; LA32F-NEXT:    fst.s $ft0, $a1, 0
+; LA32F-NEXT:    fld.s $fa0, $a0, 0
+; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    fst.s $fa0, $a1, 0
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_f8:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    addi.w $a2, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa0, $a2
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA32D-NEXT:    fld.s $fa1, $a2, 0
+; LA32D-NEXT:    fld.s $fa0, $a2, 0
+; LA32D-NEXT:    fld.s $fa1, $a0, 28
+; LA32D-NEXT:    fadd.s $fa1, $fa1, $fa0
+; LA32D-NEXT:    fst.s $fa1, $a1, 28
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_1)
-; LA32D-NEXT:    fld.s $fa2, $a2, 0
+; LA32D-NEXT:    fld.s $fa1, $a2, 0
+; LA32D-NEXT:    fld.s $fa2, $a0, 24
+; LA32D-NEXT:    fadd.s $fa2, $fa2, $fa1
+; LA32D-NEXT:    fst.s $fa2, $a1, 24
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA32D-NEXT:    fld.s $fa3, $a2, 0
-; LA32D-NEXT:    fld.s $fa4, $a0, 28
-; LA32D-NEXT:    fld.s $fa5, $a0, 24
-; LA32D-NEXT:    fld.s $fa6, $a0, 12
-; LA32D-NEXT:    fld.s $fa7, $a0, 8
-; LA32D-NEXT:    fld.s $ft0, $a0, 0
-; LA32D-NEXT:    fld.s $ft1, $a0, 16
-; LA32D-NEXT:    fld.s $ft2, $a0, 4
-; LA32D-NEXT:    ffint.s.w $fa0, $fa0
-; LA32D-NEXT:    fadd.s $ft0, $ft0, $fa0
-; LA32D-NEXT:    fadd.s $fa0, $ft1, $fa0
-; LA32D-NEXT:    fld.s $ft1, $a0, 20
-; LA32D-NEXT:    fadd.s $ft2, $ft2, $fa1
-; LA32D-NEXT:    fadd.s $fa7, $fa7, $fa2
-; LA32D-NEXT:    fadd.s $fa6, $fa6, $fa3
-; LA32D-NEXT:    fadd.s $fa1, $ft1, $fa1
-; LA32D-NEXT:    fadd.s $fa2, $fa5, $fa2
-; LA32D-NEXT:    fadd.s $fa3, $fa4, $fa3
-; LA32D-NEXT:    fst.s $fa3, $a1, 28
-; LA32D-NEXT:    fst.s $fa2, $a1, 24
-; LA32D-NEXT:    fst.s $fa1, $a1, 20
-; LA32D-NEXT:    fst.s $fa6, $a1, 12
-; LA32D-NEXT:    fst.s $fa7, $a1, 8
-; LA32D-NEXT:    fst.s $ft2, $a1, 4
+; LA32D-NEXT:    fld.s $fa2, $a2, 0
+; LA32D-NEXT:    fld.s $fa3, $a0, 20
+; LA32D-NEXT:    fadd.s $fa3, $fa3, $fa2
+; LA32D-NEXT:    fst.s $fa3, $a1, 20
+; LA32D-NEXT:    fld.s $fa3, $a0, 12
+; LA32D-NEXT:    fadd.s $fa0, $fa3, $fa0
+; LA32D-NEXT:    fst.s $fa0, $a1, 12
+; LA32D-NEXT:    fld.s $fa0, $a0, 8
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.s $fa0, $a1, 8
+; LA32D-NEXT:    fld.s $fa0, $a0, 4
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA32D-NEXT:    fst.s $fa0, $a1, 4
+; LA32D-NEXT:    fld.s $fa0, $a0, 16
+; LA32D-NEXT:    addi.w $a2, $zero, 1
+; LA32D-NEXT:    movgr2fr.w $fa1, $a2
+; LA32D-NEXT:    ffint.s.w $fa1, $fa1
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA32D-NEXT:    fst.s $fa0, $a1, 16
-; LA32D-NEXT:    fst.s $ft0, $a1, 0
+; LA32D-NEXT:    fld.s $fa0, $a0, 0
+; LA32D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.s $fa0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_f8:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.w $a2, $zero, 1
-; LA64F-NEXT:    movgr2fr.w $fa0, $a2
 ; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA64F-NEXT:    fld.s $fa1, $a2, 0
+; LA64F-NEXT:    fld.s $fa0, $a2, 0
+; LA64F-NEXT:    fld.s $fa1, $a0, 28
+; LA64F-NEXT:    fadd.s $fa1, $fa1, $fa0
+; LA64F-NEXT:    fst.s $fa1, $a1, 28
 ; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_1)
-; LA64F-NEXT:    fld.s $fa2, $a2, 0
+; LA64F-NEXT:    fld.s $fa1, $a2, 0
+; LA64F-NEXT:    fld.s $fa2, $a0, 24
+; LA64F-NEXT:    fadd.s $fa2, $fa2, $fa1
+; LA64F-NEXT:    fst.s $fa2, $a1, 24
 ; LA64F-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA64F-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA64F-NEXT:    fld.s $fa3, $a2, 0
-; LA64F-NEXT:    fld.s $fa4, $a0, 28
-; LA64F-NEXT:    fld.s $fa5, $a0, 24
-; LA64F-NEXT:    fld.s $fa6, $a0, 12
-; LA64F-NEXT:    fld.s $fa7, $a0, 8
-; LA64F-NEXT:    fld.s $ft0, $a0, 0
-; LA64F-NEXT:    fld.s $ft1, $a0, 16
-; LA64F-NEXT:    fld.s $ft2, $a0, 4
-; LA64F-NEXT:    ffint.s.w $fa0, $fa0
-; LA64F-NEXT:    fadd.s $ft0, $ft0, $fa0
-; LA64F-NEXT:    fadd.s $fa0, $ft1, $fa0
-; LA64F-NEXT:    fld.s $ft1, $a0, 20
-; LA64F-NEXT:    fadd.s $ft2, $ft2, $fa1
-; LA64F-NEXT:    fadd.s $fa7, $fa7, $fa2
-; LA64F-NEXT:    fadd.s $fa6, $fa6, $fa3
-; LA64F-NEXT:    fadd.s $fa1, $ft1, $fa1
-; LA64F-NEXT:    fadd.s $fa2, $fa5, $fa2
-; LA64F-NEXT:    fadd.s $fa3, $fa4, $fa3
-; LA64F-NEXT:    fst.s $fa3, $a1, 28
-; LA64F-NEXT:    fst.s $fa2, $a1, 24
-; LA64F-NEXT:    fst.s $fa1, $a1, 20
-; LA64F-NEXT:    fst.s $fa6, $a1, 12
-; LA64F-NEXT:    fst.s $fa7, $a1, 8
-; LA64F-NEXT:    fst.s $ft2, $a1, 4
+; LA64F-NEXT:    fld.s $fa2, $a2, 0
+; LA64F-NEXT:    fld.s $fa3, $a0, 20
+; LA64F-NEXT:    fadd.s $fa3, $fa3, $fa2
+; LA64F-NEXT:    fst.s $fa3, $a1, 20
+; LA64F-NEXT:    fld.s $fa3, $a0, 12
+; LA64F-NEXT:    fadd.s $fa0, $fa3, $fa0
+; LA64F-NEXT:    fst.s $fa0, $a1, 12
+; LA64F-NEXT:    fld.s $fa0, $a0, 8
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fst.s $fa0, $a1, 8
+; LA64F-NEXT:    fld.s $fa0, $a0, 4
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA64F-NEXT:    fst.s $fa0, $a1, 4
+; LA64F-NEXT:    fld.s $fa0, $a0, 16
+; LA64F-NEXT:    addi.w $a2, $zero, 1
+; LA64F-NEXT:    movgr2fr.w $fa1, $a2
+; LA64F-NEXT:    ffint.s.w $fa1, $fa1
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA64F-NEXT:    fst.s $fa0, $a1, 16
-; LA64F-NEXT:    fst.s $ft0, $a1, 0
+; LA64F-NEXT:    fld.s $fa0, $a0, 0
+; LA64F-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    fst.s $fa0, $a1, 0
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_f8:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    addi.w $a2, $zero, 1
-; LA64D-NEXT:    movgr2fr.w $fa0, $a2
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_0)
-; LA64D-NEXT:    fld.s $fa1, $a2, 0
+; LA64D-NEXT:    fld.s $fa0, $a2, 0
+; LA64D-NEXT:    fld.s $fa1, $a0, 28
+; LA64D-NEXT:    fadd.s $fa1, $fa1, $fa0
+; LA64D-NEXT:    fst.s $fa1, $a1, 28
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_1)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_1)
-; LA64D-NEXT:    fld.s $fa2, $a2, 0
+; LA64D-NEXT:    fld.s $fa1, $a2, 0
+; LA64D-NEXT:    fld.s $fa2, $a0, 24
+; LA64D-NEXT:    fadd.s $fa2, $fa2, $fa1
+; LA64D-NEXT:    fst.s $fa2, $a1, 24
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_2)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI3_2)
-; LA64D-NEXT:    fld.s $fa3, $a2, 0
-; LA64D-NEXT:    fld.s $fa4, $a0, 28
-; LA64D-NEXT:    fld.s $fa5, $a0, 24
-; LA64D-NEXT:    fld.s $fa6, $a0, 12
-; LA64D-NEXT:    fld.s $fa7, $a0, 8
-; LA64D-NEXT:    fld.s $ft0, $a0, 0
-; LA64D-NEXT:    fld.s $ft1, $a0, 16
-; LA64D-NEXT:    fld.s $ft2, $a0, 4
-; LA64D-NEXT:    ffint.s.w $fa0, $fa0
-; LA64D-NEXT:    fadd.s $ft0, $ft0, $fa0
-; LA64D-NEXT:    fadd.s $fa0, $ft1, $fa0
-; LA64D-NEXT:    fld.s $ft1, $a0, 20
-; LA64D-NEXT:    fadd.s $ft2, $ft2, $fa1
-; LA64D-NEXT:    fadd.s $fa7, $fa7, $fa2
-; LA64D-NEXT:    fadd.s $fa6, $fa6, $fa3
-; LA64D-NEXT:    fadd.s $fa1, $ft1, $fa1
-; LA64D-NEXT:    fadd.s $fa2, $fa5, $fa2
-; LA64D-NEXT:    fadd.s $fa3, $fa4, $fa3
-; LA64D-NEXT:    fst.s $fa3, $a1, 28
-; LA64D-NEXT:    fst.s $fa2, $a1, 24
-; LA64D-NEXT:    fst.s $fa1, $a1, 20
-; LA64D-NEXT:    fst.s $fa6, $a1, 12
-; LA64D-NEXT:    fst.s $fa7, $a1, 8
-; LA64D-NEXT:    fst.s $ft2, $a1, 4
+; LA64D-NEXT:    fld.s $fa2, $a2, 0
+; LA64D-NEXT:    fld.s $fa3, $a0, 20
+; LA64D-NEXT:    fadd.s $fa3, $fa3, $fa2
+; LA64D-NEXT:    fst.s $fa3, $a1, 20
+; LA64D-NEXT:    fld.s $fa3, $a0, 12
+; LA64D-NEXT:    fadd.s $fa0, $fa3, $fa0
+; LA64D-NEXT:    fst.s $fa0, $a1, 12
+; LA64D-NEXT:    fld.s $fa0, $a0, 8
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.s $fa0, $a1, 8
+; LA64D-NEXT:    fld.s $fa0, $a0, 4
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa2
+; LA64D-NEXT:    fst.s $fa0, $a1, 4
+; LA64D-NEXT:    fld.s $fa0, $a0, 16
+; LA64D-NEXT:    addi.w $a2, $zero, 1
+; LA64D-NEXT:    movgr2fr.w $fa1, $a2
+; LA64D-NEXT:    ffint.s.w $fa1, $fa1
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; LA64D-NEXT:    fst.s $fa0, $a1, 16
-; LA64D-NEXT:    fst.s $ft0, $a1, 0
+; LA64D-NEXT:    fld.s $fa0, $a0, 0
+; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.s $fa0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %f8, ptr %P
   %R = fadd %f8 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
@@ -444,58 +444,47 @@ define void @test_f8(ptr %P, ptr %S) nounwind {
 define void @test_d2(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_d2:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    addi.w $sp, $sp, -32
-; LA32F-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s1, $sp, 16 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s2, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s3, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $fp, $a0, 8
-; LA32F-NEXT:    ld.w $s0, $a0, 12
-; LA32F-NEXT:    ld.w $a2, $a0, 0
-; LA32F-NEXT:    ld.w $a4, $a0, 4
-; LA32F-NEXT:    move $s1, $a1
-; LA32F-NEXT:    lu12i.w $a3, 261888
-; LA32F-NEXT:    move $a0, $a2
-; LA32F-NEXT:    move $a1, $a4
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s0, $sp, 4 # 4-byte Folded Spill
+; LA32F-NEXT:    move $fp, $a1
+; LA32F-NEXT:    move $s0, $a0
+; LA32F-NEXT:    ld.w $a0, $a0, 8
+; LA32F-NEXT:    ld.w $a1, $s0, 12
+; LA32F-NEXT:    lu12i.w $a3, 262144
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s2, $a0
-; LA32F-NEXT:    move $s3, $a1
-; LA32F-NEXT:    lu12i.w $a3, 262144
-; LA32F-NEXT:    move $a0, $fp
-; LA32F-NEXT:    move $a1, $s0
+; LA32F-NEXT:    st.w $a0, $fp, 8
+; LA32F-NEXT:    st.w $a1, $fp, 12
+; LA32F-NEXT:    ld.w $a0, $s0, 0
+; LA32F-NEXT:    ld.w $a1, $s0, 4
+; LA32F-NEXT:    lu12i.w $a3, 261888
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $s1, 8
-; LA32F-NEXT:    st.w $a1, $s1, 12
-; LA32F-NEXT:    st.w $s2, $s1, 0
-; LA32F-NEXT:    st.w $s3, $s1, 4
-; LA32F-NEXT:    ld.w $s3, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s2, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s1, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32F-NEXT:    addi.w $sp, $sp, 32
+; LA32F-NEXT:    st.w $a0, $fp, 0
+; LA32F-NEXT:    st.w $a1, $fp, 4
+; LA32F-NEXT:    ld.w $s0, $sp, 4 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_d2:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    fld.d $fa0, $a0, 8
-; LA32D-NEXT:    fld.d $fa1, $a0, 0
-; LA32D-NEXT:    addi.w $a0, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa2, $a0
-; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI4_0)
-; LA32D-NEXT:    fld.d $fa3, $a0, 0
-; LA32D-NEXT:    ffint.s.w $fa2, $fa2
-; LA32D-NEXT:    fcvt.d.s $fa2, $fa2
-; LA32D-NEXT:    fadd.d $fa1, $fa1, $fa2
-; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa3
+; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI4_0)
+; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI4_0)
+; LA32D-NEXT:    fld.d $fa0, $a2, 0
+; LA32D-NEXT:    fld.d $fa1, $a0, 8
+; LA32D-NEXT:    fadd.d $fa0, $fa1, $fa0
 ; LA32D-NEXT:    fst.d $fa0, $a1, 8
-; LA32D-NEXT:    fst.d $fa1, $a1, 0
+; LA32D-NEXT:    fld.d $fa0, $a0, 0
+; LA32D-NEXT:    addi.w $a0, $zero, 1
+; LA32D-NEXT:    movgr2fr.w $fa1, $a0
+; LA32D-NEXT:    ffint.s.w $fa1, $fa1
+; LA32D-NEXT:    fcvt.d.s $fa1, $fa1
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.d $fa0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_d2:
@@ -504,19 +493,16 @@ define void @test_d2(ptr %P, ptr %S) nounwind {
 ; LA64F-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; LA64F-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 0 # 8-byte Folded Spill
-; LA64F-NEXT:    ld.d $fp, $a0, 8
-; LA64F-NEXT:    ld.d $a0, $a0, 0
-; LA64F-NEXT:    move $s0, $a1
-; LA64F-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s1, $a0
+; LA64F-NEXT:    move $fp, $a1
+; LA64F-NEXT:    move $s0, $a0
+; LA64F-NEXT:    ld.d $a0, $a0, 8
 ; LA64F-NEXT:    lu52i.d $a1, $zero, 1024
-; LA64F-NEXT:    move $a0, $fp
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $s0, 8
-; LA64F-NEXT:    st.d $s1, $s0, 0
-; LA64F-NEXT:    ld.d $s1, $sp, 0 # 8-byte Folded Reload
+; LA64F-NEXT:    st.d $a0, $fp, 8
+; LA64F-NEXT:    ld.d $a0, $s0, 0
+; LA64F-NEXT:    lu52i.d $a1, $zero, 1023
+; LA64F-NEXT:    bl %plt(__adddf3)
+; LA64F-NEXT:    st.d $a0, $fp, 0
 ; LA64F-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64F-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
@@ -525,18 +511,18 @@ define void @test_d2(ptr %P, ptr %S) nounwind {
 ;
 ; LA64D-LABEL: test_d2:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    fld.d $fa0, $a0, 8
-; LA64D-NEXT:    fld.d $fa1, $a0, 0
-; LA64D-NEXT:    addi.d $a0, $zero, 1
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI4_0)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI4_0)
-; LA64D-NEXT:    fld.d $fa2, $a2, 0
-; LA64D-NEXT:    movgr2fr.d $fa3, $a0
-; LA64D-NEXT:    ffint.d.l $fa3, $fa3
-; LA64D-NEXT:    fadd.d $fa1, $fa1, $fa3
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa2
+; LA64D-NEXT:    fld.d $fa0, $a2, 0
+; LA64D-NEXT:    fld.d $fa1, $a0, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa1, $fa0
 ; LA64D-NEXT:    fst.d $fa0, $a1, 8
-; LA64D-NEXT:    fst.d $fa1, $a1, 0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
+; LA64D-NEXT:    addi.d $a0, $zero, 1
+; LA64D-NEXT:    movgr2fr.d $fa1, $a0
+; LA64D-NEXT:    ffint.d.l $fa1, $fa1
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.d $fa0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %d2, ptr %P
   %R = fadd %d2 %p, < double 1.000000e+00, double 2.000000e+00 >
@@ -547,171 +533,133 @@ define void @test_d2(ptr %P, ptr %S) nounwind {
 define void @test_d4(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_d4:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    addi.w $sp, $sp, -48
-; LA32F-NEXT:    st.w $ra, $sp, 44 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $fp, $sp, 40 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s0, $sp, 36 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s1, $sp, 32 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s2, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s3, $sp, 24 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s4, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s5, $sp, 16 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s6, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s7, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $fp, $a0, 24
-; LA32F-NEXT:    ld.w $s0, $a0, 28
-; LA32F-NEXT:    ld.w $s1, $a0, 16
-; LA32F-NEXT:    ld.w $s2, $a0, 20
-; LA32F-NEXT:    ld.w $s3, $a0, 8
-; LA32F-NEXT:    ld.w $s4, $a0, 12
-; LA32F-NEXT:    ld.w $a2, $a0, 0
-; LA32F-NEXT:    ld.w $a4, $a0, 4
-; LA32F-NEXT:    move $s5, $a1
-; LA32F-NEXT:    lu12i.w $a3, 261888
-; LA32F-NEXT:    move $a0, $a2
-; LA32F-NEXT:    move $a1, $a4
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s0, $sp, 4 # 4-byte Folded Spill
+; LA32F-NEXT:    move $fp, $a1
+; LA32F-NEXT:    move $s0, $a0
+; LA32F-NEXT:    ld.w $a0, $a0, 24
+; LA32F-NEXT:    ld.w $a1, $s0, 28
+; LA32F-NEXT:    lu12i.w $a3, 262400
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s6, $a0
-; LA32F-NEXT:    move $s7, $a1
-; LA32F-NEXT:    lu12i.w $a3, 262144
-; LA32F-NEXT:    move $a0, $s3
-; LA32F-NEXT:    move $a1, $s4
+; LA32F-NEXT:    st.w $a0, $fp, 24
+; LA32F-NEXT:    st.w $a1, $fp, 28
+; LA32F-NEXT:    ld.w $a0, $s0, 16
+; LA32F-NEXT:    ld.w $a1, $s0, 20
+; LA32F-NEXT:    lu12i.w $a3, 262272
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s3, $a0
-; LA32F-NEXT:    move $s4, $a1
-; LA32F-NEXT:    lu12i.w $a3, 262272
-; LA32F-NEXT:    move $a0, $s1
-; LA32F-NEXT:    move $a1, $s2
+; LA32F-NEXT:    st.w $a0, $fp, 16
+; LA32F-NEXT:    st.w $a1, $fp, 20
+; LA32F-NEXT:    ld.w $a0, $s0, 8
+; LA32F-NEXT:    ld.w $a1, $s0, 12
+; LA32F-NEXT:    lu12i.w $a3, 262144
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s1, $a0
-; LA32F-NEXT:    move $s2, $a1
-; LA32F-NEXT:    lu12i.w $a3, 262400
-; LA32F-NEXT:    move $a0, $fp
-; LA32F-NEXT:    move $a1, $s0
+; LA32F-NEXT:    st.w $a0, $fp, 8
+; LA32F-NEXT:    st.w $a1, $fp, 12
+; LA32F-NEXT:    ld.w $a0, $s0, 0
+; LA32F-NEXT:    ld.w $a1, $s0, 4
+; LA32F-NEXT:    lu12i.w $a3, 261888
 ; LA32F-NEXT:    move $a2, $zero
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $s5, 24
-; LA32F-NEXT:    st.w $a1, $s5, 28
-; LA32F-NEXT:    st.w $s1, $s5, 16
-; LA32F-NEXT:    st.w $s2, $s5, 20
-; LA32F-NEXT:    st.w $s3, $s5, 8
-; LA32F-NEXT:    st.w $s4, $s5, 12
-; LA32F-NEXT:    st.w $s6, $s5, 0
-; LA32F-NEXT:    st.w $s7, $s5, 4
-; LA32F-NEXT:    ld.w $s7, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s6, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s5, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s4, $sp, 20 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s3, $sp, 24 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s2, $sp, 28 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s1, $sp, 32 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s0, $sp, 36 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $fp, $sp, 40 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $ra, $sp, 44 # 4-byte Folded Reload
-; LA32F-NEXT:    addi.w $sp, $sp, 48
+; LA32F-NEXT:    st.w $a0, $fp, 0
+; LA32F-NEXT:    st.w $a1, $fp, 4
+; LA32F-NEXT:    ld.w $s0, $sp, 4 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_d4:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    fld.d $fa0, $a0, 24
+; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_0)
+; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI5_0)
+; LA32D-NEXT:    fld.d $fa0, $a2, 0
+; LA32D-NEXT:    fld.d $fa1, $a0, 24
+; LA32D-NEXT:    fadd.d $fa0, $fa1, $fa0
+; LA32D-NEXT:    fst.d $fa0, $a1, 24
+; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_1)
+; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI5_1)
+; LA32D-NEXT:    fld.d $fa0, $a2, 0
 ; LA32D-NEXT:    fld.d $fa1, $a0, 16
-; LA32D-NEXT:    fld.d $fa2, $a0, 8
-; LA32D-NEXT:    fld.d $fa3, $a0, 0
+; LA32D-NEXT:    fadd.d $fa0, $fa1, $fa0
+; LA32D-NEXT:    fst.d $fa0, $a1, 16
+; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_2)
+; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI5_2)
+; LA32D-NEXT:    fld.d $fa0, $a2, 0
+; LA32D-NEXT:    fld.d $fa1, $a0, 8
+; LA32D-NEXT:    fadd.d $fa0, $fa1, $fa0
+; LA32D-NEXT:    fst.d $fa0, $a1, 8
+; LA32D-NEXT:    fld.d $fa0, $a0, 0
 ; LA32D-NEXT:    addi.w $a0, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa4, $a0
-; LA32D-NEXT:    ffint.s.w $fa4, $fa4
-; LA32D-NEXT:    fcvt.d.s $fa4, $fa4
-; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI5_0)
-; LA32D-NEXT:    fld.d $fa5, $a0, 0
-; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI5_1)
-; LA32D-NEXT:    fld.d $fa6, $a0, 0
-; LA32D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA32D-NEXT:    addi.w $a0, $a0, %pc_lo12(.LCPI5_2)
-; LA32D-NEXT:    fld.d $fa7, $a0, 0
-; LA32D-NEXT:    fadd.d $fa3, $fa3, $fa4
-; LA32D-NEXT:    fadd.d $fa2, $fa2, $fa5
-; LA32D-NEXT:    fadd.d $fa1, $fa1, $fa6
-; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa7
-; LA32D-NEXT:    fst.d $fa0, $a1, 24
-; LA32D-NEXT:    fst.d $fa1, $a1, 16
-; LA32D-NEXT:    fst.d $fa2, $a1, 8
-; LA32D-NEXT:    fst.d $fa3, $a1, 0
+; LA32D-NEXT:    movgr2fr.w $fa1, $a0
+; LA32D-NEXT:    ffint.s.w $fa1, $fa1
+; LA32D-NEXT:    fcvt.d.s $fa1, $fa1
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.d $fa0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_d4:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -48
-; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 0 # 8-byte Folded Spill
-; LA64F-NEXT:    ld.d $fp, $a0, 24
-; LA64F-NEXT:    ld.d $s0, $a0, 8
-; LA64F-NEXT:    ld.d $s1, $a0, 0
-; LA64F-NEXT:    ld.d $a0, $a0, 16
-; LA64F-NEXT:    move $s2, $a1
-; LA64F-NEXT:    ori $a1, $zero, 0
-; LA64F-NEXT:    lu32i.d $a1, -524288
-; LA64F-NEXT:    lu52i.d $a1, $a1, 1024
-; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s3, $a0
-; LA64F-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64F-NEXT:    move $a0, $s1
+; LA64F-NEXT:    addi.d $sp, $sp, -32
+; LA64F-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    move $fp, $a1
+; LA64F-NEXT:    move $s0, $a0
+; LA64F-NEXT:    ld.d $a0, $a0, 24
+; LA64F-NEXT:    lu52i.d $a1, $zero, 1025
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s1, $a0
+; LA64F-NEXT:    st.d $a0, $fp, 24
+; LA64F-NEXT:    ld.d $a0, $s0, 8
 ; LA64F-NEXT:    lu52i.d $a1, $zero, 1024
-; LA64F-NEXT:    move $a0, $s0
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s0, $a0
-; LA64F-NEXT:    lu52i.d $a1, $zero, 1025
-; LA64F-NEXT:    move $a0, $fp
+; LA64F-NEXT:    st.d $a0, $fp, 8
+; LA64F-NEXT:    ld.d $a0, $s0, 0
+; LA64F-NEXT:    lu52i.d $a1, $zero, 1023
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $s2, 24
-; LA64F-NEXT:    st.d $s0, $s2, 8
-; LA64F-NEXT:    st.d $s1, $s2, 0
-; LA64F-NEXT:    st.d $s3, $s2, 16
-; LA64F-NEXT:    ld.d $s3, $sp, 0 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 8 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 48
+; LA64F-NEXT:    st.d $a0, $fp, 0
+; LA64F-NEXT:    ld.d $a0, $s0, 16
+; LA64F-NEXT:    ori $a1, $zero, 0
+; LA64F-NEXT:    lu32i.d $a1, -524288
+; LA64F-NEXT:    lu52i.d $a1, $a1, 1024
+; LA64F-NEXT:    bl %plt(__adddf3)
+; LA64F-NEXT:    st.d $a0, $fp, 16
+; LA64F-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 32
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_d4:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    fld.d $fa0, $a0, 24
+; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_0)
+; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI5_0)
+; LA64D-NEXT:    fld.d $fa0, $a2, 0
+; LA64D-NEXT:    fld.d $fa1, $a0, 24
+; LA64D-NEXT:    fadd.d $fa0, $fa1, $fa0
+; LA64D-NEXT:    fst.d $fa0, $a1, 24
+; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_1)
+; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI5_1)
+; LA64D-NEXT:    fld.d $fa0, $a2, 0
 ; LA64D-NEXT:    fld.d $fa1, $a0, 16
-; LA64D-NEXT:    fld.d $fa2, $a0, 8
-; LA64D-NEXT:    fld.d $fa3, $a0, 0
+; LA64D-NEXT:    fadd.d $fa0, $fa1, $fa0
+; LA64D-NEXT:    fst.d $fa0, $a1, 16
+; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_2)
+; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI5_2)
+; LA64D-NEXT:    fld.d $fa0, $a2, 0
+; LA64D-NEXT:    fld.d $fa1, $a0, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa1, $fa0
+; LA64D-NEXT:    fst.d $fa0, $a1, 8
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
 ; LA64D-NEXT:    addi.d $a0, $zero, 1
-; LA64D-NEXT:    movgr2fr.d $fa4, $a0
-; LA64D-NEXT:    ffint.d.l $fa4, $fa4
-; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_0)
-; LA64D-NEXT:    fld.d $fa5, $a0, 0
-; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_1)
-; LA64D-NEXT:    fld.d $fa6, $a0, 0
-; LA64D-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA64D-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_2)
-; LA64D-NEXT:    fld.d $fa7, $a0, 0
-; LA64D-NEXT:    fadd.d $fa3, $fa3, $fa4
-; LA64D-NEXT:    fadd.d $fa2, $fa2, $fa5
-; LA64D-NEXT:    fadd.d $fa1, $fa1, $fa6
-; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa7
-; LA64D-NEXT:    fst.d $fa0, $a1, 24
-; LA64D-NEXT:    fst.d $fa1, $a1, 16
-; LA64D-NEXT:    fst.d $fa2, $a1, 8
-; LA64D-NEXT:    fst.d $fa3, $a1, 0
+; LA64D-NEXT:    movgr2fr.d $fa1, $a0
+; LA64D-NEXT:    ffint.d.l $fa1, $fa1
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.d $fa0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %d4, ptr %P
   %R = fadd %d4 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 >
@@ -722,293 +670,223 @@ define void @test_d4(ptr %P, ptr %S) nounwind {
 define void @test_d8(ptr %P, ptr %S) nounwind {
 ; LA32F-LABEL: test_d8:
 ; LA32F:       # %bb.0:
-; LA32F-NEXT:    addi.w $sp, $sp, -96
-; LA32F-NEXT:    st.w $ra, $sp, 92 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $fp, $sp, 88 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s0, $sp, 84 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s1, $sp, 80 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s2, $sp, 76 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s3, $sp, 72 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s4, $sp, 68 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s5, $sp, 64 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s6, $sp, 60 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s7, $sp, 56 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $s8, $sp, 52 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $a2, $a0, 56
-; LA32F-NEXT:    st.w $a2, $sp, 48 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $a2, $a0, 60
-; LA32F-NEXT:    st.w $a2, $sp, 44 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $a2, $a0, 48
-; LA32F-NEXT:    st.w $a2, $sp, 32 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $a2, $a0, 52
-; LA32F-NEXT:    st.w $a2, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $s8, $a0, 40
-; LA32F-NEXT:    ld.w $a2, $a0, 44
-; LA32F-NEXT:    st.w $a2, $sp, 16 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $s3, $a0, 32
-; LA32F-NEXT:    ld.w $s4, $a0, 36
-; LA32F-NEXT:    ld.w $s5, $a0, 24
-; LA32F-NEXT:    ld.w $s6, $a0, 28
-; LA32F-NEXT:    ld.w $s1, $a0, 16
-; LA32F-NEXT:    ld.w $s2, $a0, 20
-; LA32F-NEXT:    ld.w $s7, $a0, 8
-; LA32F-NEXT:    ld.w $s0, $a0, 12
-; LA32F-NEXT:    ld.w $a2, $a0, 0
-; LA32F-NEXT:    ld.w $a4, $a0, 4
+; LA32F-NEXT:    addi.w $sp, $sp, -32
+; LA32F-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s3, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $s4, $sp, 4 # 4-byte Folded Spill
 ; LA32F-NEXT:    move $fp, $a1
-; LA32F-NEXT:    lu12i.w $a3, 261888
-; LA32F-NEXT:    move $a0, $a2
-; LA32F-NEXT:    move $a1, $a4
+; LA32F-NEXT:    move $s0, $a0
+; LA32F-NEXT:    ld.w $a0, $a0, 56
+; LA32F-NEXT:    ld.w $a1, $s0, 60
+; LA32F-NEXT:    lu12i.w $s1, 262400
 ; LA32F-NEXT:    move $a2, $zero
+; LA32F-NEXT:    move $a3, $s1
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $sp, 40 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $a1, $sp, 36 # 4-byte Folded Spill
-; LA32F-NEXT:    lu12i.w $a3, 262144
-; LA32F-NEXT:    move $a0, $s7
-; LA32F-NEXT:    move $a1, $s0
+; LA32F-NEXT:    st.w $a0, $fp, 56
+; LA32F-NEXT:    st.w $a1, $fp, 60
+; LA32F-NEXT:    ld.w $a0, $s0, 48
+; LA32F-NEXT:    ld.w $a1, $s0, 52
+; LA32F-NEXT:    lu12i.w $s2, 262272
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $s0, $a3
+; LA32F-NEXT:    move $a3, $s2
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $sp, 24 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $a1, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT:    lu12i.w $s7, 262272
-; LA32F-NEXT:    move $a0, $s1
-; LA32F-NEXT:    move $a1, $s2
+; LA32F-NEXT:    st.w $a0, $fp, 48
+; LA32F-NEXT:    st.w $a1, $fp, 52
+; LA32F-NEXT:    ld.w $a0, $s0, 40
+; LA32F-NEXT:    ld.w $a1, $s0, 44
+; LA32F-NEXT:    lu12i.w $s3, 262144
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s7
+; LA32F-NEXT:    move $a3, $s3
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    move $s2, $a1
-; LA32F-NEXT:    lu12i.w $a3, 262400
-; LA32F-NEXT:    move $a0, $s5
-; LA32F-NEXT:    move $a1, $s6
+; LA32F-NEXT:    st.w $a0, $fp, 40
+; LA32F-NEXT:    st.w $a1, $fp, 44
+; LA32F-NEXT:    ld.w $a0, $s0, 32
+; LA32F-NEXT:    ld.w $a1, $s0, 36
+; LA32F-NEXT:    lu12i.w $s4, 261888
 ; LA32F-NEXT:    move $a2, $zero
+; LA32F-NEXT:    move $a3, $s4
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s5, $a0
-; LA32F-NEXT:    move $s6, $a1
-; LA32F-NEXT:    move $a0, $s3
-; LA32F-NEXT:    move $a1, $s4
+; LA32F-NEXT:    st.w $a0, $fp, 32
+; LA32F-NEXT:    st.w $a1, $fp, 36
+; LA32F-NEXT:    ld.w $a0, $s0, 24
+; LA32F-NEXT:    ld.w $a1, $s0, 28
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    lu12i.w $a3, 261888
+; LA32F-NEXT:    move $a3, $s1
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s3, $a0
-; LA32F-NEXT:    move $s4, $a1
-; LA32F-NEXT:    move $a0, $s8
-; LA32F-NEXT:    ld.w $a1, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 24
+; LA32F-NEXT:    st.w $a1, $fp, 28
+; LA32F-NEXT:    ld.w $a0, $s0, 16
+; LA32F-NEXT:    ld.w $a1, $s0, 20
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s0
+; LA32F-NEXT:    move $a3, $s2
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s8, $a0
-; LA32F-NEXT:    move $s0, $a1
-; LA32F-NEXT:    ld.w $a0, $sp, 32 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $a1, $sp, 28 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 16
+; LA32F-NEXT:    st.w $a1, $fp, 20
+; LA32F-NEXT:    ld.w $a0, $s0, 8
+; LA32F-NEXT:    ld.w $a1, $s0, 12
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s7
+; LA32F-NEXT:    move $a3, $s3
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s7, $a0
-; LA32F-NEXT:    move $s1, $a1
-; LA32F-NEXT:    ld.w $a0, $sp, 48 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $a1, $sp, 44 # 4-byte Folded Reload
+; LA32F-NEXT:    st.w $a0, $fp, 8
+; LA32F-NEXT:    st.w $a1, $fp, 12
+; LA32F-NEXT:    ld.w $a0, $s0, 0
+; LA32F-NEXT:    ld.w $a1, $s0, 4
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    lu12i.w $a3, 262400
+; LA32F-NEXT:    move $a3, $s4
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $fp, 56
-; LA32F-NEXT:    st.w $a1, $fp, 60
-; LA32F-NEXT:    st.w $s7, $fp, 48
-; LA32F-NEXT:    st.w $s1, $fp, 52
-; LA32F-NEXT:    st.w $s8, $fp, 40
-; LA32F-NEXT:    st.w $s0, $fp, 44
-; LA32F-NEXT:    st.w $s3, $fp, 32
-; LA32F-NEXT:    st.w $s4, $fp, 36
-; LA32F-NEXT:    st.w $s5, $fp, 24
-; LA32F-NEXT:    st.w $s6, $fp, 28
-; LA32F-NEXT:    ld.w $a0, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT:    st.w $a0, $fp, 16
-; LA32F-NEXT:    st.w $s2, $fp, 20
-; LA32F-NEXT:    ld.w $a0, $sp, 24 # 4-byte Folded Reload
-; LA32F-NEXT:    st.w $a0, $fp, 8
-; LA32F-NEXT:    ld.w $a0, $sp, 20 # 4-byte Folded Reload
-; LA32F-NEXT:    st.w $a0, $fp, 12
-; LA32F-NEXT:    ld.w $a0, $sp, 40 # 4-byte Folded Reload
 ; LA32F-NEXT:    st.w $a0, $fp, 0
-; LA32F-NEXT:    ld.w $a0, $sp, 36 # 4-byte Folded Reload
-; LA32F-NEXT:    st.w $a0, $fp, 4
-; LA32F-NEXT:    ld.w $s8, $sp, 52 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s7, $sp, 56 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s6, $sp, 60 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s5, $sp, 64 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s4, $sp, 68 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s3, $sp, 72 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s2, $sp, 76 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s1, $sp, 80 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $s0, $sp, 84 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $fp, $sp, 88 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $ra, $sp, 92 # 4-byte Folded Reload
-; LA32F-NEXT:    addi.w $sp, $sp, 96
+; LA32F-NEXT:    st.w $a1, $fp, 4
+; LA32F-NEXT:    ld.w $s4, $sp, 4 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s3, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s2, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s1, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 32
 ; LA32F-NEXT:    ret
 ;
 ; LA32D-LABEL: test_d8:
 ; LA32D:       # %bb.0:
-; LA32D-NEXT:    addi.w $a2, $zero, 1
-; LA32D-NEXT:    movgr2fr.w $fa0, $a2
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_0)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI6_0)
-; LA32D-NEXT:    fld.d $fa1, $a2, 0
+; LA32D-NEXT:    fld.d $fa0, $a2, 0
+; LA32D-NEXT:    fld.d $fa1, $a0, 56
+; LA32D-NEXT:    fadd.d $fa1, $fa1, $fa0
+; LA32D-NEXT:    fst.d $fa1, $a1, 56
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_1)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI6_1)
-; LA32D-NEXT:    fld.d $fa2, $a2, 0
+; LA32D-NEXT:    fld.d $fa1, $a2, 0
+; LA32D-NEXT:    fld.d $fa2, $a0, 48
+; LA32D-NEXT:    fadd.d $fa2, $fa2, $fa1
+; LA32D-NEXT:    fst.d $fa2, $a1, 48
 ; LA32D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_2)
 ; LA32D-NEXT:    addi.w $a2, $a2, %pc_lo12(.LCPI6_2)
-; LA32D-NEXT:    fld.d $fa3, $a2, 0
-; LA32D-NEXT:    fld.d $fa4, $a0, 56
-; LA32D-NEXT:    fld.d $fa5, $a0, 48
-; LA32D-NEXT:    fld.d $fa6, $a0, 24
-; LA32D-NEXT:    fld.d $fa7, $a0, 16
-; LA32D-NEXT:    fld.d $ft0, $a0, 8
-; LA32D-NEXT:    fld.d $ft1, $a0, 0
-; LA32D-NEXT:    fld.d $ft2, $a0, 32
-; LA32D-NEXT:    ffint.s.w $fa0, $fa0
-; LA32D-NEXT:    fcvt.d.s $fa0, $fa0
-; LA32D-NEXT:    fadd.d $ft1, $ft1, $fa0
-; LA32D-NEXT:    fadd.d $fa0, $ft2, $fa0
-; LA32D-NEXT:    fld.d $ft2, $a0, 40
-; LA32D-NEXT:    fadd.d $ft0, $ft0, $fa1
-; LA32D-NEXT:    fadd.d $fa7, $fa7, $fa2
-; LA32D-NEXT:    fadd.d $fa6, $fa6, $fa3
-; LA32D-NEXT:    fadd.d $fa1, $ft2, $fa1
-; LA32D-NEXT:    fadd.d $fa2, $fa5, $fa2
-; LA32D-NEXT:    fadd.d $fa3, $fa4, $fa3
-; LA32D-NEXT:    fst.d $fa3, $a1, 56
-; LA32D-NEXT:    fst.d $fa2, $a1, 48
-; LA32D-NEXT:    fst.d $fa1, $a1, 40
-; LA32D-NEXT:    fst.d $fa6, $a1, 24
-; LA32D-NEXT:    fst.d $fa7, $a1, 16
-; LA32D-NEXT:    fst.d $ft0, $a1, 8
+; LA32D-NEXT:    fld.d $fa2, $a2, 0
+; LA32D-NEXT:    fld.d $fa3, $a0, 40
+; LA32D-NEXT:    fadd.d $fa3, $fa3, $fa2
+; LA32D-NEXT:    fst.d $fa3, $a1, 40
+; LA32D-NEXT:    fld.d $fa3, $a0, 24
+; LA32D-NEXT:    fadd.d $fa0, $fa3, $fa0
+; LA32D-NEXT:    fst.d $fa0, $a1, 24
+; LA32D-NEXT:    fld.d $fa0, $a0, 16
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.d $fa0, $a1, 16
+; LA32D-NEXT:    fld.d $fa0, $a0, 8
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa2
+; LA32D-NEXT:    fst.d $fa0, $a1, 8
+; LA32D-NEXT:    fld.d $fa0, $a0, 32
+; LA32D-NEXT:    addi.w $a2, $zero, 1
+; LA32D-NEXT:    movgr2fr.w $fa1, $a2
+; LA32D-NEXT:    ffint.s.w $fa1, $fa1
+; LA32D-NEXT:    fcvt.d.s $fa1, $fa1
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa1
 ; LA32D-NEXT:    fst.d $fa0, $a1, 32
-; LA32D-NEXT:    fst.d $ft1, $a1, 0
+; LA32D-NEXT:    fld.d $fa0, $a0, 0
+; LA32D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    fst.d $fa0, $a1, 0
 ; LA32D-NEXT:    ret
 ;
 ; LA64F-LABEL: test_d8:
 ; LA64F:       # %bb.0:
-; LA64F-NEXT:    addi.d $sp, $sp, -112
-; LA64F-NEXT:    st.d $ra, $sp, 104 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $fp, $sp, 96 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s0, $sp, 88 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s1, $sp, 80 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s2, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s3, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s4, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s5, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s6, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s7, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT:    st.d $s8, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT:    ld.d $a2, $a0, 56
-; LA64F-NEXT:    st.d $a2, $sp, 16 # 8-byte Folded Spill
-; LA64F-NEXT:    ld.d $s1, $a0, 40
-; LA64F-NEXT:    ld.d $s2, $a0, 32
-; LA64F-NEXT:    ld.d $s3, $a0, 24
-; LA64F-NEXT:    ld.d $s4, $a0, 8
-; LA64F-NEXT:    ld.d $s5, $a0, 0
-; LA64F-NEXT:    ld.d $s6, $a0, 48
-; LA64F-NEXT:    ld.d $a0, $a0, 16
+; LA64F-NEXT:    addi.d $sp, $sp, -48
+; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s2, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    st.d $s3, $sp, 0 # 8-byte Folded Spill
 ; LA64F-NEXT:    move $fp, $a1
-; LA64F-NEXT:    ori $a1, $zero, 0
-; LA64F-NEXT:    lu32i.d $a1, -524288
-; LA64F-NEXT:    lu52i.d $s7, $a1, 1024
-; LA64F-NEXT:    move $a1, $s7
+; LA64F-NEXT:    move $s0, $a0
+; LA64F-NEXT:    ld.d $a0, $a0, 56
+; LA64F-NEXT:    lu52i.d $s1, $zero, 1025
+; LA64F-NEXT:    move $a1, $s1
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT:    move $a0, $s6
-; LA64F-NEXT:    move $a1, $s7
+; LA64F-NEXT:    st.d $a0, $fp, 56
+; LA64F-NEXT:    ld.d $a0, $s0, 40
+; LA64F-NEXT:    lu52i.d $s2, $zero, 1024
+; LA64F-NEXT:    move $a1, $s2
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s6, $a0
-; LA64F-NEXT:    lu52i.d $s7, $zero, 1023
-; LA64F-NEXT:    move $a0, $s5
-; LA64F-NEXT:    move $a1, $s7
+; LA64F-NEXT:    st.d $a0, $fp, 40
+; LA64F-NEXT:    ld.d $a0, $s0, 32
+; LA64F-NEXT:    lu52i.d $s3, $zero, 1023
+; LA64F-NEXT:    move $a1, $s3
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s5, $a0
-; LA64F-NEXT:    lu52i.d $s0, $zero, 1024
-; LA64F-NEXT:    move $a0, $s4
-; LA64F-NEXT:    move $a1, $s0
+; LA64F-NEXT:    st.d $a0, $fp, 32
+; LA64F-NEXT:    ld.d $a0, $s0, 24
+; LA64F-NEXT:    move $a1, $s1
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s4, $a0
-; LA64F-NEXT:    lu52i.d $s8, $zero, 1025
-; LA64F-NEXT:    move $a0, $s3
-; LA64F-NEXT:    move $a1, $s8
+; LA64F-NEXT:    st.d $a0, $fp, 24
+; LA64F-NEXT:    ld.d $a0, $s0, 8
+; LA64F-NEXT:    move $a1, $s2
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s3, $a0
-; LA64F-NEXT:    move $a0, $s2
-; LA64F-NEXT:    move $a1, $s7
+; LA64F-NEXT:    st.d $a0, $fp, 8
+; LA64F-NEXT:    ld.d $a0, $s0, 0
+; LA64F-NEXT:    move $a1, $s3
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s2, $a0
-; LA64F-NEXT:    move $a0, $s1
-; LA64F-NEXT:    move $a1, $s0
+; LA64F-NEXT:    st.d $a0, $fp, 0
+; LA64F-NEXT:    ld.d $a0, $s0, 48
+; LA64F-NEXT:    ori $a1, $zero, 0
+; LA64F-NEXT:    lu32i.d $a1, -524288
+; LA64F-NEXT:    lu52i.d $s1, $a1, 1024
+; LA64F-NEXT:    move $a1, $s1
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    move $s0, $a0
-; LA64F-NEXT:    ld.d $a0, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT:    move $a1, $s8
+; LA64F-NEXT:    st.d $a0, $fp, 48
+; LA64F-NEXT:    ld.d $a0, $s0, 16
+; LA64F-NEXT:    move $a1, $s1
 ; LA64F-NEXT:    bl %plt(__adddf3)
-; LA64F-NEXT:    st.d $a0, $fp, 56
-; LA64F-NEXT:    st.d $s0, $fp, 40
-; LA64F-NEXT:    st.d $s2, $fp, 32
-; LA64F-NEXT:    st.d $s3, $fp, 24
-; LA64F-NEXT:    st.d $s4, $fp, 8
-; LA64F-NEXT:    st.d $s5, $fp, 0
-; LA64F-NEXT:    st.d $s6, $fp, 48
-; LA64F-NEXT:    ld.d $a0, $sp, 8 # 8-byte Folded Reload
 ; LA64F-NEXT:    st.d $a0, $fp, 16
-; LA64F-NEXT:    ld.d $s8, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s7, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s6, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s5, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s4, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s3, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s2, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s1, $sp, 80 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $s0, $sp, 88 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $fp, $sp, 96 # 8-byte Folded Reload
-; LA64F-NEXT:    ld.d $ra, $sp, 104 # 8-byte Folded Reload
-; LA64F-NEXT:    addi.d $sp, $sp, 112
+; LA64F-NEXT:    ld.d $s3, $sp, 0 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s2, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 48
 ; LA64F-NEXT:    ret
 ;
 ; LA64D-LABEL: test_d8:
 ; LA64D:       # %bb.0:
-; LA64D-NEXT:    addi.d $a2, $zero, 1
-; LA64D-NEXT:    movgr2fr.d $fa0, $a2
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_0)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI6_0)
-; LA64D-NEXT:    fld.d $fa1, $a2, 0
+; LA64D-NEXT:    fld.d $fa0, $a2, 0
+; LA64D-NEXT:    fld.d $fa1, $a0, 56
+; LA64D-NEXT:    fadd.d $fa1, $fa1, $fa0
+; LA64D-NEXT:    fst.d $fa1, $a1, 56
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_1)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI6_1)
-; LA64D-NEXT:    fld.d $fa2, $a2, 0
+; LA64D-NEXT:    fld.d $fa1, $a2, 0
+; LA64D-NEXT:    fld.d $fa2, $a0, 48
+; LA64D-NEXT:    fadd.d $fa2, $fa2, $fa1
+; LA64D-NEXT:    fst.d $fa2, $a1, 48
 ; LA64D-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_2)
 ; LA64D-NEXT:    addi.d $a2, $a2, %pc_lo12(.LCPI6_2)
-; LA64D-NEXT:    fld.d $fa3, $a2, 0
-; LA64D-NEXT:    fld.d $fa4, $a0, 56
-; LA64D-NEXT:    fld.d $fa5, $a0, 48
-; LA64D-NEXT:    fld.d $fa6, $a0, 24
-; LA64D-NEXT:    fld.d $fa7, $a0, 16
-; LA64D-NEXT:    fld.d $ft0, $a0, 0
-; LA64D-NEXT:    fld.d $ft1, $a0, 32
-; LA64D-NEXT:    fld.d $ft2, $a0, 8
-; LA64D-NEXT:    ffint.d.l $fa0, $fa0
-; LA64D-NEXT:    fadd.d $ft0, $ft0, $fa0
-; LA64D-NEXT:    fadd.d $fa0, $ft1, $fa0
-; LA64D-NEXT:    fld.d $ft1, $a0, 40
-; LA64D-NEXT:    fadd.d $ft2, $ft2, $fa1
-; LA64D-NEXT:    fadd.d $fa7, $fa7, $fa2
-; LA64D-NEXT:    fadd.d $fa6, $fa6, $fa3
-; LA64D-NEXT:    fadd.d $fa1, $ft1, $fa1
-; LA64D-NEXT:    fadd.d $fa2, $fa5, $fa2
-; LA64D-NEXT:    fadd.d $fa3, $fa4, $fa3
-; LA64D-NEXT:    fst.d $fa3, $a1, 56
-; LA64D-NEXT:    fst.d $fa2, $a1, 48
-; LA64D-NEXT:    fst.d $fa1, $a1, 40
-; LA64D-NEXT:    fst.d $fa6, $a1, 24
-; LA64D-NEXT:    fst.d $fa7, $a1, 16
-; LA64D-NEXT:    fst.d $ft2, $a1, 8
+; LA64D-NEXT:    fld.d $fa2, $a2, 0
+; LA64D-NEXT:    fld.d $fa3, $a0, 40
+; LA64D-NEXT:    fadd.d $fa3, $fa3, $fa2
+; LA64D-NEXT:    fst.d $fa3, $a1, 40
+; LA64D-NEXT:    fld.d $fa3, $a0, 24
+; LA64D-NEXT:    fadd.d $fa0, $fa3, $fa0
+; LA64D-NEXT:    fst.d $fa0, $a1, 24
+; LA64D-NEXT:    fld.d $fa0, $a0, 16
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.d $fa0, $a1, 16
+; LA64D-NEXT:    fld.d $fa0, $a0, 8
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa2
+; LA64D-NEXT:    fst.d $fa0, $a1, 8
+; LA64D-NEXT:    fld.d $fa0, $a0, 32
+; LA64D-NEXT:    addi.d $a2, $zero, 1
+; LA64D-NEXT:    movgr2fr.d $fa1, $a2
+; LA64D-NEXT:    ffint.d.l $fa1, $fa1
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa1
 ; LA64D-NEXT:    fst.d $fa0, $a1, 32
-; LA64D-NEXT:    fst.d $ft0, $a1, 0
+; LA64D-NEXT:    fld.d $fa0, $a0, 0
+; LA64D-NEXT:    fadd.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    fst.d $fa0, $a1, 0
 ; LA64D-NEXT:    ret
   %p = load %d8, ptr %P
   %R = fadd %d8 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 >
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected
index 56b6c90a2f6f3..e5bdc8b010e4d 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected
@@ -72,11 +72,11 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
-; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    st.w $zero, $fp, -16
-; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    beqz $zero, .LBB0_3
 ; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    beqz $a0, .LBB0_4
@@ -85,9 +85,10 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    b .LBB0_5
 ; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 2
 ; CHECK-NEXT:    st.w $a0, $fp, -20
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -95,10 +96,10 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    bnez $a0, .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    ori $a0, $zero, 1
-; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 2
 ; CHECK-NEXT:    st.w $a0, $fp, -20
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -120,24 +121,24 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
-; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(x)
 ; CHECK-NEXT:    addi.w $a0, $a0, %pc_lo12(x)
 ; CHECK-NEXT:    ori $a1, $zero, 1
-; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    ori $a2, $zero, 2
-; CHECK-NEXT:    st.w $a2, $fp, -20
-; CHECK-NEXT:    ori $a3, $zero, 3
-; CHECK-NEXT:    st.w $a3, $fp, -24
-; CHECK-NEXT:    ori $a4, $zero, 4
-; CHECK-NEXT:    st.w $a4, $fp, -28
 ; CHECK-NEXT:    st.w $a1, $a0, 0
+; CHECK-NEXT:    st.w $zero, $fp, -12
+; CHECK-NEXT:    st.w $a1, $fp, -16
+; CHECK-NEXT:    ori $a0, $zero, 2
+; CHECK-NEXT:    st.w $a0, $fp, -20
+; CHECK-NEXT:    ori $a2, $zero, 3
+; CHECK-NEXT:    st.w $a2, $fp, -24
+; CHECK-NEXT:    ori $a3, $zero, 4
+; CHECK-NEXT:    st.w $a3, $fp, -28
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    st.w $a2, $fp, -20
-; CHECK-NEXT:    st.w $a3, $fp, -24
-; CHECK-NEXT:    st.w $a4, $fp, -28
+; CHECK-NEXT:    st.w $a2, $fp, -24
+; CHECK-NEXT:    st.w $a3, $fp, -28
 ; CHECK-NEXT:    move $a0, $zero
 ; CHECK-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
 ; CHECK-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected
index 2e063202fcf79..20e34cdf3c64c 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected
@@ -13,11 +13,11 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
-; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    st.w $zero, $fp, -16
-; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    beqz $zero, .LBB0_3
 ; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    ori $a0, $zero, 1
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    beqz $a0, .LBB0_4
@@ -26,9 +26,10 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    b .LBB0_5
 ; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 2
 ; CHECK-NEXT:    st.w $a0, $fp, -20
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -36,10 +37,10 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    ld.w $a0, $fp, -16
 ; CHECK-NEXT:    bnez $a0, .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    ori $a0, $zero, 1
-; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 2
 ; CHECK-NEXT:    st.w $a0, $fp, -20
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    st.w $a0, $fp, -16
 ; CHECK-NEXT:    ori $a0, $zero, 3
 ; CHECK-NEXT:    st.w $a0, $fp, -24
 ; CHECK-NEXT:    ori $a0, $zero, 4
@@ -97,24 +98,24 @@ define dso_local i32 @main() #0 {
 ; CHECK-NEXT:    .cfi_offset 22, -8
 ; CHECK-NEXT:    addi.w $fp, $sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa 22, 0
-; CHECK-NEXT:    st.w $zero, $fp, -12
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(x)
 ; CHECK-NEXT:    addi.w $a0, $a0, %pc_lo12(x)
 ; CHECK-NEXT:    ori $a1, $zero, 1
-; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    ori $a2, $zero, 2
-; CHECK-NEXT:    st.w $a2, $fp, -20
-; CHECK-NEXT:    ori $a3, $zero, 3
-; CHECK-NEXT:    st.w $a3, $fp, -24
-; CHECK-NEXT:    ori $a4, $zero, 4
-; CHECK-NEXT:    st.w $a4, $fp, -28
 ; CHECK-NEXT:    st.w $a1, $a0, 0
+; CHECK-NEXT:    st.w $zero, $fp, -12
+; CHECK-NEXT:    st.w $a1, $fp, -16
+; CHECK-NEXT:    ori $a0, $zero, 2
+; CHECK-NEXT:    st.w $a0, $fp, -20
+; CHECK-NEXT:    ori $a2, $zero, 3
+; CHECK-NEXT:    st.w $a2, $fp, -24
+; CHECK-NEXT:    ori $a3, $zero, 4
+; CHECK-NEXT:    st.w $a3, $fp, -28
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    st.w $a0, $fp, -20
 ; CHECK-NEXT:    st.w $a1, $fp, -16
-; CHECK-NEXT:    st.w $a2, $fp, -20
-; CHECK-NEXT:    st.w $a3, $fp, -24
-; CHECK-NEXT:    st.w $a4, $fp, -28
+; CHECK-NEXT:    st.w $a2, $fp, -24
+; CHECK-NEXT:    st.w $a3, $fp, -28
 ; CHECK-NEXT:    move $a0, $zero
 ; CHECK-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
 ; CHECK-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload