[llvm] 2602157 - [LoongArch] Optimize (and (a & ~((2^^X - 1) << Y)) to (bstrins a, zero, X+Y-1, Y)
Weining Lu via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 27 18:10:51 PDT 2023
Author: Weining Lu
Date: 2023-08-28T08:36:54+08:00
New Revision: 26021577d1e3882b30c344baf8c365d7ca4ab873
URL: https://github.com/llvm/llvm-project/commit/26021577d1e3882b30c344baf8c365d7ca4ab873
DIFF: https://github.com/llvm/llvm-project/commit/26021577d1e3882b30c344baf8c365d7ca4ab873.diff
LOG: [LoongArch] Optimize (and (a & ~((2^^X - 1) << Y)) to (bstrins a, zero, X+Y-1, Y)
Inspired by D158384.
Differential Revision: https://reviews.llvm.org/D158832
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/test/CodeGen/LoongArch/alloca.ll
llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
llvm/test/CodeGen/LoongArch/vararg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index b2c4bb812ba5e9..cabb0c1431c8f2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -519,6 +519,40 @@ def AlslSlliImmI0 : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+// Match an AND mask so (and r, imm) can become (BSTRINS r, R0, msb, lsb),
+// i.e. imm = ~((2^^(msb-lsb+1) - 1) << lsb): all ones except bits [msb:lsb].
+def BstrinsImm : PatLeaf<(imm), [{
+ if (!N->hasOneUse()) // reject unless the constant has exactly one use
+ return false;
+ uint64_t Imm = N->getZExtValue();
+ // andi can be used instead if Imm <= 0xfff, so leave small masks alone.
+ if (Imm <= 0xfff)
+ return false;
+ unsigned MaskIdx, MaskLen; // outputs of the check below; not read here
+ return N->getValueType(0).getSizeInBits() == 32
+ ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen)
+ : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen); // ~Imm: one run of 1s
+}]>;
+
+def BstrinsMsb: SDNodeXForm<imm, [{ // msb operand: top bit of ~imm's 1-run
+ uint64_t Imm = N->getZExtValue();
+ unsigned MaskIdx, MaskLen; // set only if ~Imm is a shifted mask (BstrinsImm)
+ N->getValueType(0).getSizeInBits() == 32 // called only for MaskIdx/MaskLen
+ ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen)
+ : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen); // bool result dropped
+ return CurDAG->getTargetConstant(MaskIdx + MaskLen - 1, SDLoc(N), // msb
+ N->getValueType(0));
+}]>;
+
+def BstrinsLsb: SDNodeXForm<imm, [{ // lsb operand: lowest bit of ~imm's 1-run
+ uint64_t Imm = N->getZExtValue();
+ unsigned MaskIdx, MaskLen; // set only if ~Imm is a shifted mask (BstrinsImm)
+ N->getValueType(0).getSizeInBits() == 32 // called only for MaskIdx
+ ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen)
+ : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen); // bool result dropped
+ return CurDAG->getTargetConstant(MaskIdx, SDLoc(N), N->getValueType(0));
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
@@ -1142,6 +1176,18 @@ def : Pat<(not (or GPR:$rj, GPR:$rk)), (NOR GPR:$rj, GPR:$rk)>;
def : Pat<(or GPR:$rj, (not GPR:$rk)), (ORN GPR:$rj, GPR:$rk)>;
def : Pat<(and GPR:$rj, (not GPR:$rk)), (ANDN GPR:$rj, GPR:$rk)>;
+let Predicates = [IsLA32] in { // LA32: (and rj, mask) -> BSTRINS_W from R0
+def : Pat<(and GPR:$rj, BstrinsImm:$imm),
+ (BSTRINS_W GPR:$rj, R0, (BstrinsMsb BstrinsImm:$imm),
+ (BstrinsLsb BstrinsImm:$imm))>; // msb/lsb derived from the same mask
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in { // LA64: (and rj, mask) -> BSTRINS_D from R0
+def : Pat<(and GPR:$rj, BstrinsImm:$imm),
+ (BSTRINS_D GPR:$rj, R0, (BstrinsMsb BstrinsImm:$imm),
+ (BstrinsLsb BstrinsImm:$imm))>; // msb/lsb derived from the same mask
+} // Predicates = [IsLA64]
+
/// Traps
// We lower `trap` to `amswap.w rd:$r0, rk:$r1, rj:$r0`, as this is guaranteed
diff --git a/llvm/test/CodeGen/LoongArch/alloca.ll b/llvm/test/CodeGen/LoongArch/alloca.ll
index 22473098e8fbab..d766be6aac9509 100644
--- a/llvm/test/CodeGen/LoongArch/alloca.ll
+++ b/llvm/test/CodeGen/LoongArch/alloca.ll
@@ -17,8 +17,7 @@ define void @simple_alloca(i32 %n) nounwind {
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: addi.w $fp, $sp, 16
; LA32-NEXT: addi.w $a0, $a0, 15
-; LA32-NEXT: addi.w $a1, $zero, -16
-; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: bstrins.w $a0, $zero, 3, 0
; LA32-NEXT: sub.w $a0, $sp, $a0
; LA32-NEXT: move $sp, $a0
; LA32-NEXT: bl %plt(notdead)
@@ -62,10 +61,9 @@ define void @scoped_alloca(i32 %n) nounwind {
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: st.w $s0, $sp, 4 # 4-byte Folded Spill
; LA32-NEXT: addi.w $fp, $sp, 16
-; LA32-NEXT: addi.w $a0, $a0, 15
-; LA32-NEXT: addi.w $a1, $zero, -16
-; LA32-NEXT: and $a0, $a0, $a1
; LA32-NEXT: move $s0, $sp
+; LA32-NEXT: addi.w $a0, $a0, 15
+; LA32-NEXT: bstrins.w $a0, $zero, 3, 0
; LA32-NEXT: sub.w $a0, $sp, $a0
; LA32-NEXT: move $sp, $a0
; LA32-NEXT: bl %plt(notdead)
@@ -118,8 +116,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: addi.w $fp, $sp, 16
; LA32-NEXT: addi.w $a0, $a0, 15
-; LA32-NEXT: addi.w $a1, $zero, -16
-; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: bstrins.w $a0, $zero, 3, 0
; LA32-NEXT: sub.w $a0, $sp, $a0
; LA32-NEXT: move $sp, $a0
; LA32-NEXT: addi.w $sp, $sp, -16
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index f11af8fe652842..fba340bed42224 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -4,39 +4,38 @@
define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: ld.w $a3, $a2, 0
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a0
-; LA64-NEXT: andi $a0, $a0, 24
+; LA64-NEXT: sll.w $a4, $a4, $a3
+; LA64-NEXT: andi $a3, $a3, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: andi $a1, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB0_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB0_3 Depth 2
-; LA64-NEXT: srl.w $a5, $a3, $a0
+; LA64-NEXT: srl.w $a5, $a2, $a3
; LA64-NEXT: andi $a6, $a5, 255
; LA64-NEXT: sltu $a6, $a6, $a1
; LA64-NEXT: addi.d $a5, $a5, 1
; LA64-NEXT: xori $a6, $a6, 1
; LA64-NEXT: masknez $a5, $a5, $a6
; LA64-NEXT: andi $a5, $a5, 255
-; LA64-NEXT: sll.w $a5, $a5, $a0
-; LA64-NEXT: and $a6, $a3, $a4
+; LA64-NEXT: sll.w $a5, $a5, $a3
+; LA64-NEXT: and $a6, $a2, $a4
; LA64-NEXT: or $a6, $a6, $a5
; LA64-NEXT: .LBB0_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB0_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a2, 0
-; LA64-NEXT: bne $a5, $a3, .LBB0_5
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: bne $a5, $a2, .LBB0_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
; LA64-NEXT: dbar 0
; LA64-NEXT: move $a7, $a6
-; LA64-NEXT: sc.w $a7, $a2, 0
+; LA64-NEXT: sc.w $a7, $a0, 0
; LA64-NEXT: beqz $a7, .LBB0_3
; LA64-NEXT: b .LBB0_6
; LA64-NEXT: .LBB0_5: # %atomicrmw.start
@@ -44,11 +43,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB0_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: addi.w $a6, $a3, 0
-; LA64-NEXT: move $a3, $a5
+; LA64-NEXT: addi.w $a6, $a2, 0
+; LA64-NEXT: move $a2, $a5
; LA64-NEXT: bne $a5, $a6, .LBB0_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a3
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -57,40 +56,39 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: ld.w $a3, $a2, 0
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
; LA64-NEXT: lu12i.w $a4, 15
; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a0
-; LA64-NEXT: andi $a0, $a0, 24
+; LA64-NEXT: sll.w $a4, $a4, $a3
+; LA64-NEXT: andi $a3, $a3, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB1_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB1_3 Depth 2
-; LA64-NEXT: srl.w $a5, $a3, $a0
+; LA64-NEXT: srl.w $a5, $a2, $a3
; LA64-NEXT: bstrpick.d $a6, $a5, 15, 0
; LA64-NEXT: sltu $a6, $a6, $a1
; LA64-NEXT: addi.d $a5, $a5, 1
; LA64-NEXT: xori $a6, $a6, 1
; LA64-NEXT: masknez $a5, $a5, $a6
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: sll.w $a5, $a5, $a0
-; LA64-NEXT: and $a6, $a3, $a4
+; LA64-NEXT: sll.w $a5, $a5, $a3
+; LA64-NEXT: and $a6, $a2, $a4
; LA64-NEXT: or $a6, $a6, $a5
; LA64-NEXT: .LBB1_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB1_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a2, 0
-; LA64-NEXT: bne $a5, $a3, .LBB1_5
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: bne $a5, $a2, .LBB1_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
; LA64-NEXT: dbar 0
; LA64-NEXT: move $a7, $a6
-; LA64-NEXT: sc.w $a7, $a2, 0
+; LA64-NEXT: sc.w $a7, $a0, 0
; LA64-NEXT: beqz $a7, .LBB1_3
; LA64-NEXT: b .LBB1_6
; LA64-NEXT: .LBB1_5: # %atomicrmw.start
@@ -98,11 +96,11 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB1_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT: addi.w $a6, $a3, 0
-; LA64-NEXT: move $a3, $a5
+; LA64-NEXT: addi.w $a6, $a2, 0
+; LA64-NEXT: move $a2, $a5
; LA64-NEXT: bne $a5, $a6, .LBB1_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a3
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -189,20 +187,19 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: ld.w $a3, $a2, 0
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a0
-; LA64-NEXT: andi $a0, $a0, 24
+; LA64-NEXT: sll.w $a4, $a4, $a3
+; LA64-NEXT: andi $a3, $a3, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: andi $a5, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB4_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB4_3 Depth 2
-; LA64-NEXT: srl.w $a6, $a3, $a0
+; LA64-NEXT: srl.w $a6, $a2, $a3
; LA64-NEXT: andi $a7, $a6, 255
; LA64-NEXT: sltu $t0, $a5, $a7
; LA64-NEXT: addi.d $a6, $a6, -1
@@ -214,19 +211,19 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: maskeqz $a7, $a1, $a7
; LA64-NEXT: or $a6, $a7, $a6
; LA64-NEXT: andi $a6, $a6, 255
-; LA64-NEXT: sll.w $a6, $a6, $a0
-; LA64-NEXT: and $a7, $a3, $a4
+; LA64-NEXT: sll.w $a6, $a6, $a3
+; LA64-NEXT: and $a7, $a2, $a4
; LA64-NEXT: or $a7, $a7, $a6
; LA64-NEXT: .LBB4_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB4_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a6, $a2, 0
-; LA64-NEXT: bne $a6, $a3, .LBB4_5
+; LA64-NEXT: ll.w $a6, $a0, 0
+; LA64-NEXT: bne $a6, $a2, .LBB4_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
; LA64-NEXT: dbar 0
; LA64-NEXT: move $t0, $a7
-; LA64-NEXT: sc.w $t0, $a2, 0
+; LA64-NEXT: sc.w $t0, $a0, 0
; LA64-NEXT: beqz $t0, .LBB4_3
; LA64-NEXT: b .LBB4_6
; LA64-NEXT: .LBB4_5: # %atomicrmw.start
@@ -234,11 +231,11 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT: addi.w $a7, $a3, 0
-; LA64-NEXT: move $a3, $a6
+; LA64-NEXT: addi.w $a7, $a2, 0
+; LA64-NEXT: move $a2, $a6
; LA64-NEXT: bne $a6, $a7, .LBB4_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a6, $a0
+; LA64-NEXT: srl.w $a0, $a6, $a3
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -247,21 +244,20 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: ld.w $a3, $a2, 0
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
; LA64-NEXT: lu12i.w $a4, 15
; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a0
-; LA64-NEXT: andi $a0, $a0, 24
+; LA64-NEXT: sll.w $a4, $a4, $a3
+; LA64-NEXT: andi $a3, $a3, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: bstrpick.d $a5, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB5_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB5_3 Depth 2
-; LA64-NEXT: srl.w $a6, $a3, $a0
+; LA64-NEXT: srl.w $a6, $a2, $a3
; LA64-NEXT: bstrpick.d $a7, $a6, 15, 0
; LA64-NEXT: sltu $t0, $a5, $a7
; LA64-NEXT: addi.d $a6, $a6, -1
@@ -273,19 +269,19 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: maskeqz $a7, $a1, $a7
; LA64-NEXT: or $a6, $a7, $a6
; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: sll.w $a6, $a6, $a0
-; LA64-NEXT: and $a7, $a3, $a4
+; LA64-NEXT: sll.w $a6, $a6, $a3
+; LA64-NEXT: and $a7, $a2, $a4
; LA64-NEXT: or $a7, $a7, $a6
; LA64-NEXT: .LBB5_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB5_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a6, $a2, 0
-; LA64-NEXT: bne $a6, $a3, .LBB5_5
+; LA64-NEXT: ll.w $a6, $a0, 0
+; LA64-NEXT: bne $a6, $a2, .LBB5_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
; LA64-NEXT: dbar 0
; LA64-NEXT: move $t0, $a7
-; LA64-NEXT: sc.w $t0, $a2, 0
+; LA64-NEXT: sc.w $t0, $a0, 0
; LA64-NEXT: beqz $t0, .LBB5_3
; LA64-NEXT: b .LBB5_6
; LA64-NEXT: .LBB5_5: # %atomicrmw.start
@@ -293,11 +289,11 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT: addi.w $a7, $a3, 0
-; LA64-NEXT: move $a3, $a6
+; LA64-NEXT: addi.w $a7, $a2, 0
+; LA64-NEXT: move $a2, $a6
; LA64-NEXT: bne $a6, $a7, .LBB5_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a6, $a0
+; LA64-NEXT: srl.w $a0, $a6, $a3
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
index cc9dec19c8a344..b43ed7859cde8f 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll
@@ -369,20 +369,15 @@ define i64 @and_i64_0xff000(i64 %a) {
ret i64 %b
}
-;; This case is not optimized to `bstrpick + slli`,
-;; since the immediate -2048 can be composed via
-;; a single `addi.w $rx, $zero, -2048`.
define i64 @and_i64_minus_2048(i64 %a) {
; LA32-LABEL: and_i64_minus_2048:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -2048
-; LA32-NEXT: and $a0, $a0, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 10, 0
; LA32-NEXT: ret
;
; LA64-LABEL: and_i64_minus_2048:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a1, $zero, -2048
-; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: bstrins.d $a0, $zero, 10, 0
; LA64-NEXT: ret
%b = and i64 %a, -2048
ret i64 %b
@@ -425,20 +420,15 @@ define i64 @and_i64_0xfff0_multiple_times(i64 %a, i64 %b, i64 %c) {
ret i64 %i
}
-;; TODO: this can be codegened to bstrins.[wd] $a0, $zero, 23, 16.
define i64 @and_i64_0xffffffffff00ffff(i64 %a) {
; LA32-LABEL: and_i64_0xffffffffff00ffff:
; LA32: # %bb.0:
-; LA32-NEXT: lu12i.w $a2, -4081
-; LA32-NEXT: ori $a2, $a2, 4095
-; LA32-NEXT: and $a0, $a0, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 23, 16
; LA32-NEXT: ret
;
; LA64-LABEL: and_i64_0xffffffffff00ffff:
; LA64: # %bb.0:
-; LA64-NEXT: lu12i.w $a1, -4081
-; LA64-NEXT: ori $a1, $a1, 4095
-; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: bstrins.d $a0, $zero, 23, 16
; LA64-NEXT: ret
%b = and i64 %a, 18446744073692839935
ret i64 %b
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
index 76e51fe7d3e850..4e458e989c27e5 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -4,27 +4,26 @@
define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; LA64-LABEL: cmpxchg_i8_acquire_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a3, $a0, $a3
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: andi $a2, $a2, 255
-; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: sll.w $a2, $a2, $a3
; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a0, $a4, $a0
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: sll.w $a3, $a4, $a3
+; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a3, 0
-; LA64-NEXT: and $a5, $a4, $a0
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a3
; LA64-NEXT: bne $a5, $a1, .LBB0_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: andn $a5, $a4, $a0
+; LA64-NEXT: andn $a5, $a4, $a3
; LA64-NEXT: or $a5, $a5, $a2
-; LA64-NEXT: sc.w $a5, $a3, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB0_1
; LA64-NEXT: b .LBB0_4
; LA64-NEXT: .LBB0_3:
@@ -38,28 +37,27 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
; LA64-LABEL: cmpxchg_i16_acquire_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a3, $a0, $a3
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: sll.w $a2, $a2, $a3
; LA64-NEXT: lu12i.w $a4, 15
; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a0, $a4, $a0
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: sll.w $a3, $a4, $a3
+; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a3, 0
-; LA64-NEXT: and $a5, $a4, $a0
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a5, $a4, $a3
; LA64-NEXT: bne $a5, $a1, .LBB1_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: andn $a5, $a4, $a0
+; LA64-NEXT: andn $a5, $a4, $a3
; LA64-NEXT: or $a5, $a5, $a2
-; LA64-NEXT: sc.w $a5, $a3, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB1_1
; LA64-NEXT: b .LBB1_4
; LA64-NEXT: .LBB1_3:
@@ -113,33 +111,32 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a3, $a0, $a3
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a3, $a0, 3
; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a0
-; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: sll.w $a4, $a4, $a3
; LA64-NEXT: andi $a2, $a2, 255
-; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: sll.w $a2, $a2, $a3
; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a5, $a3, 0
+; LA64-NEXT: ll.w $a5, $a0, 0
; LA64-NEXT: and $a6, $a5, $a4
; LA64-NEXT: bne $a6, $a1, .LBB4_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
; LA64-NEXT: dbar 0
; LA64-NEXT: andn $a6, $a5, $a4
; LA64-NEXT: or $a6, $a6, $a2
-; LA64-NEXT: sc.w $a6, $a3, 0
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB4_1
; LA64-NEXT: b .LBB4_4
; LA64-NEXT: .LBB4_3:
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB4_4:
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a3
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
%res = extractvalue { i8, i1 } %tmp, 0
@@ -149,34 +146,33 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind
define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind {
; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a3, $a0, $a3
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a0
-; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: slli.d $a4, $a0, 3
+; LA64-NEXT: sll.w $a3, $a3, $a4
; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: sll.w $a2, $a2, $a4
; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a4
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a5, $a3, 0
-; LA64-NEXT: and $a6, $a5, $a4
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a6, $a5, $a3
; LA64-NEXT: bne $a6, $a1, .LBB5_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: andn $a6, $a5, $a4
+; LA64-NEXT: andn $a6, $a5, $a3
; LA64-NEXT: or $a6, $a6, $a2
-; LA64-NEXT: sc.w $a6, $a3, 0
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB5_1
; LA64-NEXT: b .LBB5_4
; LA64-NEXT: .LBB5_3:
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB5_4:
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a4
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
%res = extractvalue { i16, i1 } %tmp, 0
@@ -230,27 +226,26 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou
define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind {
; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a3, $a0, $a3
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a3
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: andi $a2, $a2, 255
-; LA64-NEXT: sll.w $a0, $a2, $a0
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: addi.w $a1, $a1, 0
-; LA64-NEXT: addi.w $a2, $a4, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: addi.w $a3, $a4, 0
; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a5, $a3, 0
-; LA64-NEXT: and $a6, $a5, $a2
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a6, $a5, $a3
; LA64-NEXT: bne $a6, $a1, .LBB8_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: andn $a6, $a5, $a2
-; LA64-NEXT: or $a6, $a6, $a0
-; LA64-NEXT: sc.w $a6, $a3, 0
+; LA64-NEXT: andn $a6, $a5, $a3
+; LA64-NEXT: or $a6, $a6, $a2
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB8_1
; LA64-NEXT: b .LBB8_4
; LA64-NEXT: .LBB8_3:
@@ -269,34 +264,33 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind
define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind {
; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a3, $a0, $a3
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: slli.d $a4, $a0, 3
+; LA64-NEXT: sll.w $a3, $a3, $a4
+; LA64-NEXT: sll.w $a1, $a1, $a4
; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
-; LA64-NEXT: sll.w $a0, $a2, $a0
-; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: sll.w $a2, $a2, $a4
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: addi.w $a1, $a1, 0
-; LA64-NEXT: addi.w $a2, $a4, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: addi.w $a4, $a3, 0
; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a5, $a3, 0
-; LA64-NEXT: and $a6, $a5, $a2
+; LA64-NEXT: ll.w $a5, $a0, 0
+; LA64-NEXT: and $a6, $a5, $a4
; LA64-NEXT: bne $a6, $a1, .LBB9_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: andn $a6, $a5, $a2
-; LA64-NEXT: or $a6, $a6, $a0
-; LA64-NEXT: sc.w $a6, $a3, 0
+; LA64-NEXT: andn $a6, $a5, $a4
+; LA64-NEXT: or $a6, $a6, $a2
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB9_1
; LA64-NEXT: b .LBB9_4
; LA64-NEXT: .LBB9_3:
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB9_4:
-; LA64-NEXT: and $a0, $a5, $a4
+; LA64-NEXT: and $a0, $a5, $a3
; LA64-NEXT: addi.w $a0, $a0, 0
; LA64-NEXT: xor $a0, $a1, $a0
; LA64-NEXT: sltui $a0, $a0, 1
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
index cd4a9e7fa9c4ff..d1fb9ede9a9ae2 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -7,18 +7,17 @@
define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-LABEL: atomicrmw_umax_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: and $a6, $a4, $a3
; LA64-NEXT: move $a5, $a4
; LA64-NEXT: bgeu $a6, $a1, .LBB0_3
@@ -27,12 +26,12 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB0_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw umax ptr %a, i8 %b acquire
ret i8 %1
@@ -41,33 +40,32 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-LABEL: atomicrmw_umax_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
-; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a2
; LA64-NEXT: move $a5, $a4
; LA64-NEXT: bgeu $a6, $a1, .LBB1_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
; LA64-NEXT: xor $a5, $a4, $a1
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB1_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw umax ptr %a, i16 %b acquire
ret i16 %1
@@ -96,18 +94,17 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-LABEL: atomicrmw_umin_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: and $a6, $a4, $a3
; LA64-NEXT: move $a5, $a4
; LA64-NEXT: bgeu $a1, $a6, .LBB4_3
@@ -116,12 +113,12 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: .LBB4_3: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB4_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw umin ptr %a, i8 %b acquire
ret i8 %1
@@ -130,33 +127,32 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-LABEL: atomicrmw_umin_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
-; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: ll.w $a4, $a0, 0
+; LA64-NEXT: and $a6, $a4, $a2
; LA64-NEXT: move $a5, $a4
; LA64-NEXT: bgeu $a1, $a6, .LBB5_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
; LA64-NEXT: xor $a5, $a4, $a1
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: .LBB5_3: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB5_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw umin ptr %a, i16 %b acquire
ret i16 %1
@@ -185,20 +181,19 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-LABEL: atomicrmw_max_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: ext.w.b $a1, $a1
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
-; LA64-NEXT: andi $a4, $a0, 24
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a4, $a2, 24
; LA64-NEXT: xori $a4, $a4, 56
; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: ll.w $a5, $a0, 0
; LA64-NEXT: and $a7, $a5, $a3
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a4
@@ -209,12 +204,12 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a6, $a6, $a3
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB8_3: # in Loop: Header=BB8_1 Depth=1
-; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB8_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a2
; LA64-NEXT: ret
%1 = atomicrmw max ptr %a, i8 %b acquire
ret i8 %1
@@ -223,22 +218,21 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-LABEL: atomicrmw_max_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: andi $a3, $a0, 24
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: andi $a3, $a2, 24
; LA64-NEXT: ori $a4, $zero, 48
; LA64-NEXT: sub.d $a3, $a4, $a3
; LA64-NEXT: lu12i.w $a4, 15
; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: sll.w $a4, $a4, $a2
; LA64-NEXT: addi.w $a4, $a4, 0
; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: ll.w $a5, $a0, 0
; LA64-NEXT: and $a7, $a5, $a4
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a3
@@ -249,12 +243,12 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-NEXT: and $a6, $a6, $a4
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1
-; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB9_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a2
; LA64-NEXT: ret
%1 = atomicrmw max ptr %a, i16 %b acquire
ret i16 %1
@@ -283,20 +277,19 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-LABEL: atomicrmw_min_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: ext.w.b $a1, $a1
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
-; LA64-NEXT: andi $a4, $a0, 24
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: andi $a4, $a2, 24
; LA64-NEXT: xori $a4, $a4, 56
; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: ll.w $a5, $a0, 0
; LA64-NEXT: and $a7, $a5, $a3
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a4
@@ -307,12 +300,12 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a6, $a6, $a3
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
-; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB12_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a2
; LA64-NEXT: ret
%1 = atomicrmw min ptr %a, i8 %b acquire
ret i8 %1
@@ -321,22 +314,21 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-LABEL: atomicrmw_min_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: andi $a3, $a0, 24
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: andi $a3, $a2, 24
; LA64-NEXT: ori $a4, $zero, 48
; LA64-NEXT: sub.d $a3, $a4, $a3
; LA64-NEXT: lu12i.w $a4, 15
; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: sll.w $a4, $a4, $a2
; LA64-NEXT: addi.w $a4, $a4, 0
; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: ll.w $a5, $a0, 0
; LA64-NEXT: and $a7, $a5, $a4
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a3
@@ -347,12 +339,12 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-NEXT: and $a6, $a6, $a4
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
-; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB13_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
-; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: srl.w $a0, $a5, $a2
; LA64-NEXT: ret
%1 = atomicrmw min ptr %a, i16 %b acquire
ret i16 %1
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
index c077d14f728f7e..b3274d474a1a28 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -5,48 +5,46 @@
define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i8_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: addi.w $a5, $a1, 0
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB0_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: addi.w $a5, $a1, 0
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB0_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i8 %b acquire
ret i8 %1
@@ -55,50 +53,48 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i16_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: addi.w $a5, $a1, 0
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB1_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: addi.w $a5, $a1, 0
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB1_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i16 %b acquire
ret i16 %1
@@ -149,48 +145,46 @@ define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_add_i8_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: add.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB4_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_add_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: add.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB4_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw add ptr %a, i8 %b acquire
ret i8 %1
@@ -199,50 +193,48 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_add_i16_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: add.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB5_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_add_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: add.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB5_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw add ptr %a, i16 %b acquire
ret i16 %1
@@ -293,48 +285,46 @@ define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_sub_i8_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: sub.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB8_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_sub_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: sub.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB8_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw sub ptr %a, i8 %b acquire
ret i8 %1
@@ -343,50 +333,48 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_sub_i16_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: sub.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB9_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_sub_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: sub.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB9_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw sub ptr %a, i16 %b acquire
ret i16 %1
@@ -439,50 +427,48 @@ define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_nand_i8_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: and $a5, $a4, $a1
; LA32-NEXT: nor $a5, $a5, $zero
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB12_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_nand_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: and $a5, $a4, $a1
; LA64-NEXT: nor $a5, $a5, $zero
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB12_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw nand ptr %a, i8 %b acquire
ret i8 %1
@@ -491,52 +477,50 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_nand_i16_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: and $a5, $a4, $a1
; LA32-NEXT: nor $a5, $a5, $zero
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB13_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_nand_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: and $a5, $a4, $a1
; LA64-NEXT: nor $a5, $a5, $zero
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB13_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw nand ptr %a, i16 %b acquire
ret i16 %1
@@ -608,8 +592,7 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: andi $a1, $a1, 255
; LA32-NEXT: sll.w $a1, $a1, $a2
; LA32-NEXT: orn $a1, $a1, $a3
-; LA32-NEXT: addi.w $a3, $zero, -4
-; LA32-NEXT: and $a0, $a0, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
; LA32-NEXT: ll.w $a3, $a0, 0
@@ -628,8 +611,7 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: andi $a1, $a1, 255
; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: orn $a1, $a1, $a3
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a0, $a0, $a3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: amand_db.w $a3, $a1, $a0
; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
@@ -647,8 +629,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
; LA32-NEXT: sll.w $a1, $a1, $a3
; LA32-NEXT: orn $a1, $a1, $a2
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a0, $a0, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
; LA32-NEXT: ll.w $a2, $a0, 0
@@ -668,8 +649,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: orn $a1, $a1, $a2
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a0, $a0, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: amand_db.w $a2, $a1, $a0
; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
@@ -722,30 +702,28 @@ define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_or_i8_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: or $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB20_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_or_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b acquire
ret i8 %1
@@ -754,30 +732,28 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_or_i16_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: or $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB21_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_or_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b acquire
ret i16 %1
@@ -828,30 +804,28 @@ define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_xor_i8_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: xor $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB24_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xor_i8_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amxor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b acquire
ret i8 %1
@@ -860,30 +834,28 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_xor_i16_acquire:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: xor $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB25_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xor_i16_acquire:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amxor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b acquire
ret i16 %1
@@ -934,46 +906,44 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i8_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: addi.w $a5, $a1, 0
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB28_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i8_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: addi.w $a5, $a1, 0
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB28_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i8 %b monotonic
ret i8 %1
@@ -982,48 +952,46 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i16_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: addi.w $a5, $a1, 0
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB29_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i16_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: addi.w $a5, $a1, 0
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB29_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i16 %b monotonic
ret i16 %1
@@ -1073,46 +1041,44 @@ define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_add_i8_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: add.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB32_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_add_i8_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: add.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB32_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw add ptr %a, i8 %b monotonic
ret i8 %1
@@ -1121,48 +1087,46 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_add_i16_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: add.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB33_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_add_i16_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: add.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB33_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw add ptr %a, i16 %b monotonic
ret i16 %1
@@ -1212,46 +1176,44 @@ define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_sub_i8_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: sub.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB36_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_sub_i8_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: sub.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB36_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw sub ptr %a, i8 %b monotonic
ret i8 %1
@@ -1260,48 +1222,46 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_sub_i16_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: sub.w $a5, $a4, $a1
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB37_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_sub_i16_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: sub.w $a5, $a4, $a1
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB37_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw sub ptr %a, i16 %b monotonic
ret i16 %1
@@ -1353,48 +1313,46 @@ define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_nand_i8_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: slli.w $a2, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a3, $a2
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: and $a5, $a4, $a1
; LA32-NEXT: nor $a5, $a5, $zero
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB40_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_nand_i8_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
-; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: and $a5, $a4, $a1
; LA64-NEXT: nor $a5, $a5, $zero
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB40_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a2
; LA64-NEXT: ret
%1 = atomicrmw nand ptr %a, i8 %b monotonic
ret i8 %1
@@ -1403,50 +1361,48 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_nand_i16_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: lu12i.w $a3, 15
-; LA32-NEXT: ori $a3, $a3, 4095
-; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: lu12i.w $a2, 15
+; LA32-NEXT: ori $a2, $a2, 4095
+; LA32-NEXT: slli.w $a3, $a0, 3
+; LA32-NEXT: sll.w $a2, $a2, $a3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: sll.w $a1, $a1, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: ll.w $a4, $a0, 0
; LA32-NEXT: and $a5, $a4, $a1
; LA32-NEXT: nor $a5, $a5, $zero
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: and $a5, $a5, $a2
; LA32-NEXT: xor $a5, $a4, $a5
-; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: sc.w $a5, $a0, 0
; LA32-NEXT: beqz $a5, .LBB41_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: srl.w $a0, $a4, $a3
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_nand_i16_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: lu12i.w $a3, 15
-; LA64-NEXT: ori $a3, $a3, 4095
-; LA64-NEXT: sll.w $a3, $a3, $a0
-; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: lu12i.w $a2, 15
+; LA64-NEXT: ori $a2, $a2, 4095
+; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: sll.w $a2, $a2, $a3
+; LA64-NEXT: addi.w $a2, $a2, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: ll.w $a4, $a0, 0
; LA64-NEXT: and $a5, $a4, $a1
; LA64-NEXT: nor $a5, $a5, $zero
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: and $a5, $a5, $a2
; LA64-NEXT: xor $a5, $a4, $a5
-; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB41_1
; LA64-NEXT: # %bb.2:
-; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: srl.w $a0, $a4, $a3
; LA64-NEXT: ret
%1 = atomicrmw nand ptr %a, i16 %b monotonic
ret i16 %1
@@ -1515,8 +1471,7 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA32-NEXT: andi $a1, $a1, 255
; LA32-NEXT: sll.w $a1, $a1, $a2
; LA32-NEXT: orn $a1, $a1, $a3
-; LA32-NEXT: addi.w $a3, $zero, -4
-; LA32-NEXT: and $a0, $a0, $a3
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: and $a4, $a3, $a1
@@ -1534,8 +1489,7 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA64-NEXT: andi $a1, $a1, 255
; LA64-NEXT: sll.w $a1, $a1, $a2
; LA64-NEXT: orn $a1, $a1, $a3
-; LA64-NEXT: addi.w $a3, $zero, -4
-; LA64-NEXT: and $a0, $a0, $a3
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: amand_db.w $a3, $a1, $a0
; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
@@ -1553,8 +1507,7 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
; LA32-NEXT: sll.w $a1, $a1, $a3
; LA32-NEXT: orn $a1, $a1, $a2
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a0, $a0, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: ll.w $a2, $a0, 0
; LA32-NEXT: and $a4, $a2, $a1
@@ -1573,8 +1526,7 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
; LA64-NEXT: sll.w $a1, $a1, $a3
; LA64-NEXT: orn $a1, $a1, $a2
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a0, $a0, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
; LA64-NEXT: amand_db.w $a2, $a1, $a0
; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
@@ -1626,29 +1578,27 @@ define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_or_i8_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: or $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB48_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_or_i8_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw or ptr %a, i8 %b monotonic
ret i8 %1
@@ -1657,29 +1607,27 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_or_i16_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: or $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB49_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_or_i16_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw or ptr %a, i16 %b monotonic
ret i16 %1
@@ -1729,29 +1677,27 @@ define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind {
define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_xor_i8_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: xor $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB52_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xor_i8_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amxor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw xor ptr %a, i8 %b monotonic
ret i8 %1
@@ -1760,29 +1706,27 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_xor_i16_monotonic:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a2, $zero, -4
-; LA32-NEXT: and $a2, $a0, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: slli.w $a2, $a0, 3
+; LA32-NEXT: sll.w $a1, $a1, $a2
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: ll.w $a3, $a0, 0
; LA32-NEXT: xor $a4, $a3, $a1
-; LA32-NEXT: sc.w $a4, $a2, 0
+; LA32-NEXT: sc.w $a4, $a0, 0
; LA32-NEXT: beqz $a4, .LBB53_1
; LA32-NEXT: # %bb.2:
-; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xor_i16_monotonic:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a2, $zero, -4
-; LA64-NEXT: and $a2, $a0, $a2
-; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: sll.w $a1, $a1, $a0
-; LA64-NEXT: amxor_db.w $a3, $a1, $a2
-; LA64-NEXT: srl.w $a0, $a3, $a0
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: sll.w $a1, $a1, $a2
+; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT: amxor_db.w $a3, $a1, $a0
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%1 = atomicrmw xor ptr %a, i16 %b monotonic
ret i16 %1
diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
index 30a352215606ff..497ac065a8c3fc 100644
--- a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
+++ b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll
@@ -22,8 +22,7 @@ define void @caller(i32 %n) {
; LA32-NEXT: bstrins.w $sp, $zero, 5, 0
; LA32-NEXT: move $s8, $sp
; LA32-NEXT: addi.w $a0, $a0, 15
-; LA32-NEXT: addi.w $a1, $zero, -16
-; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: bstrins.w $a0, $zero, 3, 0
; LA32-NEXT: sub.w $a0, $sp, $a0
; LA32-NEXT: move $sp, $a0
; LA32-NEXT: addi.w $a1, $s8, 0
diff --git a/llvm/test/CodeGen/LoongArch/vararg.ll b/llvm/test/CodeGen/LoongArch/vararg.ll
index 90881e2aa4cbd8..a377628c3d7864 100644
--- a/llvm/test/CodeGen/LoongArch/vararg.ll
+++ b/llvm/test/CodeGen/LoongArch/vararg.ll
@@ -126,10 +126,9 @@ define i64 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; LA64-FPELIM-NEXT: st.d $a2, $fp, 16
; LA64-FPELIM-NEXT: addi.d $a0, $fp, 16
; LA64-FPELIM-NEXT: st.d $a0, $fp, -32
+; LA64-FPELIM-NEXT: st.d $a1, $fp, 8
; LA64-FPELIM-NEXT: addi.d $a0, $a1, 15
-; LA64-FPELIM-NEXT: addi.w $a1, $zero, -16
-; LA64-FPELIM-NEXT: and $a0, $a0, $a1
-; LA64-FPELIM-NEXT: st.d $s0, $fp, 8
+; LA64-FPELIM-NEXT: bstrins.d $a0, $zero, 3, 0
; LA64-FPELIM-NEXT: sub.d $a0, $sp, $a0
; LA64-FPELIM-NEXT: move $sp, $a0
; LA64-FPELIM-NEXT: bl %plt(notdead)
@@ -157,10 +156,9 @@ define i64 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; LA64-WITHFP-NEXT: st.d $a2, $fp, 16
; LA64-WITHFP-NEXT: addi.d $a0, $fp, 16
; LA64-WITHFP-NEXT: st.d $a0, $fp, -32
+; LA64-WITHFP-NEXT: st.d $a1, $fp, 8
; LA64-WITHFP-NEXT: addi.d $a0, $a1, 15
-; LA64-WITHFP-NEXT: addi.w $a1, $zero, -16
-; LA64-WITHFP-NEXT: and $a0, $a0, $a1
-; LA64-WITHFP-NEXT: st.d $s0, $fp, 8
+; LA64-WITHFP-NEXT: bstrins.d $a0, $zero, 3, 0
; LA64-WITHFP-NEXT: sub.d $a0, $sp, $a0
; LA64-WITHFP-NEXT: move $sp, $a0
; LA64-WITHFP-NEXT: bl %plt(notdead)
More information about the llvm-commits mailing list