[llvm] [LoongArch] Switch to the Machine Scheduler (PR #83759)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 3 19:49:34 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: wanglei (wangleiat)
Changes:
With this change, the SelectionDAG scheduling preference defaults to source-order scheduling; the machine scheduler generates better code even though no machine model is defined for LoongArch yet.
Most of the test changes are trivial instruction reorderings and differing register allocations, without any obvious performance impact.
This is similar to commit 3d0fbafd0bce43bb9106230a45d1130f7a40e5ec.
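For readers unfamiliar with the hook: the only source change is the subtarget override visible in the diff below. Here is a minimal, self-contained sketch of the opt-in pattern; the class names and the `main()` driver are illustrative stand-ins rather than LLVM's real headers, and only `enableMachineScheduler()` mirrors the actual patch.

```cpp
// Simplified model of the opt-in pattern used in this patch (not real LLVM
// headers): a base "subtarget" exposes a virtual hook that defaults to the
// SelectionDAG scheduler, and a target overrides it to enable MISched.
#include <iostream>

// Stand-in for llvm::TargetSubtargetInfo (name and default are illustrative).
struct SubtargetBase {
  virtual ~SubtargetBase() = default;
  virtual bool enableMachineScheduler() const { return false; }
};

// Stand-in for LoongArchSubtarget: the patch adds exactly this override.
struct LoongArchLikeSubtarget : SubtargetBase {
  bool enableMachineScheduler() const override { return true; }
};

int main() {
  LoongArchLikeSubtarget ST;
  const SubtargetBase &Ref = ST;
  // The codegen pass pipeline queries this hook to decide whether to run
  // the generic machine scheduler for the target.
  std::cout << "machine scheduler enabled: " << std::boolalpha
            << Ref.enableMachineScheduler() << '\n';
  return 0;
}
```

In the real tree the hook lives on the target's Subtarget class (see LoongArchSubtarget.h in the diff); returning true makes the generic machine scheduler run and switches the SelectionDAG scheduler to source order, which accounts for most of the test churn in this patch.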
---
Patch is 785.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83759.diff
95 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchSubtarget.h (+1)
- (modified) llvm/test/CodeGen/LoongArch/alloca.ll (+4-4)
- (modified) llvm/test/CodeGen/LoongArch/alsl.ll (+8-8)
- (modified) llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll (+108-108)
- (modified) llvm/test/CodeGen/LoongArch/bitreverse.ll (+23-23)
- (modified) llvm/test/CodeGen/LoongArch/branch-relaxation.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/bswap.ll (+20-20)
- (modified) llvm/test/CodeGen/LoongArch/bytepick.ll (+6-9)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-common.ll (+68-68)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll (+49-49)
- (modified) llvm/test/CodeGen/LoongArch/fcopysign.ll (+4-4)
- (modified) llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/ghc-cc.ll (+48-48)
- (modified) llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll (+8-8)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/and.ll (+20-20)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll (+105-105)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll (+398-338)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll (+260-260)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll (+735-735)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll (+16-16)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll (+23-23)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll (+754-370)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll (+52-51)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+44-44)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll (+224-224)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll (+224-224)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll (+84-84)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll (+72-72)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll (+8-8)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/mulh.ll (+24-24)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vselect.ll (+11-11)
- (modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+20-20)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll (+224-224)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll (+224-224)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll (+84-84)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll (+72-72)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll (+8-8)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/mulh.ll (+24-24)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vselect.ll (+11-11)
- (modified) llvm/test/CodeGen/LoongArch/preferred-alignments.ll (+8-8)
- (modified) llvm/test/CodeGen/LoongArch/rotl-rotr.ll (+192-196)
- (modified) llvm/test/CodeGen/LoongArch/select-to-shiftand.ll (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll (+18-18)
- (modified) llvm/test/CodeGen/LoongArch/shrinkwrap.ll (+1-3)
- (modified) llvm/test/CodeGen/LoongArch/smul-with-overflow.ll (+359-347)
- (modified) llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll (+16-16)
- (modified) llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll (+24-38)
- (modified) llvm/test/CodeGen/LoongArch/tail-calls.ll (+10-8)
- (modified) llvm/test/CodeGen/LoongArch/unaligned-access.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/vararg.ll (+10-10)
- (modified) llvm/test/CodeGen/LoongArch/vector-fp-imm.ll (+726-599)
- (modified) llvm/test/CodeGen/LoongArch/zext-with-load-is-free.ll (+4-4)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected (+16-17)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected (+16-17)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 11c0b39e176e61..cecb4a50aa7633 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -113,6 +113,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
+ bool enableMachineScheduler() const override { return true; }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/alloca.ll b/llvm/test/CodeGen/LoongArch/alloca.ll
index d766be6aac9509..75a05689e4178d 100644
--- a/llvm/test/CodeGen/LoongArch/alloca.ll
+++ b/llvm/test/CodeGen/LoongArch/alloca.ll
@@ -126,8 +126,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA32-NEXT: st.w $a1, $sp, 8
; LA32-NEXT: ori $a1, $zero, 10
; LA32-NEXT: st.w $a1, $sp, 4
-; LA32-NEXT: ori $a1, $zero, 9
-; LA32-NEXT: st.w $a1, $sp, 0
+; LA32-NEXT: ori $t0, $zero, 9
; LA32-NEXT: ori $a1, $zero, 2
; LA32-NEXT: ori $a2, $zero, 3
; LA32-NEXT: ori $a3, $zero, 4
@@ -135,6 +134,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA32-NEXT: ori $a5, $zero, 6
; LA32-NEXT: ori $a6, $zero, 7
; LA32-NEXT: ori $a7, $zero, 8
+; LA32-NEXT: st.w $t0, $sp, 0
; LA32-NEXT: bl %plt(func)
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: addi.w $sp, $fp, -16
@@ -162,8 +162,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA64-NEXT: st.d $a1, $sp, 16
; LA64-NEXT: ori $a1, $zero, 10
; LA64-NEXT: st.d $a1, $sp, 8
-; LA64-NEXT: ori $a1, $zero, 9
-; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: ori $t0, $zero, 9
; LA64-NEXT: ori $a1, $zero, 2
; LA64-NEXT: ori $a2, $zero, 3
; LA64-NEXT: ori $a3, $zero, 4
@@ -171,6 +170,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA64-NEXT: ori $a5, $zero, 6
; LA64-NEXT: ori $a6, $zero, 7
; LA64-NEXT: ori $a7, $zero, 8
+; LA64-NEXT: st.d $t0, $sp, 0
; LA64-NEXT: bl %plt(func)
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: addi.d $sp, $fp, -16
diff --git a/llvm/test/CodeGen/LoongArch/alsl.ll b/llvm/test/CodeGen/LoongArch/alsl.ll
index 650f504dcaf83a..177e37de0952d7 100644
--- a/llvm/test/CodeGen/LoongArch/alsl.ll
+++ b/llvm/test/CodeGen/LoongArch/alsl.ll
@@ -53,12 +53,12 @@ entry:
define i64 @alsl_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-LABEL: alsl_i64:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: slli.w $a1, $a1, 4
; LA32-NEXT: srli.w $a4, $a0, 28
+; LA32-NEXT: slli.w $a1, $a1, 4
; LA32-NEXT: or $a1, $a1, $a4
-; LA32-NEXT: add.w $a1, $a3, $a1
; LA32-NEXT: alsl.w $a0, $a0, $a2, 4
; LA32-NEXT: sltu $a2, $a0, $a2
+; LA32-NEXT: add.w $a1, $a3, $a1
; LA32-NEXT: add.w $a1, $a1, $a2
; LA32-NEXT: ret
;
@@ -189,14 +189,14 @@ entry:
define i64 @mul_add_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-LABEL: mul_add_i64:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: slli.w $a4, $a1, 4
-; LA32-NEXT: sub.w $a1, $a4, $a1
; LA32-NEXT: ori $a4, $zero, 15
; LA32-NEXT: mulh.wu $a4, $a0, $a4
+; LA32-NEXT: slli.w $a5, $a1, 4
+; LA32-NEXT: sub.w $a1, $a5, $a1
; LA32-NEXT: add.w $a1, $a4, $a1
+; LA32-NEXT: slli.w $a4, $a0, 4
+; LA32-NEXT: sub.w $a0, $a4, $a0
; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: slli.w $a3, $a0, 4
-; LA32-NEXT: sub.w $a0, $a3, $a0
; LA32-NEXT: add.w $a0, $a2, $a0
; LA32-NEXT: sltu $a2, $a0, $a2
; LA32-NEXT: add.w $a1, $a1, $a2
@@ -342,9 +342,9 @@ define i64 @mul_add_neg_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-NEXT: mulh.wu $a4, $a0, $a4
; LA32-NEXT: sub.w $a4, $a4, $a0
; LA32-NEXT: add.w $a1, $a4, $a1
+; LA32-NEXT: slli.w $a4, $a0, 4
+; LA32-NEXT: sub.w $a0, $a0, $a4
; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: slli.w $a3, $a0, 4
-; LA32-NEXT: sub.w $a0, $a0, $a3
; LA32-NEXT: add.w $a0, $a2, $a0
; LA32-NEXT: sltu $a2, $a0, $a2
; LA32-NEXT: add.w $a1, $a1, $a2
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index b84c1093eb75f2..bf48c0df3e4961 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -4,34 +4,34 @@
define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a4, $a3, $a2
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: andi $a1, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB0_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB0_3 Depth 2
-; LA64-NEXT: srl.w $a5, $a2, $a3
-; LA64-NEXT: andi $a6, $a5, 255
-; LA64-NEXT: sltu $a6, $a6, $a1
+; LA64-NEXT: srl.w $a5, $a3, $a2
+; LA64-NEXT: addi.w $a6, $a3, 0
+; LA64-NEXT: andi $a7, $a5, 255
; LA64-NEXT: addi.d $a5, $a5, 1
-; LA64-NEXT: xori $a6, $a6, 1
-; LA64-NEXT: masknez $a5, $a5, $a6
+; LA64-NEXT: sltu $a7, $a7, $a1
+; LA64-NEXT: xori $a7, $a7, 1
+; LA64-NEXT: masknez $a5, $a5, $a7
; LA64-NEXT: andi $a5, $a5, 255
-; LA64-NEXT: sll.w $a5, $a5, $a3
-; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a5, $a6, $a5
-; LA64-NEXT: addi.w $a6, $a2, 0
+; LA64-NEXT: sll.w $a5, $a5, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a5, $a3, $a5
; LA64-NEXT: .LBB0_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB0_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a6, .LBB0_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a6, .LBB0_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
; LA64-NEXT: move $a7, $a5
@@ -43,9 +43,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB0_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: bne $a2, $a6, .LBB0_1
+; LA64-NEXT: bne $a3, $a6, .LBB0_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -54,35 +54,35 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a4, $a3, $a2
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB1_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB1_3 Depth 2
-; LA64-NEXT: srl.w $a5, $a2, $a3
-; LA64-NEXT: bstrpick.d $a6, $a5, 15, 0
-; LA64-NEXT: sltu $a6, $a6, $a1
+; LA64-NEXT: srl.w $a5, $a3, $a2
+; LA64-NEXT: addi.w $a6, $a3, 0
+; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
; LA64-NEXT: addi.d $a5, $a5, 1
-; LA64-NEXT: xori $a6, $a6, 1
-; LA64-NEXT: masknez $a5, $a5, $a6
+; LA64-NEXT: sltu $a7, $a7, $a1
+; LA64-NEXT: xori $a7, $a7, 1
+; LA64-NEXT: masknez $a5, $a5, $a7
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: sll.w $a5, $a5, $a3
-; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a5, $a6, $a5
-; LA64-NEXT: addi.w $a6, $a2, 0
+; LA64-NEXT: sll.w $a5, $a5, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a5, $a3, $a5
; LA64-NEXT: .LBB1_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB1_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a6, .LBB1_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a6, .LBB1_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
; LA64-NEXT: move $a7, $a5
@@ -94,9 +94,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB1_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT: bne $a2, $a6, .LBB1_1
+; LA64-NEXT: bne $a3, $a6, .LBB1_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -111,19 +111,19 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: .LBB2_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB2_3 Depth 2
-; LA64-NEXT: addi.w $a3, $a2, 0
-; LA64-NEXT: sltu $a4, $a3, $a1
-; LA64-NEXT: xori $a4, $a4, 1
-; LA64-NEXT: addi.d $a2, $a2, 1
-; LA64-NEXT: masknez $a4, $a2, $a4
+; LA64-NEXT: addi.d $a3, $a2, 1
+; LA64-NEXT: addi.w $a4, $a2, 0
+; LA64-NEXT: sltu $a2, $a4, $a1
+; LA64-NEXT: xori $a2, $a2, 1
+; LA64-NEXT: masknez $a3, $a3, $a2
; LA64-NEXT: .LBB2_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB2_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a3, .LBB2_5
+; LA64-NEXT: bne $a2, $a4, .LBB2_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: move $a5, $a3
; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB2_3
; LA64-NEXT: b .LBB2_6
@@ -132,7 +132,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB2_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT: bne $a2, $a3, .LBB2_1
+; LA64-NEXT: bne $a2, $a4, .LBB2_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
@@ -149,10 +149,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB3_3 Depth 2
; LA64-NEXT: move $a3, $a2
-; LA64-NEXT: sltu $a2, $a2, $a1
-; LA64-NEXT: xori $a2, $a2, 1
-; LA64-NEXT: addi.d $a4, $a3, 1
-; LA64-NEXT: masknez $a4, $a4, $a2
+; LA64-NEXT: addi.d $a2, $a2, 1
+; LA64-NEXT: sltu $a4, $a3, $a1
+; LA64-NEXT: xori $a4, $a4, 1
+; LA64-NEXT: masknez $a4, $a2, $a4
; LA64-NEXT: .LBB3_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB3_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
@@ -180,39 +180,39 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a4, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: andi $a2, $a4, 24
+; LA64-NEXT: ori $a5, $zero, 255
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: sll.w $a4, $a5, $a4
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: andi $a5, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB4_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB4_3 Depth 2
-; LA64-NEXT: srl.w $a6, $a2, $a3
-; LA64-NEXT: andi $a7, $a6, 255
-; LA64-NEXT: sltu $t0, $a5, $a7
+; LA64-NEXT: srl.w $a6, $a3, $a2
+; LA64-NEXT: addi.w $a7, $a3, 0
+; LA64-NEXT: andi $t0, $a6, 255
; LA64-NEXT: addi.d $a6, $a6, -1
+; LA64-NEXT: sltui $t1, $t0, 1
+; LA64-NEXT: sltu $t0, $a5, $t0
; LA64-NEXT: masknez $a6, $a6, $t0
; LA64-NEXT: maskeqz $t0, $a1, $t0
; LA64-NEXT: or $a6, $t0, $a6
-; LA64-NEXT: sltui $a7, $a7, 1
-; LA64-NEXT: masknez $a6, $a6, $a7
-; LA64-NEXT: maskeqz $a7, $a1, $a7
-; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: masknez $a6, $a6, $t1
+; LA64-NEXT: maskeqz $t0, $a1, $t1
+; LA64-NEXT: or $a6, $t0, $a6
; LA64-NEXT: andi $a6, $a6, 255
-; LA64-NEXT: sll.w $a6, $a6, $a3
-; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a6, $a7, $a6
-; LA64-NEXT: addi.w $a7, $a2, 0
+; LA64-NEXT: sll.w $a6, $a6, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a6, $a3, $a6
; LA64-NEXT: .LBB4_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB4_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a7, .LBB4_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a7, .LBB4_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
; LA64-NEXT: move $t0, $a6
@@ -224,9 +224,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT: bne $a2, $a7, .LBB4_1
+; LA64-NEXT: bne $a3, $a7, .LBB4_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -235,40 +235,40 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a4, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: andi $a2, $a4, 24
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a5, $a3, 4095
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: sll.w $a4, $a5, $a4
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: bstrpick.d $a5, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB5_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB5_3 Depth 2
-; LA64-NEXT: srl.w $a6, $a2, $a3
-; LA64-NEXT: bstrpick.d $a7, $a6, 15, 0
-; LA64-NEXT: sltu $t0, $a5, $a7
+; LA64-NEXT: srl.w $a6, $a3, $a2
+; LA64-NEXT: addi.w $a7, $a3, 0
+; LA64-NEXT: bstrpick.d $t0, $a6, 15, 0
; LA64-NEXT: addi.d $a6, $a6, -1
+; LA64-NEXT: sltui $t1, $t0, 1
+; LA64-NEXT: sltu $t0, $a5, $t0
; LA64-NEXT: masknez $a6, $a6, $t0
; LA64-NEXT: maskeqz $t0, $a1, $t0
; LA64-NEXT: or $a6, $t0, $a6
-; LA64-NEXT: sltui $a7, $a7, 1
-; LA64-NEXT: masknez $a6, $a6, $a7
-; LA64-NEXT: maskeqz $a7, $a1, $a7
-; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: masknez $a6, $a6, $t1
+; LA64-NEXT: maskeqz $t0, $a1, $t1
+; LA64-NEXT: or $a6, $t0, $a6
; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: sll.w $a6, $a6, $a3
-; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a6, $a7, $a6
-; LA64-NEXT: addi.w $a7, $a2, 0
+; LA64-NEXT: sll.w $a6, $a6, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a6, $a3, $a6
; LA64-NEXT: .LBB5_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB5_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a7, .LBB5_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a7, .LBB5_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
; LA64-NEXT: move $t0, $a6
@@ -280,9 +280,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT: bne $a2, $a7, .LBB5_1
+; LA64-NEXT: bne $a3, $a7, .LBB5_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -297,24 +297,24 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: .LBB6_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB6_3 Depth 2
-; LA64-NEXT: addi.w $a4, $a2, 0
-; LA64-NEXT: sltu $a5, $a3, $a4
-; LA64-NEXT: addi.d $a2, $a2, -1
-; LA64-NEXT: masknez $a2, $a2, $a5
-; LA64-NEXT: maskeqz $a5, $a1, $a5
-; LA64-NEXT: or $a2, $a5, $a2
-; LA64-NEXT: sltui $a5, $a4, 1
-; LA64-NEXT: masknez $a2, $a2, $a5
-; LA64-NEXT: maskeqz $a5, $a1, $a5
-; LA64-NEXT: or $a5, $a5, $a2
+; LA64-NEXT: addi.d $a4, $a2, -1
+; LA64-NEXT: addi.w $a5, $a2, 0
+; LA64-NEXT: sltui $a2, $a5, 1
+; LA64-NEXT: sltu $a6, $a3, $a5
+; LA64-NEXT: masknez $a4, $a4, $a6
+; LA64-NEXT: maskeqz $a6, $a1, $a6
+; LA64-NEXT: or $a4, $a6, $a4
+; LA64-NEXT: masknez $a4, $a4, $a2
+; LA64-NEXT: maskeqz $a2, $a1, $a2
+; LA64-NEXT: or $a4, $a2, $a4
; LA64-NEXT: .LBB6_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB6_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a4, .LBB6_5
+; LA64-NEXT: bne $a2, $a5, .LBB6_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: move $a6, $a4
; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB6_3
; LA64-NEXT: b .LBB6_6
@@ -323,7 +323,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT: bne $a2, $a4, .LBB6_1
+; LA64-NEXT: bne $a2, $a5, .LBB6_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
@@ -340,12 +340,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB7_3 Depth 2
; LA64-NEXT: move $a3, $a2
-; LA64-NEXT: sltu $a2, $a1, $a2
-; LA64-NEXT: addi.d $a4, $a3, -1
-; LA64-NEXT: masknez $a4, $a4, $a2
-; LA64-NEXT: maskeqz $a2, $a1, $a2
-; LA64-NEXT: or $a2, $a2, $a4
+; LA64-NEXT: addi.d $a2, $a2, -1
; LA64-NEXT: sltui $a4, $a3, 1
+; LA64-NEXT: sltu $a5, $a1, $a3
+; LA64-NEXT: masknez $a2, $a2, $a5
+; LA64-NEXT: maskeqz $a5, $a1, $a5
+; LA64-NEXT: or $a2, $a5, $a2
; LA64-NEXT: masknez $a2, $a2, $a4
; LA64-NEXT: maskeqz $a4, $a1, $a4
; LA64-NEXT: or $a4, $a4, $a2
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
index 259d8565c68420..fcf523aa3c883a 100644
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -129,30 +129,30 @@ define i48 @test_bitreverse_i48(i48 %a) nounwind {
define i77 @test_bitreverse_i77(i77 %a) nounwind {
; LA32-LABEL: test_bitreverse_i77:
; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a2, $a1, 0
+; LA32-NEXT: ld.w $a2, $a1, 4
+; LA32-NEXT: ld.w $a3, $a1, 8
+; LA32-NEXT: ld.w $a1, $a1, 0
; LA32-NEXT: bitrev.w $a2, $a2
-; LA32-NEXT: ld.w $a3, $a1, 4
+; LA32-NEXT: slli.w $a4, $a2, 13
; LA32-NEXT: bitrev.w $a3, $a3
-; LA32-NEXT: srli.w $a4, $a3, 19
-; LA32-NEXT: slli.w $a5, $a2, 13
-; LA32-NEXT: or $a4, $a5, $a4
+; LA32-NEXT: srli.w $a3, $a3, 19
+; LA32-NEXT: or $a3, $a3, $a4
; L...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/83759
More information about the llvm-commits mailing list