[llvm] [LoongArch] Set scheduler to register pressure (PR #95741)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 16 23:03:24 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
Changes:
Performance measurements indicate that the register-pressure scheduling preference yields the best performance on both the 3A5000 and 3A6000 microarchitectures.
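For readers skimming the diff, the functional change is just two hooks that work together; an abridged recap (full context in the patch below, not independently buildable):

```cpp
// LoongArchISelLowering.cpp -- in the LoongArchTargetLowering constructor:
// prefer the SelectionDAG pre-RA list scheduler that minimizes register
// pressure (bottom-up register reduction).
setSchedulingPreference(Sched::RegPressure);

// LoongArchSubtarget.h -- keep the MachineScheduler enabled, but stop it
// from forcing the SelectionDAG scheduler back to source order, so the
// RegPressure preference above stays in effect.
bool enableMachineScheduler() const override { return true; }
bool enableMachineSchedDefaultSched() const override { return false; }
```

Without the `enableMachineSchedDefaultSched()` override, enabling the MachineScheduler makes the SelectionDAG scheduler default to `Sched::Source`, which would mask the preference set in the constructor. The rest of the patch is regenerated CHECK lines in the affected tests.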
---
Patch is 632.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95741.diff
80 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+3)
- (modified) llvm/lib/Target/LoongArch/LoongArchSubtarget.h (+1)
- (modified) llvm/test/CodeGen/LoongArch/alsl.ll (+11-10)
- (modified) llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll (+56-56)
- (modified) llvm/test/CodeGen/LoongArch/bitreverse.ll (+22-22)
- (modified) llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/bswap.ll (+20-20)
- (modified) llvm/test/CodeGen/LoongArch/bytepick.ll (+9-6)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-common.ll (+63-63)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll (+8-8)
- (modified) llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll (+30-30)
- (modified) llvm/test/CodeGen/LoongArch/fcopysign.ll (+4-4)
- (modified) llvm/test/CodeGen/LoongArch/gep-imm.ll (+2-2)
- (modified) llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll (+11-11)
- (modified) llvm/test/CodeGen/LoongArch/ghc-cc.ll (+48-48)
- (modified) llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll (+11-11)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/and.ll (+21-21)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll (+42-42)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll (+210-250)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll (+140-140)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll (+180-180)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll (+3-3)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll (+3-3)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll (+111-129)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll (+36-36)
- (modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+31-31)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll (+184-184)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll (+184-184)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll (+84-84)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll (+72-72)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/mulh.ll (+24-24)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vselect.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+20-20)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll (+184-184)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll (+184-184)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll (+84-84)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll (+72-72)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/lsx/mulh.ll (+24-24)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vselect.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/rotl-rotr.ll (+185-179)
- (modified) llvm/test/CodeGen/LoongArch/select-to-shiftand.ll (+3-3)
- (modified) llvm/test/CodeGen/LoongArch/sextw-removal.ll (+113-109)
- (modified) llvm/test/CodeGen/LoongArch/smul-with-overflow.ll (+300-304)
- (modified) llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll (+6-6)
- (modified) llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll (+38-24)
- (modified) llvm/test/CodeGen/LoongArch/vector-fp-imm.ll (+595-717)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected (+17-16)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected (+17-16)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 1721287dab4dd..c0ef6ffa756af 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -367,6 +367,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
+
+ // Set scheduler.
+ setSchedulingPreference(Sched::RegPressure);
}
bool LoongArchTargetLowering::isOffsetFoldingLegal(
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index a8752c8070aa6..86a8e0ed2d18b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -105,6 +105,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
bool enableMachineScheduler() const override { return true; }
+ bool enableMachineSchedDefaultSched() const override { return false; }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/alsl.ll b/llvm/test/CodeGen/LoongArch/alsl.ll
index 34baccc60d547..92288059c7744 100644
--- a/llvm/test/CodeGen/LoongArch/alsl.ll
+++ b/llvm/test/CodeGen/LoongArch/alsl.ll
@@ -53,13 +53,14 @@ entry:
define i64 @alsl_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-LABEL: alsl_i64:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: srli.w $a4, $a0, 28
+; LA32-NEXT: alsl.w $a4, $a0, $a2, 4
+; LA32-NEXT: sltu $a2, $a4, $a2
+; LA32-NEXT: srli.w $a0, $a0, 28
; LA32-NEXT: slli.w $a1, $a1, 4
-; LA32-NEXT: or $a1, $a1, $a4
-; LA32-NEXT: alsl.w $a0, $a0, $a2, 4
-; LA32-NEXT: sltu $a2, $a0, $a2
-; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: add.w $a1, $a1, $a2
+; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: add.w $a0, $a3, $a0
+; LA32-NEXT: add.w $a1, $a0, $a2
+; LA32-NEXT: move $a0, $a4
; LA32-NEXT: ret
;
; LA64-LABEL: alsl_i64:
@@ -194,9 +195,9 @@ define i64 @mul_add_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-NEXT: slli.w $a5, $a1, 4
; LA32-NEXT: sub.w $a1, $a5, $a1
; LA32-NEXT: add.w $a1, $a4, $a1
-; LA32-NEXT: slli.w $a4, $a0, 4
-; LA32-NEXT: sub.w $a0, $a4, $a0
; LA32-NEXT: add.w $a1, $a3, $a1
+; LA32-NEXT: slli.w $a3, $a0, 4
+; LA32-NEXT: sub.w $a0, $a3, $a0
; LA32-NEXT: add.w $a0, $a2, $a0
; LA32-NEXT: sltu $a2, $a0, $a2
; LA32-NEXT: add.w $a1, $a1, $a2
@@ -342,9 +343,9 @@ define i64 @mul_add_neg_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-NEXT: mulh.wu $a4, $a0, $a4
; LA32-NEXT: sub.w $a4, $a4, $a0
; LA32-NEXT: add.w $a1, $a4, $a1
-; LA32-NEXT: slli.w $a4, $a0, 4
-; LA32-NEXT: sub.w $a0, $a0, $a4
; LA32-NEXT: add.w $a1, $a3, $a1
+; LA32-NEXT: slli.w $a3, $a0, 4
+; LA32-NEXT: sub.w $a0, $a0, $a3
; LA32-NEXT: add.w $a0, $a2, $a0
; LA32-NEXT: sltu $a2, $a0, $a2
; LA32-NEXT: add.w $a1, $a1, $a2
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index 854518ed1fc97..6c73eb8c71e51 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -4,13 +4,13 @@
define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: andi $a2, $a3, 24
-; LA64-NEXT: ori $a5, $zero, 255
; LA64-NEXT: ld.w $a4, $a0, 0
-; LA64-NEXT: sll.w $a3, $a5, $a3
; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: andi $a1, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB0_1: # %atomicrmw.start
@@ -18,11 +18,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: # Child Loop BB0_3 Depth 2
; LA64-NEXT: move $a5, $a4
; LA64-NEXT: srl.w $a4, $a4, $a2
-; LA64-NEXT: andi $a6, $a4, 255
-; LA64-NEXT: addi.d $a4, $a4, 1
-; LA64-NEXT: sltu $a6, $a6, $a1
-; LA64-NEXT: xori $a6, $a6, 1
-; LA64-NEXT: masknez $a4, $a4, $a6
+; LA64-NEXT: addi.d $a6, $a4, 1
+; LA64-NEXT: andi $a4, $a4, 255
+; LA64-NEXT: sltu $a4, $a4, $a1
+; LA64-NEXT: xori $a4, $a4, 1
+; LA64-NEXT: masknez $a4, $a6, $a4
; LA64-NEXT: andi $a4, $a4, 255
; LA64-NEXT: sll.w $a4, $a4, $a2
; LA64-NEXT: and $a6, $a5, $a3
@@ -54,14 +54,14 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: andi $a2, $a3, 24
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a5, $a4, 4095
; LA64-NEXT: ld.w $a4, $a0, 0
-; LA64-NEXT: sll.w $a3, $a5, $a3
; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB1_1: # %atomicrmw.start
@@ -69,11 +69,11 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: # Child Loop BB1_3 Depth 2
; LA64-NEXT: move $a5, $a4
; LA64-NEXT: srl.w $a4, $a4, $a2
-; LA64-NEXT: bstrpick.d $a6, $a4, 15, 0
-; LA64-NEXT: addi.d $a4, $a4, 1
-; LA64-NEXT: sltu $a6, $a6, $a1
-; LA64-NEXT: xori $a6, $a6, 1
-; LA64-NEXT: masknez $a4, $a4, $a6
+; LA64-NEXT: addi.d $a6, $a4, 1
+; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
+; LA64-NEXT: sltu $a4, $a4, $a1
+; LA64-NEXT: xori $a4, $a4, 1
+; LA64-NEXT: masknez $a4, $a6, $a4
; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
; LA64-NEXT: sll.w $a4, $a4, $a2
; LA64-NEXT: and $a6, $a5, $a3
@@ -112,10 +112,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB2_3 Depth 2
; LA64-NEXT: move $a3, $a2
-; LA64-NEXT: addi.w $a2, $a2, 1
-; LA64-NEXT: sltu $a4, $a3, $a1
-; LA64-NEXT: xori $a4, $a4, 1
-; LA64-NEXT: masknez $a4, $a2, $a4
+; LA64-NEXT: sltu $a2, $a2, $a1
+; LA64-NEXT: xori $a2, $a2, 1
+; LA64-NEXT: addi.w $a4, $a3, 1
+; LA64-NEXT: masknez $a4, $a4, $a2
; LA64-NEXT: .LBB2_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB2_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
@@ -149,10 +149,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB3_3 Depth 2
; LA64-NEXT: move $a3, $a2
-; LA64-NEXT: addi.d $a2, $a2, 1
-; LA64-NEXT: sltu $a4, $a3, $a1
-; LA64-NEXT: xori $a4, $a4, 1
-; LA64-NEXT: masknez $a4, $a2, $a4
+; LA64-NEXT: sltu $a2, $a2, $a1
+; LA64-NEXT: xori $a2, $a2, 1
+; LA64-NEXT: addi.d $a4, $a3, 1
+; LA64-NEXT: masknez $a4, $a4, $a2
; LA64-NEXT: .LBB3_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB3_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
@@ -180,13 +180,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: andi $a2, $a3, 24
-; LA64-NEXT: ori $a4, $zero, 255
; LA64-NEXT: ld.w $a5, $a0, 0
-; LA64-NEXT: sll.w $a3, $a4, $a3
; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: andi $a4, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB4_1: # %atomicrmw.start
@@ -195,15 +195,15 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: srl.w $a5, $a5, $a2
; LA64-NEXT: andi $a7, $a5, 255
+; LA64-NEXT: sltu $t0, $a4, $a7
; LA64-NEXT: addi.d $a5, $a5, -1
-; LA64-NEXT: sltui $t0, $a7, 1
-; LA64-NEXT: sltu $a7, $a4, $a7
+; LA64-NEXT: masknez $a5, $a5, $t0
+; LA64-NEXT: maskeqz $t0, $a1, $t0
+; LA64-NEXT: or $a5, $t0, $a5
+; LA64-NEXT: sltui $a7, $a7, 1
; LA64-NEXT: masknez $a5, $a5, $a7
; LA64-NEXT: maskeqz $a7, $a1, $a7
; LA64-NEXT: or $a5, $a7, $a5
-; LA64-NEXT: masknez $a5, $a5, $t0
-; LA64-NEXT: maskeqz $a7, $a1, $t0
-; LA64-NEXT: or $a5, $a7, $a5
; LA64-NEXT: andi $a5, $a5, 255
; LA64-NEXT: sll.w $a5, $a5, $a2
; LA64-NEXT: and $a7, $a6, $a3
@@ -235,14 +235,14 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a2
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: andi $a2, $a3, 24
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a4, $a4, 4095
; LA64-NEXT: ld.w $a5, $a0, 0
-; LA64-NEXT: sll.w $a3, $a4, $a3
; LA64-NEXT: nor $a3, $a3, $zero
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: bstrpick.d $a4, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB5_1: # %atomicrmw.start
@@ -251,15 +251,15 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: srl.w $a5, $a5, $a2
; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
+; LA64-NEXT: sltu $t0, $a4, $a7
; LA64-NEXT: addi.d $a5, $a5, -1
-; LA64-NEXT: sltui $t0, $a7, 1
-; LA64-NEXT: sltu $a7, $a4, $a7
+; LA64-NEXT: masknez $a5, $a5, $t0
+; LA64-NEXT: maskeqz $t0, $a1, $t0
+; LA64-NEXT: or $a5, $t0, $a5
+; LA64-NEXT: sltui $a7, $a7, 1
; LA64-NEXT: masknez $a5, $a5, $a7
; LA64-NEXT: maskeqz $a7, $a1, $a7
; LA64-NEXT: or $a5, $a7, $a5
-; LA64-NEXT: masknez $a5, $a5, $t0
-; LA64-NEXT: maskeqz $a7, $a1, $t0
-; LA64-NEXT: or $a5, $a7, $a5
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
; LA64-NEXT: sll.w $a5, $a5, $a2
; LA64-NEXT: and $a7, $a6, $a3
@@ -298,12 +298,12 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB6_3 Depth 2
; LA64-NEXT: move $a4, $a2
-; LA64-NEXT: addi.w $a2, $a2, -1
+; LA64-NEXT: sltu $a2, $a3, $a2
+; LA64-NEXT: addi.w $a5, $a4, -1
+; LA64-NEXT: masknez $a5, $a5, $a2
+; LA64-NEXT: maskeqz $a2, $a1, $a2
+; LA64-NEXT: or $a2, $a2, $a5
; LA64-NEXT: sltui $a5, $a4, 1
-; LA64-NEXT: sltu $a6, $a3, $a4
-; LA64-NEXT: masknez $a2, $a2, $a6
-; LA64-NEXT: maskeqz $a6, $a1, $a6
-; LA64-NEXT: or $a2, $a6, $a2
; LA64-NEXT: masknez $a2, $a2, $a5
; LA64-NEXT: maskeqz $a5, $a1, $a5
; LA64-NEXT: or $a5, $a5, $a2
@@ -340,12 +340,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB7_3 Depth 2
; LA64-NEXT: move $a3, $a2
-; LA64-NEXT: addi.d $a2, $a2, -1
+; LA64-NEXT: sltu $a2, $a1, $a2
+; LA64-NEXT: addi.d $a4, $a3, -1
+; LA64-NEXT: masknez $a4, $a4, $a2
+; LA64-NEXT: maskeqz $a2, $a1, $a2
+; LA64-NEXT: or $a2, $a2, $a4
; LA64-NEXT: sltui $a4, $a3, 1
-; LA64-NEXT: sltu $a5, $a1, $a3
-; LA64-NEXT: masknez $a2, $a2, $a5
-; LA64-NEXT: maskeqz $a5, $a1, $a5
-; LA64-NEXT: or $a2, $a5, $a2
; LA64-NEXT: masknez $a2, $a2, $a4
; LA64-NEXT: maskeqz $a4, $a1, $a4
; LA64-NEXT: or $a4, $a4, $a2
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
index 78d5c7e4a7977..d2e90f2e85a3e 100644
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -129,22 +129,22 @@ define i48 @test_bitreverse_i48(i48 %a) nounwind {
define i77 @test_bitreverse_i77(i77 %a) nounwind {
; LA32-LABEL: test_bitreverse_i77:
; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a2, $a1, 4
-; LA32-NEXT: ld.w $a3, $a1, 8
-; LA32-NEXT: ld.w $a1, $a1, 0
+; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bitrev.w $a2, $a2
-; LA32-NEXT: slli.w $a4, $a2, 13
+; LA32-NEXT: srli.w $a3, $a2, 19
+; LA32-NEXT: st.h $a3, $a0, 8
+; LA32-NEXT: ld.w $a3, $a1, 4
; LA32-NEXT: bitrev.w $a3, $a3
-; LA32-NEXT: srli.w $a3, $a3, 19
-; LA32-NEXT: or $a3, $a3, $a4
-; LA32-NEXT: srli.w $a2, $a2, 19
+; LA32-NEXT: srli.w $a4, $a3, 19
+; LA32-NEXT: slli.w $a2, $a2, 13
+; LA32-NEXT: or $a2, $a2, $a4
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: ld.w $a1, $a1, 8
+; LA32-NEXT: slli.w $a2, $a3, 13
; LA32-NEXT: bitrev.w $a1, $a1
-; LA32-NEXT: slli.w $a4, $a1, 13
-; LA32-NEXT: or $a2, $a4, $a2
; LA32-NEXT: srli.w $a1, $a1, 19
-; LA32-NEXT: st.h $a1, $a0, 8
-; LA32-NEXT: st.w $a2, $a0, 4
-; LA32-NEXT: st.w $a3, $a0, 0
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: test_bitreverse_i77:
@@ -163,18 +163,18 @@ define i77 @test_bitreverse_i77(i77 %a) nounwind {
define i128 @test_bitreverse_i128(i128 %a) nounwind {
; LA32-LABEL: test_bitreverse_i128:
; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a2, $a1, 12
-; LA32-NEXT: ld.w $a3, $a1, 8
-; LA32-NEXT: ld.w $a4, $a1, 4
-; LA32-NEXT: ld.w $a1, $a1, 0
+; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: bitrev.w $a2, $a2
-; LA32-NEXT: bitrev.w $a3, $a3
-; LA32-NEXT: bitrev.w $a4, $a4
+; LA32-NEXT: st.w $a2, $a0, 12
+; LA32-NEXT: ld.w $a2, $a1, 4
+; LA32-NEXT: bitrev.w $a2, $a2
+; LA32-NEXT: st.w $a2, $a0, 8
+; LA32-NEXT: ld.w $a2, $a1, 8
+; LA32-NEXT: bitrev.w $a2, $a2
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: ld.w $a1, $a1, 12
; LA32-NEXT: bitrev.w $a1, $a1
-; LA32-NEXT: st.w $a1, $a0, 12
-; LA32-NEXT: st.w $a4, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 4
-; LA32-NEXT: st.w $a2, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: test_bitreverse_i128:
diff --git a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
index c8f9596b9b0c1..88fda30efeacd 100644
--- a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll
@@ -114,17 +114,17 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
define i32 @pr55484(i32 %0) {
; LA32-LABEL: pr55484:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a1, $a0, 8
-; LA32-NEXT: slli.w $a0, $a0, 8
-; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: slli.w $a1, $a0, 8
+; LA32-NEXT: srli.w $a0, $a0, 8
+; LA32-NEXT: or $a0, $a0, $a1
; LA32-NEXT: ext.w.h $a0, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: pr55484:
; LA64: # %bb.0:
-; LA64-NEXT: srli.d $a1, $a0, 8
-; LA64-NEXT: slli.d $a0, $a0, 8
-; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: slli.d $a1, $a0, 8
+; LA64-NEXT: srli.d $a0, $a0, 8
+; LA64-NEXT: or $a0, $a0, $a1
; LA64-NEXT: ext.w.h $a0, $a0
; LA64-NEXT: ret
%2 = lshr i32 %0, 8
diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
index 122dab7fb4963..47e2172df1497 100644
--- a/llvm/test/CodeGen/LoongArch/bswap.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -83,20 +83,20 @@ define i48 @test_bswap_i48(i48 %a) nounwind {
define i80 @test_bswap_i80(i80 %a) nounwind {
; LA32-LABEL: test_bswap_i80:
; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a2, $a1, 4
-; LA32-NEXT: ld.w $a3, $a1, 8
-; LA32-NEXT: ld.w $a1, $a1, 0
+; LA32-NEXT: ld.w $a2, $a1, 0
+; LA32-NEXT: ld.w $a3, $a1, 4
; LA32-NEXT: revb.2h $a2, $a2
; LA32-NEXT: rotri.w $a2, $a2, 16
; LA32-NEXT: revb.2h $a3, $a3
; LA32-NEXT: rotri.w $a3, $a3, 16
-; LA32-NEXT: bytepick.w $a3, $a3, $a2, 2
+; LA32-NEXT: bytepick.w $a4, $a3, $a2, 2
+; LA32-NEXT: st.w $a4, $a0, 4
+; LA32-NEXT: ld.w $a1, $a1, 8
; LA32-NEXT: revb.2h $a1, $a1
; LA32-NEXT: rotri.w $a1, $a1, 16
-; LA32-NEXT: bytepick.w $a2, $a2, $a1, 2
-; LA32-NEXT: srli.w $a1, $a1, 16
-; LA32-NEXT: st.w $a2, $a0, 4
-; LA32-NEXT: st.w $a3, $a0, 0
+; LA32-NEXT: bytepick.w $a1, $a1, $a3, 2
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: srli.w $a1, $a2, 16
; LA32-NEXT: st.h $a1, $a0, 8
; LA32-NEXT: ret
;
@@ -114,22 +114,22 @@ define i80 @test_bswap_i80(i80 %a) nounwind {
define i128 @test_bswap_i128(i128 %a) nounwind {
; LA32-LABEL: test_bswap_i128:
; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a2, $a1, 12
-; LA32-NEXT: ld.w $a3, $a1, 0
-; LA32-NEXT: ld.w $a4, $a1, 8
-; LA32-NEXT: ld.w $a1, $a1, 4
+; LA32-NEXT: ld.w $a2, $a1, 0
+; LA32-NEXT: revb.2h $a2, $a2
+; LA32-NEXT: rotri.w $a2, $a2, 16
+; LA32-NEXT: st.w $a2, $a0, 12
+; LA32-NEXT: ld.w $a2, $a1, 4
; LA32-NEXT: revb.2h $a2, $a2
; LA32-NEXT: rotri.w $a2, $a2, 16
-; LA32-NEXT: revb.2h $a4, $a4
-; LA32-NEXT: rotri.w $a4, $a4, 16
+; LA32-NEXT: st.w $a2, $a0, 8
+; LA32-NEXT: ld.w $a2, $a1, 8
+; LA32-NEXT: revb.2h $a2, $a2
+; LA32-NEXT: rotri.w $a2, $a2, 16
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: ld.w $a1, $a1, 12
; LA32-NEXT: revb.2h $a1, $a1
; LA32-NEXT: rotri.w $a1, $a1, 16
-; LA32-NEXT: revb.2h $a3, $a3
-; LA32-NEXT: rotri.w $a3, $a3, 16
-; LA32-NEXT: st.w $a3, $a0, 12
-; LA32-NEXT: st.w $a1, $a0, 8
-; LA32-NEXT: st.w $a4, $a0, 4
-; LA32-NEXT: st.w $a2, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: test_bswap_i128:
diff --git a/llvm/test/CodeGen/LoongArch/bytepick.ll b/llvm/test/CodeGen/LoongArch/bytepick.ll
index 22a78bcd56119..cb728619a8e97 100644
--- a/llvm/test/CodeGen/LoongArch/bytepick.ll
+++ b/llvm/test/CodeGen/LoongArch/bytepick.ll
@@ -14,8 +14,8 @@ define i32 @pick_i32_1(i32 %a, i32 %b) {
;
; LA64-LABEL: pick_i32_1:
; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 24
; LA64-NEXT: slli.d $a0, $a0, 8
+; LA64-NEXT: bstrpick.d $a1, $a1, 31, 24
; LA64-NEXT: or $a0, $a1, $a0
; LA64-NEXT: ret
%1 = lshr i32 %b, 24
@@ -52,8 +52,8 @@ define i32 @pick_i32_2(i32 %a, i32 %b) {
;
; LA64-LABEL: pick_i32_2:
; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 16
; LA64-NEXT: slli.d $a0, $a0, 16
+; LA64-NEXT: bstrpick.d $a1, $a1, 31, 16
; LA64-NEXT: or $a0, $a1, $a0
; LA64-NEXT: ret
%1 = lshr i32 %b, 16
@@ -90,8 +90,8 @@ define i32 @pick_i32_3(i32 %a, i32 %b) {
;
; LA64-LABEL: pick_i32_3:
; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 8
; LA64-NEXT: slli.d $a0, $a0, 24
+; LA64-NEXT: bstrpick.d $a1, $a1, 31, 8
; LA64-NEXT: or $a0, $a1, $a0
; LA64-NEXT: ret
%1 = lshr i32 %b, 8
@@ -123,8 +123,9 @@ define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
define i64 @pick_i64_1(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_1:
; LA32: # %bb.0:
+; LA32-NEXT: bytepick.w $a2, $a3, $a0, 1
; LA32-NEXT: bytepick.w $a1, $a0, $a1, 1
-; LA32-NEXT: bytepick.w $a0, $a3, $a0, 1
+; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_1:
@@ -142,8 +143,9 @@ define i64 @pick_i64_1(i64 %a, i64 %b) {
define i64 @pick_i64_2(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_2:
; LA32: # %bb.0:
+; LA32-NEXT: bytepick.w $a2, $a3, $a0, 2
; LA32-NEXT: bytepick.w $a1, $a0, $a1, 2
-; LA32-NEXT: bytepick.w $a0, $a3, $a0, 2
+; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_2:
@@ -161,8 +163,9 @@ define i64 @pick_i64_2(i64 %a, i64 %b) {
define i64 @pick_i64_3(i64 %a, i64 %b) {
; LA32-LABEL: pick_i64_3:
; LA32: # %bb.0:
+; LA32-NEXT: bytepick.w $a2, $a3, $a0, 3
; LA32-NEXT: bytepick.w $a1, $a0, $a1, 3
-; LA32-NEXT: bytepick.w $a0, $a3, $a0, 3
+; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: pick_i64_3:
diff --git a/llvm/test/CodeGen/...
[truncated]
``````````
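As a side note, the effect of this choice can also be reproduced locally, independently of the target default, via `llc -pre-RA-sched=list-burr` (the register-pressure-reducing list scheduler), with `-pre-RA-sched=source` as the baseline for comparison; this assumes an `llc` built with the LoongArch target enabled.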
https://github.com/llvm/llvm-project/pull/95741