[llvm] 868f23f - [LoongArch] Override hooks to enable sink-and-fold support in MachineSink (#163721)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 22:02:59 PST 2025
Author: ZhaoQi
Date: 2025-11-06T14:02:55+08:00
New Revision: 868f23f8cc1aec26c40f070d85f70ed3cb6b72cb
URL: https://github.com/llvm/llvm-project/commit/868f23f8cc1aec26c40f070d85f70ed3cb6b72cb
DIFF: https://github.com/llvm/llvm-project/commit/868f23f8cc1aec26c40f070d85f70ed3cb6b72cb.diff
LOG: [LoongArch] Override hooks to enable sink-and-fold support in MachineSink (#163721)
Add the option `loongarch-enable-sink-fold` to enable sink-and-fold in
MachineSink, and default it to `true`. With it enabled, MachineSink can
fold an `addi` + `load/store` pair into a single `load/store` with an
immediate offset.
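For illustration, a hand-written sketch (not taken verbatim from the tests
below) of the rewrite this enables, when the combined offset still fits the
memory instruction's immediate field:

    # before: address materialized by a separate addi
    addi.d  $a1, $a0, 8
    ld.d    $a2, $a1, 0
    # after: the addi is folded into the load's offset
    ld.d    $a2, $a0, 8

The behavior can be disabled via the hidden cl::opt introduced below
(e.g. `-loongarch-enable-sink-fold=false` when invoking llc).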
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
llvm/test/CodeGen/LoongArch/ldptr.ll
llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
llvm/test/CodeGen/LoongArch/stptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index c89212dae72d9..90a4723c9a3ed 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -756,6 +756,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
+ Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const {
+ enum MemIOffsetType {
+ Imm14Shift2,
+ Imm12,
+ Imm11Shift1,
+ Imm10Shift2,
+ Imm9Shift3,
+ Imm8,
+ Imm8Shift1,
+ Imm8Shift2,
+ Imm8Shift3
+ };
+
+ MemIOffsetType OT;
+ switch (MemI.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::LDPTR_W:
+ case LoongArch::LDPTR_D:
+ case LoongArch::STPTR_W:
+ case LoongArch::STPTR_D:
+ OT = Imm14Shift2;
+ break;
+ case LoongArch::LD_B:
+ case LoongArch::LD_H:
+ case LoongArch::LD_W:
+ case LoongArch::LD_D:
+ case LoongArch::LD_BU:
+ case LoongArch::LD_HU:
+ case LoongArch::LD_WU:
+ case LoongArch::ST_B:
+ case LoongArch::ST_H:
+ case LoongArch::ST_W:
+ case LoongArch::ST_D:
+ case LoongArch::FLD_S:
+ case LoongArch::FLD_D:
+ case LoongArch::FST_S:
+ case LoongArch::FST_D:
+ case LoongArch::VLD:
+ case LoongArch::VST:
+ case LoongArch::XVLD:
+ case LoongArch::XVST:
+ case LoongArch::VLDREPL_B:
+ case LoongArch::XVLDREPL_B:
+ OT = Imm12;
+ break;
+ case LoongArch::VLDREPL_H:
+ case LoongArch::XVLDREPL_H:
+ OT = Imm11Shift1;
+ break;
+ case LoongArch::VLDREPL_W:
+ case LoongArch::XVLDREPL_W:
+ OT = Imm10Shift2;
+ break;
+ case LoongArch::VLDREPL_D:
+ case LoongArch::XVLDREPL_D:
+ OT = Imm9Shift3;
+ break;
+ case LoongArch::VSTELM_B:
+ case LoongArch::XVSTELM_B:
+ OT = Imm8;
+ break;
+ case LoongArch::VSTELM_H:
+ case LoongArch::XVSTELM_H:
+ OT = Imm8Shift1;
+ break;
+ case LoongArch::VSTELM_W:
+ case LoongArch::XVSTELM_W:
+ OT = Imm8Shift2;
+ break;
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_D:
+ OT = Imm8Shift3;
+ break;
+ }
+
+ if (MemI.getOperand(0).getReg() == Reg)
+ return false;
+
+ if ((AddrI.getOpcode() != LoongArch::ADDI_W &&
+ AddrI.getOpcode() != LoongArch::ADDI_D) ||
+ !AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm())
+ return false;
+
+ int64_t OldOffset = MemI.getOperand(2).getImm();
+ int64_t Disp = AddrI.getOperand(2).getImm();
+ int64_t NewOffset = OldOffset + Disp;
+ if (!STI.is64Bit())
+ NewOffset = SignExtend64<32>(NewOffset);
+
+ if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) &&
+ !(OT == Imm12 && isInt<12>(NewOffset)) &&
+ !(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) &&
+ !(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) &&
+ !(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) &&
+ !(OT == Imm8 && isInt<8>(NewOffset)) &&
+ !(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) &&
+ !(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) &&
+ !(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset)))
+ return false;
+
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+}
+
+MachineInstr *
+LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const {
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+ "Addressing mode not supported for folding");
+
+ unsigned MemIOp = MemI.getOpcode();
+ switch (MemIOp) {
+ default:
+ return BuildMI(MBB, MemI, DL, get(MemIOp))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ case LoongArch::VSTELM_B:
+ case LoongArch::VSTELM_H:
+ case LoongArch::VSTELM_W:
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_B:
+ case LoongArch::XVSTELM_H:
+ case LoongArch::XVSTELM_W:
+ case LoongArch::XVSTELM_D:
+ return BuildMI(MBB, MemI, DL, get(MemIOp))
+ .addReg(MemI.getOperand(0).getReg(), 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .addImm(MemI.getOperand(3).getImm())
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ }
+}
+
// Returns true if this is the sext.w pattern, addi.w rd, rs, 0.
bool LoongArch::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index f25958a32bec4..f69a558bdeca9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -93,6 +93,12 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const override;
+ MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const override;
+
protected:
const LoongArchSubtarget &STI;
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 9de4c9d83792b..92a9388e5cb7b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -62,6 +62,11 @@ static cl::opt<bool>
cl::desc("Enable the merge base offset pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableSinkFold("loongarch-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(true), cl::Hidden);
+
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
return RM.value_or(Reloc::Static);
}
@@ -146,7 +151,9 @@ namespace {
class LoongArchPassConfig : public TargetPassConfig {
public:
LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ setEnableSinkAndFold(EnableSinkFold);
+ }
LoongArchTargetMachine &getLoongArchTargetMachine() const {
return getTM<LoongArchTargetMachine>();
diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll
index c3656a6bdafba..9bafa10c47e3f 100644
--- a/llvm/test/CodeGen/LoongArch/ldptr.ll
+++ b/llvm/test/CodeGen/LoongArch/ldptr.ll
@@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind {
; LA32-LABEL: ldptr_w:
; LA32: # %bb.0: # %entry
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: ldptr_w:
@@ -81,10 +80,9 @@ entry:
define i64 @ldptr_d(ptr %p) nounwind {
; LA32-LABEL: ldptr_d:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a1, $a0, 1
-; LA32-NEXT: ld.w $a0, $a1, 0
-; LA32-NEXT: ld.w $a1, $a1, 4
+; LA32-NEXT: addi.w $a1, $a0, 2047
+; LA32-NEXT: ld.w $a0, $a1, 1
+; LA32-NEXT: ld.w $a1, $a1, 5
; LA32-NEXT: ret
;
; LA64-LABEL: ldptr_d:
diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
index 9a806a12f7de6..93f73e5cd30ff 100644
--- a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
+++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
@@ -25,14 +25,13 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 8
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB0_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -45,8 +44,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: ld.w $a0, $s2, 4
-; LA32-NEXT: ld.w $a1, $s2, 0
+; LA32-NEXT: ld.w $a0, $s2, 12
+; LA32-NEXT: ld.w $a1, $s2, 8
; LA32-NEXT: add.w $a0, $a0, $s6
; LA32-NEXT: add.w $s3, $a1, $s3
; LA32-NEXT: sltu $a1, $s3, $a1
@@ -63,8 +62,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s3, $zero
; LA32-NEXT: move $s6, $zero
; LA32-NEXT: .LBB0_4: # %for.cond.cleanup
-; LA32-NEXT: st.w $s3, $s2, 0
-; LA32-NEXT: st.w $s6, $s2, 4
+; LA32-NEXT: st.w $s3, $s2, 8
+; LA32-NEXT: st.w $s6, $s2, 12
; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
@@ -88,8 +87,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 8
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB0_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -100,7 +98,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: ld.d $a0, $s1, 0
+; LA64-NEXT: ld.d $a0, $s1, 8
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: add.d $s2, $a0, $s2
; LA64-NEXT: bnez $s0, .LBB0_2
@@ -108,7 +106,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB0_3:
; LA64-NEXT: move $s2, $zero
; LA64-NEXT: .LBB0_4: # %for.cond.cleanup
-; LA64-NEXT: st.d $s2, $s1, 0
+; LA64-NEXT: st.d $s2, $s1, 8
; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -153,14 +151,13 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB1_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -172,7 +169,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: fld.s $fa0, $s2, 0
+; LA32-NEXT: fld.s $fa0, $s2, 16
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -185,7 +182,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB1_3:
; LA32-NEXT: movgr2fr.w $fs0, $zero
; LA32-NEXT: .LBB1_4: # %for.cond.cleanup
-; LA32-NEXT: fst.s $fs0, $s2, 0
+; LA32-NEXT: fst.s $fs0, $s2, 16
; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
@@ -208,8 +205,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB1_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -220,7 +216,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: fld.s $fa0, $s1, 0
+; LA64-NEXT: fld.s $fa0, $s1, 16
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: fadd.s $fs0, $fa0, $fs0
; LA64-NEXT: bnez $s0, .LBB1_2
@@ -228,7 +224,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB1_3:
; LA64-NEXT: movgr2fr.w $fs0, $zero
; LA64-NEXT: .LBB1_4: # %for.cond.cleanup
-; LA64-NEXT: fst.s $fs0, $s1, 0
+; LA64-NEXT: fst.s $fs0, $s1, 16
; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -271,14 +267,13 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s0, $a3
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a0, $a0, 6
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB2_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -291,7 +286,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: vld $vr0, $s2, 0
+; LA32-NEXT: vld $vr0, $s2, 16
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -307,7 +302,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB2_3:
; LA32-NEXT: vrepli.b $vr0, 0
; LA32-NEXT: .LBB2_4: # %for.cond.cleanup
-; LA32-NEXT: vst $vr0, $s2, 0
+; LA32-NEXT: vst $vr0, $s2, 16
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -326,8 +321,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT: slli.d $a0, $a0, 6
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $a1, .LBB2_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -340,7 +334,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: vld $vr0, $s1, 0
+; LA64-NEXT: vld $vr0, $s1, 16
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
; LA64-NEXT: vadd.w $vr1, $vr0, $vr1
@@ -351,7 +345,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB2_3:
; LA64-NEXT: vrepli.b $vr0, 0
; LA64-NEXT: .LBB2_4: # %for.cond.cleanup
-; LA64-NEXT: vst $vr0, $s1, 0
+; LA64-NEXT: vst $vr0, $s1, 16
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -393,14 +387,13 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s0, $a3
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a0, $a0, 6
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 32
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB3_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -413,7 +406,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: xvld $xr0, $s2, 0
+; LA32-NEXT: xvld $xr0, $s2, 32
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -429,7 +422,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB3_3:
; LA32-NEXT: xvrepli.b $xr0, 0
; LA32-NEXT: .LBB3_4: # %for.cond.cleanup
-; LA32-NEXT: xvst $xr0, $s2, 0
+; LA32-NEXT: xvst $xr0, $s2, 32
; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -448,8 +441,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
; LA64-NEXT: slli.d $a0, $a0, 6
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 32
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $a1, .LBB3_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -462,7 +454,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: xvld $xr0, $s1, 0
+; LA64-NEXT: xvld $xr0, $s1, 32
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1
@@ -473,7 +465,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB3_3:
; LA64-NEXT: xvrepli.b $xr0, 0
; LA64-NEXT: .LBB3_4: # %for.cond.cleanup
-; LA64-NEXT: xvst $xr0, $s1, 0
+; LA64-NEXT: xvst $xr0, $s1, 32
; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
@@ -516,14 +508,13 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 16
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB4_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -536,7 +527,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: vldrepl.b $vr0, $s2, 0
+; LA32-NEXT: vldrepl.b $vr0, $s2, 16
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -552,7 +543,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB4_3:
; LA32-NEXT: vrepli.b $vr0, 0
; LA32-NEXT: .LBB4_4: # %for.cond.cleanup
-; LA32-NEXT: vstelm.b $vr0, $s2, 0, 1
+; LA32-NEXT: vstelm.b $vr0, $s2, 16, 1
; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -573,8 +564,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 16
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB4_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -586,7 +576,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: vldrepl.b $vr0, $s1, 0
+; LA64-NEXT: vldrepl.b $vr0, $s1, 16
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
; LA64-NEXT: vadd.b $vr1, $vr0, $vr1
@@ -597,7 +587,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB4_3:
; LA64-NEXT: vrepli.b $vr0, 0
; LA64-NEXT: .LBB4_4: # %for.cond.cleanup
-; LA64-NEXT: vstelm.b $vr0, $s1, 0, 1
+; LA64-NEXT: vstelm.b $vr0, $s1, 16, 1
; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -643,14 +633,13 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: move $s1, $a2
; LA32-NEXT: slli.w $a1, $a0, 4
; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT: add.w $a0, $a4, $a0
; LA32-NEXT: sltui $a1, $a3, 1
; LA32-NEXT: slti $a2, $a3, 0
; LA32-NEXT: masknez $a2, $a2, $a1
; LA32-NEXT: sltui $a3, $s1, 1
; LA32-NEXT: maskeqz $a1, $a3, $a1
; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: addi.w $s2, $a0, 8
+; LA32-NEXT: add.w $s2, $a4, $a0
; LA32-NEXT: bnez $a1, .LBB5_3
; LA32-NEXT: # %bb.1: # %for.body.preheader
; LA32-NEXT: move $fp, $a4
@@ -663,7 +652,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl f
-; LA32-NEXT: xvldrepl.d $xr0, $s2, 0
+; LA32-NEXT: xvldrepl.d $xr0, $s2, 8
; LA32-NEXT: addi.w $s3, $s3, 1
; LA32-NEXT: sltui $a0, $s3, 1
; LA32-NEXT: add.w $s4, $s4, $a0
@@ -679,7 +668,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA32-NEXT: .LBB5_3:
; LA32-NEXT: xvrepli.b $xr0, 0
; LA32-NEXT: .LBB5_4: # %for.cond.cleanup
-; LA32-NEXT: xvstelm.d $xr0, $s2, 0, 1
+; LA32-NEXT: xvstelm.d $xr0, $s2, 8, 1
; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -700,8 +689,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $s0, $a1
; LA64-NEXT: slli.d $a1, $a0, 4
; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: addi.d $s1, $a0, 8
+; LA64-NEXT: add.d $s1, $a2, $a0
; LA64-NEXT: blez $s0, .LBB5_3
; LA64-NEXT: # %bb.1: # %for.body.preheader
; LA64-NEXT: move $fp, $a2
@@ -713,7 +701,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(f)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: xvldrepl.d $xr0, $s1, 0
+; LA64-NEXT: xvldrepl.d $xr0, $s1, 8
; LA64-NEXT: addi.d $s0, $s0, -1
; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1
@@ -724,7 +712,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
; LA64-NEXT: .LBB5_3:
; LA64-NEXT: xvrepli.b $xr0, 0
; LA64-NEXT: .LBB5_4: # %for.cond.cleanup
-; LA64-NEXT: xvstelm.d $xr0, $s1, 0, 1
+; LA64-NEXT: xvstelm.d $xr0, $s1, 8, 1
; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll
index d70f9f4ba1603..23b433aa15856 100644
--- a/llvm/test/CodeGen/LoongArch/stptr.ll
+++ b/llvm/test/CodeGen/LoongArch/stptr.ll
@@ -23,8 +23,7 @@ define void @stptr_w(ptr %p, i32 signext %val) nounwind {
; LA32-LABEL: stptr_w:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: stptr_w:
@@ -77,9 +76,8 @@ define void @stptr_d(ptr %p, i64 %val) nounwind {
; LA32-LABEL: stptr_d:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: st.w $a2, $a0, 4
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a2, $a0, 5
+; LA32-NEXT: st.w $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: stptr_d: