[llvm] [LoongArch] Merge base and offset for LSX/LASX memory accesses (PR #104452)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 15 07:56:49 PDT 2024
heiher (https://github.com/heiher) created https://github.com/llvm/llvm-project/pull/104452
This patch extends the LoongArchMergeBaseOffset pass to LSX/LASX memory accesses: the %pc_lo12 part of a symbol address computed by a pcalau12i/addi.d pair is folded into the 12-bit signed offset field of vld/vst (LSX) and xvld/xvst (LASX), eliminating the addi.d. Under the large code model, where the low address bits are materialized in a separate register, the access is instead rewritten to the indexed forms vldx/vstx and xvldx/xvstx, eliminating the add.d. The replicating loads vldrepl.b/xvldrepl.b are folded as well, but only in the non-large pattern (likely because no indexed variants of them exist to rewrite to); foldIntoMemoryOps bails out on them when the large pattern is matched. See the before/after sequences below.
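For the medium code model, the transformation looks like this (taken verbatim from the updated llvm/test/CodeGen/LoongArch/merge-base-offset.ll checks):

  Before:
    pcalau12i $a0, %pc_hi20(g_i32x4_src)
    addi.d    $a0, $a0, %pc_lo12(g_i32x4_src)
    vld       $vr0, $a0, 0

  After:
    pcalau12i $a0, %pc_hi20(g_i32x4_src)
    vld       $vr0, $a0, %pc_lo12(g_i32x4_src)

Under the large code model, the final add.d is folded into the indexed load instead:

  Before:
    pcalau12i $a0, %pc_hi20(g_i32x4_src)
    addi.d    $a1, $zero, %pc_lo12(g_i32x4_src)
    lu32i.d   $a1, %pc64_lo20(g_i32x4_src)
    lu52i.d   $a1, $a1, %pc64_hi12(g_i32x4_src)
    add.d     $a0, $a1, $a0
    vld       $vr0, $a0, 0

  After:
    pcalau12i $a0, %pc_hi20(g_i32x4_src)
    addi.d    $a1, $zero, %pc_lo12(g_i32x4_src)
    lu32i.d   $a1, %pc64_lo20(g_i32x4_src)
    lu52i.d   $a1, $a1, %pc64_hi12(g_i32x4_src)
    vldx      $vr0, $a1, $a0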
From 81d11c813e719ff6ad6f9639635a15e895e7b840 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Thu, 15 Aug 2024 17:29:56 +0800
Subject: [PATCH] [LoongArch] Merge base and offset for LSX/LASX memory
accesses
---
.../LoongArch/LoongArchMergeBaseOffset.cpp | 24 ++++++-
.../LoongArch/can-not-realign-stack.ll | 12 ++--
.../CodeGen/LoongArch/lasx/build-vector.ll | 18 ++---
.../lasx/ir-instruction/shuffle-as-xvshuf.ll | 15 ++---
llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 3 +-
.../CodeGen/LoongArch/lsx/build-vector.ll | 18 ++---
.../lsx/ir-instruction/shuffle-as-vshuf.ll | 18 ++---
llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 3 +-
.../CodeGen/LoongArch/merge-base-offset.ll | 66 +++++++------------
9 files changed, 74 insertions(+), 103 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index ae50b7a6f923e3..47071d29c2cd44 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -381,6 +381,14 @@ static unsigned getNewOpc(unsigned Op, bool isLarge) {
return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
case LoongArch::FLD_D:
return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
+ case LoongArch::VLD:
+ return isLarge ? LoongArch::VLDX : LoongArch::VLD;
+ case LoongArch::XVLD:
+ return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
+ case LoongArch::VLDREPL_B:
+ return LoongArch::VLDREPL_B;
+ case LoongArch::XVLDREPL_B:
+ return LoongArch::XVLDREPL_B;
case LoongArch::ST_B:
return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
case LoongArch::ST_H:
@@ -395,6 +403,10 @@ static unsigned getNewOpc(unsigned Op, bool isLarge) {
return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
case LoongArch::FST_D:
return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
+ case LoongArch::VST:
+ return isLarge ? LoongArch::VSTX : LoongArch::VST;
+ case LoongArch::XVST:
+ return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
default:
llvm_unreachable("Unexpected opcode for replacement");
}
@@ -444,6 +456,12 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
default:
LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
return false;
+ case LoongArch::VLDREPL_B:
+ case LoongArch::XVLDREPL_B:
+      // We can't do this for the large code model pattern.
+ if (Last)
+ return false;
+ [[fallthrough]];
case LoongArch::LD_B:
case LoongArch::LD_H:
case LoongArch::LD_W:
@@ -455,6 +473,8 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
case LoongArch::LDPTR_D:
case LoongArch::FLD_S:
case LoongArch::FLD_D:
+ case LoongArch::VLD:
+ case LoongArch::XVLD:
case LoongArch::ST_B:
case LoongArch::ST_H:
case LoongArch::ST_W:
@@ -462,7 +482,9 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
case LoongArch::STPTR_W:
case LoongArch::STPTR_D:
case LoongArch::FST_S:
- case LoongArch::FST_D: {
+ case LoongArch::FST_D:
+ case LoongArch::VST:
+ case LoongArch::XVST: {
if (UseMI.getOperand(1).isFI())
return false;
// Register defined by Lo should not be the value register.
diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
index 49155a3966a842..6d5cf5a9429312 100644
--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
@@ -17,20 +17,16 @@ define dso_local noundef signext i32 @main() nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -272
; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvld $xr0, $a0, %pc_lo12(.LCPI0_0)
; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT: xvld $xr1, $a0, 0
+; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_2)
-; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_2)
; CHECK-NEXT: xvst $xr2, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_3)
-; CHECK-NEXT: xvld $xr3, $a0, 0
+; CHECK-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI0_3)
; CHECK-NEXT: xvst $xr3, $sp, 0 # 32-byte Folded Spill
; CHECK-NEXT: xvst $xr0, $sp, 136
; CHECK-NEXT: xvst $xr1, $sp, 168
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 3a0fd0758cb324..b06f6523e977c5 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -153,8 +153,7 @@ define void @buildvector_v32i8_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v32i8_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI12_0)
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -166,8 +165,7 @@ define void @buildvector_v16i16_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v16i16_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0)
-; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI13_0)
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -179,8 +177,7 @@ define void @buildvector_v8i32_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v8i32_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0)
-; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -192,8 +189,7 @@ define void @buildvector_v4i64_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v4i64_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0)
-; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI15_0)
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -205,8 +201,7 @@ define void @buildvector_v2f32_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v2f32_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI16_0)
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -218,8 +213,7 @@ define void @buildvector_v4f64_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v4f64_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI17_0)
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
index 4cc819018f0a8d..6a888051487157 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
@@ -6,8 +6,7 @@ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: shufflevector_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
; CHECK-NEXT: ret
%c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39,
@@ -20,8 +19,7 @@ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: shufflevector_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI1_0)
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
; CHECK-NEXT: xvpermi.d $xr1, $xr1, 78
; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
@@ -37,8 +35,7 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: shufflevector_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
@@ -53,8 +50,7 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: shufflevector_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 238
; CHECK-NEXT: xvpermi.d $xr1, $xr1, 238
; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
@@ -69,8 +65,7 @@ define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: shufflevector_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: xvld $xr2, $a0, 0
+; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
index 7b4c7ced4b5f36..17ba28afc81f5f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
@@ -72,8 +72,7 @@ define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index c04d7ca889f7ef..ba19fe75d7570c 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -153,8 +153,7 @@ define void @buildvector_v16i8_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v16i8_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI12_0)
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -166,8 +165,7 @@ define void @buildvector_v8i16_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v8i16_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0)
-; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI13_0)
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -179,8 +177,7 @@ define void @buildvector_v4i32_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v4i32_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0)
-; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI14_0)
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -192,8 +189,7 @@ define void @buildvector_v2i64_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v2i64_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0)
-; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI15_0)
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -205,8 +201,7 @@ define void @buildvector_v2f32_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v2f32_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI16_0)
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -218,8 +213,7 @@ define void @buildvector_v2f64_const(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v2f64_const:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI17_0)
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
index 55800b31446b3d..ac78a26ba43673 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -5,8 +5,7 @@ define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shufflevector_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI0_0)
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
; CHECK-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
@@ -18,8 +17,7 @@ define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shufflevector_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI1_0)
; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
; CHECK-NEXT: vori.b $vr0, $vr2, 0
; CHECK-NEXT: ret
@@ -32,8 +30,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI2_0)
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
; CHECK-NEXT: vori.b $vr0, $vr2, 0
; CHECK-NEXT: ret
@@ -46,8 +43,7 @@ define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
; CHECK-NEXT: vori.b $vr0, $vr2, 0
; CHECK-NEXT: ret
@@ -60,8 +56,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: shufflevector_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
; CHECK-NEXT: vori.b $vr0, $vr2, 0
; CHECK-NEXT: ret
@@ -74,8 +69,7 @@ define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
+; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
; CHECK-NEXT: vori.b $vr0, $vr2, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
index 48ef3c14a4bf51..1c10e6c3087ad7 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -72,8 +72,7 @@ define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index 58a8e5d77c63fe..1e7a79beb62c61 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -542,21 +542,17 @@ define dso_local void @copy_i32x4() nounwind {
; LA32-LABEL: copy_i32x4:
; LA32: # %bb.0: # %entry
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_src)
-; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vld $vr0, $a0, %pc_lo12(g_i32x4_src)
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_dst)
-; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: vst $vr0, $a0, %pc_lo12(g_i32x4_dst)
; LA32-NEXT: ret
;
; LA64-LABEL: copy_i32x4:
; LA64: # %bb.0: # %entry
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_src)
-; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vld $vr0, $a0, %pc_lo12(g_i32x4_src)
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_dst)
-; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: vst $vr0, $a0, %pc_lo12(g_i32x4_dst)
; LA64-NEXT: ret
;
; LA64-LARGE-LABEL: copy_i32x4:
@@ -565,14 +561,12 @@ define dso_local void @copy_i32x4() nounwind {
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_src)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_src)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_src)
-; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: vld $vr0, $a0, 0
+; LA64-LARGE-NEXT: vldx $vr0, $a1, $a0
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_dst)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_dst)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_dst)
-; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: vst $vr0, $a0, 0
+; LA64-LARGE-NEXT: vstx $vr0, $a1, $a0
; LA64-LARGE-NEXT: ret
entry:
%0 = load <4 x i32>, ptr @g_i32x4_src, align 16
@@ -587,21 +581,17 @@ define dso_local void @copy_i32x8() nounwind {
; LA32-LABEL: copy_i32x8:
; LA32: # %bb.0: # %entry
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_src)
-; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvld $xr0, $a0, %pc_lo12(g_i32x4_src)
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_dst)
-; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: xvst $xr0, $a0, %pc_lo12(g_i32x4_dst)
; LA32-NEXT: ret
;
; LA64-LABEL: copy_i32x8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_src)
-; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvld $xr0, $a0, %pc_lo12(g_i32x4_src)
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_dst)
-; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: xvst $xr0, $a0, %pc_lo12(g_i32x4_dst)
; LA64-NEXT: ret
;
; LA64-LARGE-LABEL: copy_i32x8:
@@ -610,14 +600,12 @@ define dso_local void @copy_i32x8() nounwind {
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_src)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_src)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_src)
-; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: xvld $xr0, $a0, 0
+; LA64-LARGE-NEXT: xvldx $xr0, $a1, $a0
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_dst)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_dst)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_dst)
-; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: xvst $xr0, $a0, 0
+; LA64-LARGE-NEXT: xvstx $xr0, $a1, $a0
; LA64-LARGE-NEXT: ret
entry:
%0 = load <8 x i32>, ptr @g_i32x4_src, align 32
@@ -631,21 +619,17 @@ define void @copy_i8_to_i8x16() {
; LA32-LABEL: copy_i8_to_i8x16:
; LA32: # %bb.0: # %entry
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8)
-; LA32-NEXT: vldrepl.b $vr0, $a0, 0
+; LA32-NEXT: vldrepl.b $vr0, $a0, %pc_lo12(g_i8)
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8x16)
-; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: vst $vr0, $a0, %pc_lo12(g_i8x16)
; LA32-NEXT: ret
;
; LA64-LABEL: copy_i8_to_i8x16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8)
-; LA64-NEXT: vldrepl.b $vr0, $a0, 0
+; LA64-NEXT: vldrepl.b $vr0, $a0, %pc_lo12(g_i8)
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8x16)
-; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: vst $vr0, $a0, %pc_lo12(g_i8x16)
; LA64-NEXT: ret
;
; LA64-LARGE-LABEL: copy_i8_to_i8x16:
@@ -660,8 +644,7 @@ define void @copy_i8_to_i8x16() {
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8x16)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8x16)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8x16)
-; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: vst $vr0, $a0, 0
+; LA64-LARGE-NEXT: vstx $vr0, $a1, $a0
; LA64-LARGE-NEXT: ret
entry:
%0 = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr @g_i8, i32 0)
@@ -675,21 +658,17 @@ define void @copy_i8_to_i8x32() {
; LA32-LABEL: copy_i8_to_i8x32:
; LA32: # %bb.0: # %entry
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8)
-; LA32-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA32-NEXT: xvldrepl.b $xr0, $a0, %pc_lo12(g_i8)
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32)
-; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8x32)
-; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: xvst $xr0, $a0, %pc_lo12(g_i8x32)
; LA32-NEXT: ret
;
; LA64-LABEL: copy_i8_to_i8x32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8)
-; LA64-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA64-NEXT: xvldrepl.b $xr0, $a0, %pc_lo12(g_i8)
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32)
-; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8x32)
-; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: xvst $xr0, $a0, %pc_lo12(g_i8x32)
; LA64-NEXT: ret
;
; LA64-LARGE-LABEL: copy_i8_to_i8x32:
@@ -704,8 +683,7 @@ define void @copy_i8_to_i8x32() {
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8x32)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8x32)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8x32)
-; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: xvst $xr0, $a0, 0
+; LA64-LARGE-NEXT: xvstx $xr0, $a1, $a0
; LA64-LARGE-NEXT: ret
entry:
%0 = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr @g_i8, i32 0)