[llvm-branch-commits] [llvm] [LoongArch] Optimize general fp build_vector lowering (PR #149486)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jul 18 02:53:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
Author: ZhaoQi (zhaoqi5)
<details>
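<summary>Changes</summary>
Seed the general floating-point BUILD_VECTOR lowering with a SCALAR_TO_VECTOR of operand 0 and insert only elements 1..N-1, instead of inserting every element (including element 0) into an undef vector. This removes the redundant element-0 insert (e.g. `xvinsve0.w $xr0, $xr0, 0` / `vextrins.d $vr0, $vr0, 0`) from the generated code, as reflected in the updated tests.
---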
Full diff: https://github.com/llvm/llvm-project/pull/149486.diff
8 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+3-2)
- (modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (-2)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fpowi.ll (+17-19)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll (+4-7)
- (modified) llvm/test/CodeGen/LoongArch/llvm.exp10.ll (+3-5)
- (modified) llvm/test/CodeGen/LoongArch/llvm.sincos.ll (+12-14)
- (modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (-2)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fpowi.ll (+16-19)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 2378664ca8155..1e2f4dd5c9e5e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2514,8 +2514,9 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
assert(ResTy.isVector());
unsigned NumElts = ResTy.getVectorNumElements();
- SDValue Vector = DAG.getUNDEF(ResTy);
- for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Vector =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Node->getOperand(0));
+ for (unsigned i = 1; i < NumElts; ++i) {
Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
Node->getOperand(i),
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index a4f3fe717ae25..61a915a2837cb 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -475,7 +475,6 @@ define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float
; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT: xvinsve0.w $xr0, $xr0, 0
; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
; CHECK-NEXT: xvinsve0.w $xr0, $xr2, 2
; CHECK-NEXT: xvinsve0.w $xr0, $xr3, 3
@@ -505,7 +504,6 @@ define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, dou
; CHECK-NEXT: # kill: def $f2_64 killed $f2_64 def $xr2
; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: xvinsve0.d $xr0, $xr0, 0
; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
; CHECK-NEXT: xvinsve0.d $xr0, $xr2, 2
; CHECK-NEXT: xvinsve0.d $xr0, $xr3, 3
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
index 014a41ac9494c..76bb55b314f4a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
@@ -11,24 +11,23 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT: xvinsve0.w $xr0, $xr0, 0
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 1
-; CHECK-NEXT: xvst $xr1, $sp, 32 # 32-byte Folded Spill
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
+; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
; CHECK-NEXT: movgr2fr.w $fa0, $a0
@@ -106,44 +105,43 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -80
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: xvinsve0.d $xr0, $xr0, 0
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 1
-; CHECK-NEXT: xvst $xr1, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr1, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
+; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr1, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 2
-; CHECK-NEXT: xvst $xr1, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr1, $sp, 0 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr1, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 3
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index f154dd3b8eb3c..221aba3166ed7 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -6,15 +6,12 @@
define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: shufflevector_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2
-; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 1
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 2
+; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3
-; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 3
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
; CHECK-NEXT: ret
entry:
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>
diff --git a/llvm/test/CodeGen/LoongArch/llvm.exp10.ll b/llvm/test/CodeGen/LoongArch/llvm.exp10.ll
index c667a3609e7f1..62ea5cba2fc26 100644
--- a/llvm/test/CodeGen/LoongArch/llvm.exp10.ll
+++ b/llvm/test/CodeGen/LoongArch/llvm.exp10.ll
@@ -196,22 +196,20 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) #0 {
; LA64-NEXT: addi.d $sp, $sp, -48
; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
+; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; LA64-NEXT: pcaddu18i $ra, %call36(exp10)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vextrins.d $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
+; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; LA64-NEXT: pcaddu18i $ra, %call36(exp10)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr1, $vr0, 16
-; LA64-NEXT: vori.b $vr0, $vr1, 0
+; LA64-NEXT: vextrins.d $vr0, $vr1, 16
; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 48
; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/llvm.sincos.ll b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll
index 38ff783d85286..6ebf48d29e89d 100644
--- a/llvm/test/CodeGen/LoongArch/llvm.sincos.ll
+++ b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll
@@ -571,39 +571,37 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) #0 {
; LA64-NEXT: addi.d $sp, $sp, -64
; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; LA64-NEXT: pcaddu18i $ra, %call36(sin)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vextrins.d $vr0, $vr0, 0
-; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; LA64-NEXT: vreplvei.d $vr0, $vr0, 1
+; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; LA64-NEXT: pcaddu18i $ra, %call36(sin)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr1, $vr0, 16
-; LA64-NEXT: vst $vr1, $sp, 32 # 16-byte Folded Spill
-; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.d $vr0, $vr1, 16
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; LA64-NEXT: pcaddu18i $ra, %call36(cos)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vextrins.d $vr0, $vr0, 0
-; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; LA64-NEXT: pcaddu18i $ra, %call36(cos)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; LA64-NEXT: vextrins.d $vr1, $vr0, 16
+; LA64-NEXT: fmov.d $fa1, $fa0
; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.d $vr1, $vr0, 16
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 64
; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index f723343964f5d..afc87d1575da5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -338,7 +338,6 @@ define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float
; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: vextrins.w $vr0, $vr0, 0
; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
; CHECK-NEXT: vextrins.w $vr0, $vr2, 32
; CHECK-NEXT: vextrins.w $vr0, $vr3, 48
@@ -358,7 +357,6 @@ define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr0, 0
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll
index 79663b63daf08..735dad453660e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll
@@ -9,44 +9,43 @@ define <4 x float> @powi_v4f32(<4 x float> %va, i32 %b) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -48
; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
-; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: vextrins.w $vr0, $vr0, 0
-; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; CHECK-NEXT: vextrins.w $vr1, $vr0, 16
-; CHECK-NEXT: vst $vr1, $sp, 16 # 16-byte Folded Spill
-; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
+; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
; CHECK-NEXT: vextrins.w $vr1, $vr0, 32
-; CHECK-NEXT: vst $vr1, $sp, 16 # 16-byte Folded Spill
-; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; CHECK-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
; CHECK-NEXT: vextrins.w $vr1, $vr0, 48
; CHECK-NEXT: vori.b $vr0, $vr1, 0
; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -68,24 +67,22 @@ define <2 x double> @powi_v2f64(<2 x double> %va, i32 %b) nounwind {
; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr0, 0
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; CHECK-NEXT: vextrins.d $vr1, $vr0, 16
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 48
``````````
</details>
https://github.com/llvm/llvm-project/pull/149486
More information about the llvm-branch-commits mailing list