[llvm] [LoongArch] Override cost hooks to expose more DAG combine opportunities (PR #157824)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 24 02:48:35 PDT 2025


https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/157824

>From ce10516869b79180cfcb8b0fcb4b499f22bf026e Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Wed, 24 Sep 2025 17:34:56 +0800
Subject: [PATCH 1/2] [LoongArch][NFC] Pre-commit scalarize fp tests and fix
 for la32

---
 .../LoongArch/LoongArchISelLowering.cpp       |  6 +-
 .../CodeGen/LoongArch/lasx/scalarize-fp.ll    | 84 +++++++++++++++++++
 .../CodeGen/LoongArch/lsx/scalarize-fp.ll     | 73 ++++++++++++++++
 3 files changed, 159 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll
 create mode 100644 llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32baa2d111270..d32eb7df866ab 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1610,11 +1610,9 @@ lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return DAG.getUNDEF(VT);
 
   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
-  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
-    APInt Imm(64, SplatIndex);
+  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0))
     return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
-                       DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
-  }
+                       DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
 
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll b/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll
new file mode 100644
index 0000000000000..6bd9c9e95f4c3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define <8 x float> @fadd_elt0_v8f32(float %a) nounwind {
+; CHECK-LABEL: fadd_elt0_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT:    lu12i.w $a0, 260096
+; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a0
+; CHECK-NEXT:    xvfadd.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <8 x float> poison, float %a, i32 0
+  %c = fadd <8 x float> %b, <float 1.0, float poison, float poison, float poison, float poison, float poison, float poison, float poison>
+  ret <8 x float> %c
+}
+
+define <4 x double> @fadd_elt0_v4f64(double %a) nounwind {
+; LA32-LABEL: fadd_elt0_v4f64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT:    vldi $vr1, -912
+; LA32-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: fadd_elt0_v4f64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT:    lu52i.d $a0, $zero, 1023
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a0
+; LA64-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; LA64-NEXT:    ret
+entry:
+  %b = insertelement <4 x double> poison, double %a, i32 0
+  %c = fadd <4 x double> %b, <double 1.0, double poison, double poison, double poison>
+  ret <4 x double> %c
+}
+
+define <8 x float> @fsub_splat_v8f32(float %a, float %b) nounwind {
+; CHECK-LABEL: fsub_splat_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vr1
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
+; CHECK-NEXT:    xvrepl128vei.w $xr0, $xr0, 0
+; CHECK-NEXT:    ret
+entry:
+  %insa = insertelement <8 x float> poison, float %a, i32 0
+  %insb = insertelement <8 x float> poison, float %b, i32 0
+  %va = shufflevector <8 x float> %insa, <8 x float> poison, <8 x i32> zeroinitializer
+  %vb = shufflevector <8 x float> %insb, <8 x float> poison, <8 x i32> zeroinitializer
+  %c = fsub <8 x float> %va, %vb
+  ret <8 x float> %c
+}
+
+define <4 x double> @fsub_splat_v4f64(double %a) nounwind {
+; LA32-LABEL: fsub_splat_v4f64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT:    vldi $vr1, -784
+; LA32-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; LA32-NEXT:    xvpermi.d $xr0, $xr0, 68
+; LA32-NEXT:    xvrepl128vei.d $xr0, $xr0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: fsub_splat_v4f64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT:    lu52i.d $a0, $zero, -1025
+; LA64-NEXT:    xvreplgr2vr.d $xr1, $a0
+; LA64-NEXT:    xvfadd.d $xr0, $xr0, $xr1
+; LA64-NEXT:    xvpermi.d $xr0, $xr0, 68
+; LA64-NEXT:    xvrepl128vei.d $xr0, $xr0, 0
+; LA64-NEXT:    ret
+entry:
+  %insa = insertelement <4 x double> poison, double %a, i32 0
+  %insb = insertelement <4 x double> poison, double 1.0, i32 0
+  %va = shufflevector <4 x double> %insa, <4 x double> poison, <4 x i32> zeroinitializer
+  %vb = shufflevector <4 x double> %insb, <4 x double> poison, <4 x i32> zeroinitializer
+  %c = fsub <4 x double> %va, %vb
+  ret <4 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll b/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll
new file mode 100644
index 0000000000000..cf5cbc3b993c6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define <4 x float> @fadd_elt0_v4f32(float %a) nounwind {
+; CHECK-LABEL: fadd_elt0_v4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT:    lu12i.w $a0, 260096
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
+; CHECK-NEXT:    vfadd.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %b = insertelement <4 x float> poison, float %a, i32 0
+  %c = fadd <4 x float> %b, <float 1.0, float poison, float poison, float poison>
+  ret <4 x float> %c
+}
+
+define <2 x double> @fadd_elt0_v2f64(double %a) nounwind {
+; LA32-LABEL: fadd_elt0_v2f64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT:    vldi $vr1, -912
+; LA32-NEXT:    vfadd.d $vr0, $vr0, $vr1
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: fadd_elt0_v2f64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT:    lu52i.d $a0, $zero, 1023
+; LA64-NEXT:    vreplgr2vr.d $vr1, $a0
+; LA64-NEXT:    vfadd.d $vr0, $vr0, $vr1
+; LA64-NEXT:    ret
+entry:
+  %b = insertelement <2 x double> poison, double %a, i32 0
+  %c = fadd <2 x double> %b, <double 1.0, double poison>
+  ret <2 x double> %c
+}
+
+define <4 x float> @fsub_splat_v4f32(float %b) nounwind {
+; CHECK-LABEL: fsub_splat_v4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT:    lu12i.w $a0, 260096
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
+; CHECK-NEXT:    vfsub.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+entry:
+  %insa = insertelement <4 x float> poison, float 1.0, i32 0
+  %insb = insertelement <4 x float> poison, float %b, i32 0
+  %va = shufflevector <4 x float> %insa, <4 x float> poison, <4 x i32> zeroinitializer
+  %vb = shufflevector <4 x float> %insb, <4 x float> poison, <4 x i32> zeroinitializer
+  %c = fsub <4 x float> %va, %vb
+  ret <4 x float> %c
+}
+
+define <2 x double> @fsub_splat_v2f64(double %a, double %b) nounwind {
+; CHECK-LABEL: fsub_splat_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $f1_64 killed $f1_64 def $vr1
+; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+entry:
+  %insa = insertelement <2 x double> poison, double %a, i32 0
+  %insb = insertelement <2 x double> poison, double %b, i32 0
+  %va = shufflevector <2 x double> %insa, <2 x double> poison, <2 x i32> zeroinitializer
+  %vb = shufflevector <2 x double> %insb, <2 x double> poison, <2 x i32> zeroinitializer
+  %c = fsub <2 x double> %va, %vb
+  ret <2 x double> %c
+}

>From d516d07c268f260e245690f89ab0f3e727389cb5 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Wed, 10 Sep 2025 17:52:19 +0800
Subject: [PATCH 2/2] [LoongArch] Override cost hooks to expose more DAG
 combine opportunities

---
 .../LoongArch/LoongArchISelLowering.cpp       | 17 ++++++
 .../Target/LoongArch/LoongArchISelLowering.h  |  4 ++
 .../CodeGen/LoongArch/lasx/scalarize-fp.ll    | 60 ++++++-------------
 .../CodeGen/LoongArch/lsx/scalarize-fp.ll     | 39 ++++--------
 4 files changed, 50 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d32eb7df866ab..e43d6e3f2a85c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -9303,3 +9303,20 @@ bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
       Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
 }
+
+bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
+                                                      unsigned Index) const {
+  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
+    return false;
+
+  // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
+  return Index == 0;
+}
+
+bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
+                                                   unsigned Index) const {
+  EVT EltVT = VT.getScalarType();
+
+  // Extracting a scalar FP value from index 0 of a vector is free.
+  return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 19c85faa9f9cc..266bf744930a9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -337,6 +337,10 @@ class LoongArchTargetLowering : public TargetLowering {
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const override;
 
+  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
+                               unsigned Index) const override;
+  bool isExtractVecEltCheap(EVT VT, unsigned Index) const override;
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll b/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll
index 6bd9c9e95f4c3..39ac647d6875c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalarize-fp.ll
@@ -1,14 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
 
 define <8 x float> @fadd_elt0_v8f32(float %a) nounwind {
 ; CHECK-LABEL: fadd_elt0_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT:    lu12i.w $a0, 260096
-; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a0
-; CHECK-NEXT:    xvfadd.s $xr0, $xr0, $xr1
+; CHECK-NEXT:    vldi $vr1, -1168
+; CHECK-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; CHECK-NEXT:    ret
 entry:
   %b = insertelement <8 x float> poison, float %a, i32 0
@@ -17,20 +15,11 @@ entry:
 }
 
 define <4 x double> @fadd_elt0_v4f64(double %a) nounwind {
-; LA32-LABEL: fadd_elt0_v4f64:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT:    vldi $vr1, -912
-; LA32-NEXT:    xvfadd.d $xr0, $xr0, $xr1
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: fadd_elt0_v4f64:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT:    lu52i.d $a0, $zero, 1023
-; LA64-NEXT:    xvreplgr2vr.d $xr1, $a0
-; LA64-NEXT:    xvfadd.d $xr0, $xr0, $xr1
-; LA64-NEXT:    ret
+; CHECK-LABEL: fadd_elt0_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vldi $vr1, -912
+; CHECK-NEXT:    fadd.d $fa0, $fa0, $fa1
+; CHECK-NEXT:    ret
 entry:
   %b = insertelement <4 x double> poison, double %a, i32 0
   %c = fadd <4 x double> %b, <double 1.0, double poison, double poison, double poison>
@@ -40,11 +29,8 @@ entry:
 define <8 x float> @fsub_splat_v8f32(float %a, float %b) nounwind {
 ; CHECK-LABEL: fsub_splat_v8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vr1
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT:    vfsub.s $vr0, $vr0, $vr1
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
-; CHECK-NEXT:    xvrepl128vei.w $xr0, $xr0, 0
+; CHECK-NEXT:    fsub.s $fa0, $fa0, $fa1
+; CHECK-NEXT:    xvreplve0.w $xr0, $xr0
 ; CHECK-NEXT:    ret
 entry:
   %insa = insertelement <8 x float> poison, float %a, i32 0
@@ -56,24 +42,12 @@ entry:
 }
 
 define <4 x double> @fsub_splat_v4f64(double %a) nounwind {
-; LA32-LABEL: fsub_splat_v4f64:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
-; LA32-NEXT:    vldi $vr1, -784
-; LA32-NEXT:    xvfadd.d $xr0, $xr0, $xr1
-; LA32-NEXT:    xvpermi.d $xr0, $xr0, 68
-; LA32-NEXT:    xvrepl128vei.d $xr0, $xr0, 0
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: fsub_splat_v4f64:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
-; LA64-NEXT:    lu52i.d $a0, $zero, -1025
-; LA64-NEXT:    xvreplgr2vr.d $xr1, $a0
-; LA64-NEXT:    xvfadd.d $xr0, $xr0, $xr1
-; LA64-NEXT:    xvpermi.d $xr0, $xr0, 68
-; LA64-NEXT:    xvrepl128vei.d $xr0, $xr0, 0
-; LA64-NEXT:    ret
+; CHECK-LABEL: fsub_splat_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vldi $vr1, -784
+; CHECK-NEXT:    fadd.d $fa0, $fa0, $fa1
+; CHECK-NEXT:    xvreplve0.d $xr0, $xr0
+; CHECK-NEXT:    ret
 entry:
   %insa = insertelement <4 x double> poison, double %a, i32 0
   %insb = insertelement <4 x double> poison, double 1.0, i32 0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll b/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll
index cf5cbc3b993c6..b651f11596c82 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/scalarize-fp.ll
@@ -1,14 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
 
 define <4 x float> @fadd_elt0_v4f32(float %a) nounwind {
 ; CHECK-LABEL: fadd_elt0_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT:    lu12i.w $a0, 260096
-; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
-; CHECK-NEXT:    vfadd.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    vldi $vr1, -1168
+; CHECK-NEXT:    fadd.s $fa0, $fa0, $fa1
 ; CHECK-NEXT:    ret
 entry:
   %b = insertelement <4 x float> poison, float %a, i32 0
@@ -17,20 +15,11 @@ entry:
 }
 
 define <2 x double> @fadd_elt0_v2f64(double %a) nounwind {
-; LA32-LABEL: fadd_elt0_v2f64:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
-; LA32-NEXT:    vldi $vr1, -912
-; LA32-NEXT:    vfadd.d $vr0, $vr0, $vr1
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: fadd_elt0_v2f64:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
-; LA64-NEXT:    lu52i.d $a0, $zero, 1023
-; LA64-NEXT:    vreplgr2vr.d $vr1, $a0
-; LA64-NEXT:    vfadd.d $vr0, $vr0, $vr1
-; LA64-NEXT:    ret
+; CHECK-LABEL: fadd_elt0_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vldi $vr1, -912
+; CHECK-NEXT:    fadd.d $fa0, $fa0, $fa1
+; CHECK-NEXT:    ret
 entry:
   %b = insertelement <2 x double> poison, double %a, i32 0
   %c = fadd <2 x double> %b, <double 1.0, double poison>
@@ -40,10 +29,8 @@ entry:
 define <4 x float> @fsub_splat_v4f32(float %b) nounwind {
 ; CHECK-LABEL: fsub_splat_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT:    lu12i.w $a0, 260096
-; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
-; CHECK-NEXT:    vfsub.s $vr0, $vr1, $vr0
+; CHECK-NEXT:    vldi $vr1, -1168
+; CHECK-NEXT:    fsub.s $fa0, $fa1, $fa0
 ; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -58,9 +45,7 @@ entry:
 define <2 x double> @fsub_splat_v2f64(double %a, double %b) nounwind {
 ; CHECK-LABEL: fsub_splat_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $f1_64 killed $f1_64 def $vr1
-; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $vr0
-; CHECK-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    fsub.d $fa0, $fa0, $fa1
 ; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0
 ; CHECK-NEXT:    ret
 entry:



More information about the llvm-commits mailing list