[llvm] 2ce586b - [Hexagon] Handle floating point splats

Wed Dec 29 06:53:33 PST 2021

Author: Krzysztof Parzyszek
Date: 2021-12-29T06:52:24-08:00
New Revision: 2ce586bc497f7c9cc15a7f7fcba3c7169830119a

URL: https://github.com/llvm/llvm-project/commit/2ce586bc497f7c9cc15a7f7fcba3c7169830119a
DIFF: https://github.com/llvm/llvm-project/commit/2ce586bc497f7c9cc15a7f7fcba3c7169830119a.diff

LOG: [Hexagon] Handle floating point splats

Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh at quicinc.com>

Added: 
    llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll
    llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll
    llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll

Modified: 
    llvm/lib/Target/Hexagon/HexagonISelLowering.h
    llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
    llvm/lib/Target/Hexagon/HexagonPatterns.td
    llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
    llvm/test/CodeGen/Hexagon/autohvx/splat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index fd6aa06ed843..a31a697b7317 100644

--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -458,6 +458,7 @@ class HexagonTargetLowering : public TargetLowering {
                           SelectionDAG &DAG) const;
 
   SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;

diff  --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index e189b0b49e34..28f7c5414a2a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -96,6 +96,9 @@ HexagonTargetLowering::initializeHVXLowering() {
     // BUILD_VECTOR with f16 operands cannot be promoted without
     // promoting the result, so lower the node to vsplat or constant pool
     setOperationAction(ISD::BUILD_VECTOR,      MVT::f16,    Custom);
+    setOperationAction(ISD::SPLAT_VECTOR,      MVT::f16,    Custom);
+    setOperationAction(ISD::SPLAT_VECTOR,      MVT::v64f16, Legal);
+    setOperationAction(ISD::SPLAT_VECTOR,      MVT::v32f32, Legal);
 
     // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
     // independent) handling of it would convert it to a load, which is
@@ -1299,6 +1302,24 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
   return buildHvxVectorReg(Ops, dl, VecTy, DAG);
 }
 
+SDValue
+HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
+      const { // Custom lowering for SPLAT_VECTOR: only an f16 scalar operand is rewritten here.
+  const SDLoc &dl(Op);
+  MVT VecTy = ty(Op);
+  MVT ArgTy = ty(Op.getOperand(0));
+
+  if (ArgTy == MVT::f16) { // Re-express the f16 splat as an integer splat of the same bits.
+    MVT SplatTy =  MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements()); // i16 vector with the result's element count
+    SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0)); // reinterpret the half value as i16
+    SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16); // widen the scalar to i32
+    SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32); // i16-element splat fed by the i32 scalar
+    return DAG.getBitcast(VecTy, Splat); // cast back to the originally requested vector type
+  }
+
+  return SDValue(); // any other operand type: empty value; presumably means "not lowered here" -- confirm with caller
+}
+
 SDValue
 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
       const {
@@ -2185,6 +2206,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
     default:
       break;
     case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
+    case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
     case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
     case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
     case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);

diff  --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 4ba6d4740e12..9b21eb98e2c8 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -257,6 +257,9 @@ def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>;
 
 def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
 def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
+def f32zero: PatLeaf<(f32 fpimm:$F), [{
+  return N->isExactlyValue(APFloat::getZero(APFloat::IEEEsingle(), false)); // NOTE(review): `false` presumably selects +0.0, so -0.0 would not match -- confirm
+}]>;
 
 // This complex pattern is really only to detect various forms of
 // sign-extension i32->i64. The selected value will be of type i64

diff  --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 21e703fd5a3c..ad8029687770 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -64,7 +64,7 @@ def HexagonVPACKL:     SDNode<"HexagonISD::VPACKL",     SDTVecUnaryOp>;
 def HexagonVUNPACK:    SDNode<"HexagonISD::VUNPACK",    SDTVecUnaryOp>;
 def HexagonVUNPACKU:   SDNode<"HexagonISD::VUNPACKU",   SDTVecUnaryOp>;
 
-def vzero:  PatFrag<(ops), (splat_vector (i32 0))>;
+def vzero:  PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>;
 def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
 def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
 def qcat:   PatFrag<(ops node:$Qs, node:$Qt),
@@ -265,10 +265,13 @@ let Predicates = [UseHVX] in {
     // These should be preferred over a vsplat of 0.
     def: Pat<(VecI8   vzero), (V6_vd0)>;
     def: Pat<(VecI16  vzero), (V6_vd0)>;
+    def: Pat<(VecF16  vzero), (V6_vd0)>;
     def: Pat<(VecI32  vzero), (V6_vd0)>;
+    def: Pat<(VecF32  vzero), (V6_vd0)>;
     def: Pat<(VecPI8  vzero), (PS_vdd0)>;
     def: Pat<(VecPI16 vzero), (PS_vdd0)>;
     def: Pat<(VecPI32 vzero), (PS_vdd0)>;
+    def: Pat<(VecPF32 vzero), (PS_vdd0)>;
 
     def: Pat<(concat_vectors  (VecI8 vzero),  (VecI8 vzero)), (PS_vdd0)>;
     def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
@@ -363,6 +366,18 @@ let Predicates = [UseHVX,UseHVXV62] in {
     def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
   }
 }
+let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
+  let AddedComplexity = 30 in {
+    def: Pat<(VecF16  (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
+    def: Pat<(VecF32  (splat_vector anyint:$V)),       (V62splatiw imm:$V)>;
+    def: Pat<(VecF32  (splat_vector f32ImmPred:$V)),   (V62splatiw (ftoi $V))>;
+  }
+  let AddedComplexity = 20 in {
+    def: Pat<(VecF16  (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
+    def: Pat<(VecF32  (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
+    def: Pat<(VecF32  (splat_vector F32:$Rs)), (V62splatrw $Rs)>;
+  }
+}
 
 class Vneg1<ValueType VecTy>
   : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll b/llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll
new file mode 100644
index 000000000000..f64674bd0e84
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/fsplat.ll
@@ -0,0 +1,57 @@
+; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+
+; Check that the vsplat instruction is generated
+; CHECK: r[[V:[0-9]+]] = ##1092616192
+; CHECK: vsplat(r[[V]])
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+; Function Attrs: nofree norecurse nounwind writeonly
+define dso_local i32 @foo(float* nocapture %0, i32 %1) local_unnamed_addr #0 {
+  %3 = icmp sgt i32 %1, 0
+  br i1 %3, label %4, label %22
+
+4:                                                ; preds = %2
+  %5 = icmp ult i32 %1, 64
+  br i1 %5, label %6, label %9
+
+6:                                                ; preds = %20, %4
+  %7 = phi float* [ %0, %4 ], [ %11, %20 ]
+  %8 = phi i32 [ 0, %4 ], [ %10, %20 ]
+  br label %23
+
+9:                                                ; preds = %4
+  %10 = and i32 %1, -64
+  %11 = getelementptr float, float* %0, i32 %10
+  br label %12
+
+12:                                               ; preds = %12, %9
+  %13 = phi i32 [ 0, %9 ], [ %18, %12 ]
+  %14 = getelementptr float, float* %0, i32 %13
+  %15 = bitcast float* %14 to <32 x float>*
+  store <32 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, <32 x float>* %15, align 4
+  %16 = getelementptr float, float* %14, i32 32
+  %17 = bitcast float* %16 to <32 x float>*
+  store <32 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, <32 x float>* %17, align 4
+  %18 = add i32 %13, 64
+  %19 = icmp eq i32 %18, %10
+  br i1 %19, label %20, label %12
+
+20:                                               ; preds = %12
+  %21 = icmp eq i32 %10, %1
+  br i1 %21, label %22, label %6
+
+22:                                               ; preds = %23, %20, %2
+  ret i32 0
+
+23:                                               ; preds = %23, %6
+  %24 = phi float* [ %28, %23 ], [ %7, %6 ]
+  %25 = phi i32 [ %26, %23 ], [ %8, %6 ]
+  store float 1.000000e+01, float* %24, align 4
+  %26 = add nuw nsw i32 %25, 1
+  %27 = icmp eq i32 %26, %1
+  %28 = getelementptr float, float* %24, i32 1
+  br i1 %27, label %22, label %23
+}
+
+attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll b/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll
new file mode 100644
index 000000000000..d5d3dcbe0737
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/hfnosplat_cp.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+
+; Check that a non-splat vector of half constants is emitted to the constant pool
+; CHECK: .word 1097875824
+; CHECK: .word 1048133241
+; CHECK: .word 0
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+; Function Attrs: nofree norecurse nounwind writeonly
+define dso_local i32 @foo(half* nocapture %a) local_unnamed_addr #0 {
+vector.body:
+  %0 = bitcast half* %a to <40 x half>*
+  store <40 x half> <half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79>, <40 x half>* %0, align 2
+  ret i32 0
+}
+
+attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll b/llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll
new file mode 100644
index 000000000000..3a6847876ea5
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/hfsplat.ll
@@ -0,0 +1,57 @@
+; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+
+; Check that the vsplat instruction is generated
+; CHECK: r[[V:[0-9]+]] = #16752
+; CHECK: vsplat(r[[V]])
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+; Function Attrs: nofree norecurse nounwind writeonly
+define dso_local i32 @foo(half* nocapture %0, i32 %1) local_unnamed_addr #0 {
+  %3 = icmp sgt i32 %1, 0
+  br i1 %3, label %4, label %22
+
+4:                                                ; preds = %2
+  %5 = icmp ult i32 %1, 128
+  br i1 %5, label %6, label %9
+
+6:                                                ; preds = %20, %4
+  %7 = phi half* [ %0, %4 ], [ %11, %20 ]
+  %8 = phi i32 [ 0, %4 ], [ %10, %20 ]
+  br label %23
+
+9:                                                ; preds = %4
+  %10 = and i32 %1, -128
+  %11 = getelementptr half, half* %0, i32 %10
+  br label %12
+
+12:                                               ; preds = %12, %9
+  %13 = phi i32 [ 0, %9 ], [ %18, %12 ]
+  %14 = getelementptr half, half* %0, i32 %13
+  %15 = bitcast half* %14 to <64 x half>*
+  store <64 x half> <half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170>, <64 x half>* %15, align 2
+  %16 = getelementptr half, half* %14, i32 64
+  %17 = bitcast half* %16 to <64 x half>*
+  store <64 x half> <half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170>, <64 x half>* %17, align 2
+  %18 = add i32 %13, 128
+  %19 = icmp eq i32 %18, %10
+  br i1 %19, label %20, label %12
+
+20:                                               ; preds = %12
+  %21 = icmp eq i32 %10, %1
+  br i1 %21, label %22, label %6
+
+22:                                               ; preds = %23, %20, %2
+  ret i32 0
+
+23:                                               ; preds = %23, %6
+  %24 = phi half* [ %28, %23 ], [ %7, %6 ]
+  %25 = phi i32 [ %26, %23 ], [ %8, %6 ]
+  store half 0xH4170, half* %24, align 2
+  %26 = add nuw nsw i32 %25, 1
+  %27 = icmp eq i32 %26, %1
+  %28 = getelementptr half, half* %24, i32 1
+  br i1 %27, label %22, label %23
+}
+
+attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll
index c686561f1713..bbea3a21270c 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll
@@ -397,5 +397,36 @@ define <64 x i32> @f23(i32 %a0) #1 {
   ret <64 x i32> %v1
 }
 
+; Splat register, 16 bit fp, v68+
+define <64 x half> @f24(i16 %a0) #2 {
+; CHECK-LABEL: f24:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.h = vsplat(r1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:     vmem(r0+#0) = v0.new
+; CHECK-NEXT:    }
+  %v0 = bitcast i16 %a0 to half
+  %v1 = insertelement <64 x half> undef, half %v0, i32 0
+  %v2 = shufflevector <64 x half> %v1, <64 x half> undef, <64 x i32> zeroinitializer
+  ret <64 x half> %v2
+}
+
+; Splat register, 32 bit fp, v68+
+define <32 x float> @f25(float %a0) #2 {
+; CHECK-LABEL: f25:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vsplat(r1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:     vmem(r0+#0) = v0.new
+; CHECK-NEXT:    }
+  %v0 = insertelement <32 x float> undef, float %a0, i32 0
+  %v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer
+  ret <32 x float> %v1
+}
+
+
 attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
 attributes #1 = { nounwind readnone "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" }
+attributes #2 = { nounwind readnone "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }