[llvm] 2342363 - [Hexagon] Handle HVX/FP shuffles, insertion and extraction

Thu Dec 30 08:50:02 PST 2021

Author: Krzysztof Parzyszek
Date: 2021-12-30T08:44:10-08:00
New Revision: 23423638cc6901a292d52f6aee8e5042fffb89b2

URL: https://github.com/llvm/llvm-project/commit/23423638cc6901a292d52f6aee8e5042fffb89b2
DIFF: https://github.com/llvm/llvm-project/commit/23423638cc6901a292d52f6aee8e5042fffb89b2.diff

LOG: [Hexagon] Handle HVX/FP shuffles, insertion and extraction

Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh at quicinc.com>

Added: 
    llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll

Modified: 
    llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 569ad8b337db4..6c7e18a3a207b 100755

--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -91,15 +91,26 @@ HexagonTargetLowering::initializeHVXLowering() {
 
   if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
       Subtarget.useHVXFloatingPoint()) {
+    setOperationAction(ISD::INSERT_SUBVECTOR,  MVT::v64f16, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
+    setOperationAction(ISD::INSERT_SUBVECTOR,  MVT::v32f32, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
+
     // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat
     setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom);
 
     // BUILD_VECTOR with f16 operands cannot be promoted without
     // promoting the result, so lower the node to vsplat or constant pool
     setOperationAction(ISD::BUILD_VECTOR,      MVT::f16,    Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16,    Custom);
     setOperationAction(ISD::SPLAT_VECTOR,      MVT::f16,    Custom);
     setOperationAction(ISD::SPLAT_VECTOR,      MVT::v64f16, Legal);
     setOperationAction(ISD::SPLAT_VECTOR,      MVT::v32f32, Legal);
+    // Vector shuffles are promoted to a byte vector type (ByteV or ByteW),
+    // and a bitcast back to the original f16/f32 vector type is generated.
+    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
+    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
+    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
 
     // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
     // independent) handling of it would convert it to a load, which is
@@ -1483,6 +1494,7 @@ SDValue
 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
       const {
   const SDLoc &dl(Op);
+  MVT VecTy = ty(Op);
   SDValue VecV = Op.getOperand(0);
   SDValue ValV = Op.getOperand(1);
   SDValue IdxV = Op.getOperand(2);
@@ -1490,6 +1502,14 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
   if (ElemTy == MVT::i1)
     return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
 
+  if (ElemTy == MVT::f16) {
+    SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+        tyVector(VecTy, MVT::i16),
+        DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
+        DAG.getBitcast(MVT::i16, ValV), IdxV);
+    return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
+  }
+
   return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
 }
 

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll b/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll
new file mode 100644
index 0000000000000..ffca572e4be84
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that we generate a proper vinsert instruction for f16 types.
+; CHECK: vinsert
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define half* @fred(half* %v0) local_unnamed_addr #0 {
+b0:
+  %t1 = bitcast half* %v0 to <64 x half>*
+  %v1 = load <64 x half>, <64 x half>* %t1, align 2
+  %v2 = insertelement <64 x half> %v1, half 0xH4170, i32 17
+  store <64 x half> %v2, <64 x half>* %t1, align 2
+  %t2 = bitcast <64 x half>* %t1 to half*
+  ret half* %t2
+}
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,+hvx-qfloat,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }