[llvm] Passes for widening vector operations and shuffle opt (PR #169559)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 28 15:15:10 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-hexagon

Author: Fateme Hosseini (fhossein-quic)

<details>
<summary>Changes</summary>

Introduce Hexagon-specific passes to generate widening vector instructions for integer and floating-point operations using generic LLVM intrinsics. This enables widening operations for short vectors and improves type legalization by allowing operands to be widened to appropriate types. The patch also includes a shuffle optimization pass to relocate and validate shufflevector instructions during widening legalization.

Co-authored-by: Jyotsna Verma <jverma@<!-- -->qti.qualcomm.com>
Co-authored-by: Yashas Andaluri <yandalur@<!-- -->qti.qualcomm.com>
Co-authored-by: Fateme Hosseini <fhossein@<!-- -->qti.qualcomm.com>
Co-authored-by: Muntasir Mallick <mallick@<!-- -->qti.qualcomm.com>
Co-authored-by: Tatiana Larina <larina@<!-- -->qti.qualcomm.com>
Co-authored-by: Kaushik Kulkarni <kauskulk@<!-- -->qti.qualcomm.com>

Change-Id: I1f6c146bd70ffd1ea42b614fa22fad04d16d6c35

---

Patch is 255.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169559.diff


37 Files Affected:

- (modified) llvm/include/llvm/IR/IntrinsicsHexagon.td (+79-1) 
- (modified) llvm/include/llvm/IR/IntrinsicsHexagonDep.td (-14) 
- (modified) llvm/lib/Target/Hexagon/CMakeLists.txt (+3) 
- (modified) llvm/lib/Target/Hexagon/Hexagon.h (+4) 
- (modified) llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp (+1-2) 
- (added) llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp (+565) 
- (added) llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp (+1184) 
- (modified) llvm/lib/Target/Hexagon/HexagonISelLowering.h (+1) 
- (modified) llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp (+110) 
- (modified) llvm/lib/Target/Hexagon/HexagonIntrinsics.td (+114) 
- (modified) llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp (+1-1) 
- (added) llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp (+713) 
- (modified) llvm/lib/Target/Hexagon/HexagonPatternsHVX.td (+12) 
- (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp (+17) 
- (modified) llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp (+21-22) 
- (modified) llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp (+5) 
- (modified) llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll (+11-15) 
- (modified) llvm/test/CodeGen/Hexagon/autohvx/widen-setcc.ll (+1-3) 
- (added) llvm/test/CodeGen/Hexagon/bug54537-vavg.ll (+20) 
- (added) llvm/test/CodeGen/Hexagon/extend-multiply-for-output-fpext.ll (+16) 
- (added) llvm/test/CodeGen/Hexagon/no_widening_of_bf16_vecmul.ll (+60) 
- (added) llvm/test/CodeGen/Hexagon/shortvec-vasrsat.ll (+68) 
- (added) llvm/test/CodeGen/Hexagon/shortvec-vavg.ll (+20) 
- (added) llvm/test/CodeGen/Hexagon/shortvec-vmpy.ll (+27) 
- (added) llvm/test/CodeGen/Hexagon/vadd-const.ll (+114) 
- (added) llvm/test/CodeGen/Hexagon/vasr-sat.ll (+66) 
- (added) llvm/test/CodeGen/Hexagon/vavg.ll (+33) 
- (added) llvm/test/CodeGen/Hexagon/vec-shuff-invalid-operand.ll (+32) 
- (added) llvm/test/CodeGen/Hexagon/vec-shuff-multi-uses.ll (+290) 
- (added) llvm/test/CodeGen/Hexagon/vec-shuff2.ll (+106) 
- (added) llvm/test/CodeGen/Hexagon/vmpa.ll (+64) 
- (added) llvm/test/CodeGen/Hexagon/vmpy-const.ll (+273) 
- (added) llvm/test/CodeGen/Hexagon/vmpy-qfp-const.ll (+71) 
- (added) llvm/test/CodeGen/Hexagon/vsub-const.ll (+112) 
- (added) llvm/test/CodeGen/Hexagon/widening-float-vec.ll (+15) 
- (added) llvm/test/CodeGen/Hexagon/widening-vec.ll (+96) 
- (added) llvm/test/CodeGen/Hexagon/widening-vec2.ll (+23) 


``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
index 20ba51ade35a7..2c945d2399b25 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -14,7 +14,7 @@
 //
 // All Hexagon intrinsics start with "llvm.hexagon.".
 let TargetPrefix = "hexagon" in {
-  /// Hexagon_Intrinsic - Base class for the majority of Hexagon intrinsics.
+  /// Hexagon_Intrinsic - Base class for majority of Hexagon intrinsics.
   class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
                               list<LLVMType> param_types,
                               list<IntrinsicProperty> properties>
@@ -435,6 +435,84 @@ def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B;
 def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B;
 def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B;
 
+// Carryo
+// The script can't autogenerate clang builtins for vaddcarryo/vsubcarryo,
+// and they are marked in HexagonIset.py as not having intrinsics at all.
+// The script could generate intrinsics, but instead of doing intrinsics
+// without builtins, just put the intrinsics here.
+
+// tag : V6_vaddcarryo
+class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic<
+      list<IntrinsicProperty> intr_properties = [IntrNoMem]>
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+       intr_properties>;
+
+// tag : V6_vaddcarryo
+class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B<
+      list<IntrinsicProperty> intr_properties = [IntrNoMem]>
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+       intr_properties>;
+
+// Pseudo intrinsics for widening vector instructions that
+// get replaced with the real Hexagon instructions during
+// instruction lowering.
+class Hexagon_widenvec_Intrinsic
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_anyvector_ty],
+       [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
+       [IntrNoMem]>;
+
+class Hexagon_non_widenvec_Intrinsic
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_anyvector_ty],
+       [LLVMMatchType<0>, LLVMMatchType<0>],
+       [IntrNoMem]>;
+
+// Widening vector add
+def int_hexagon_vadd_su: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vadd_uu: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vadd_ss: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vadd_us: Hexagon_widenvec_Intrinsic;
+
+
+// Widening vector subtract
+def int_hexagon_vsub_su: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vsub_uu: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vsub_ss: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vsub_us: Hexagon_widenvec_Intrinsic;
+
+// Widening vector multiply
+def int_hexagon_vmpy_su: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vmpy_uu: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vmpy_ss: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vmpy_us: Hexagon_widenvec_Intrinsic;
+
+def int_hexagon_vavgu: Hexagon_non_widenvec_Intrinsic;
+def int_hexagon_vavgs: Hexagon_non_widenvec_Intrinsic;
+
+class Hexagon_vasr_Intrinsic
+  : Hexagon_NonGCC_Intrinsic<
+       [LLVMSubdivide2VectorType<0>],
+       [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i32_ty],
+       [IntrNoMem]>;
+
+def int_hexagon_vasrsat_su: Hexagon_vasr_Intrinsic;
+def int_hexagon_vasrsat_uu: Hexagon_vasr_Intrinsic;
+def int_hexagon_vasrsat_ss: Hexagon_vasr_Intrinsic;
+
+class Hexagon_widen_vec_scalar_Intrinsic
+  : Hexagon_NonGCC_Intrinsic<
+       [llvm_anyvector_ty],
+       [LLVMTruncatedType<0>, llvm_i32_ty],
+       [IntrNoMem]>;
+
+// Widening vector scalar multiply
+def int_hexagon_vmpy_ub_b: Hexagon_widen_vec_scalar_Intrinsic;
+def int_hexagon_vmpy_ub_ub: Hexagon_widen_vec_scalar_Intrinsic;
+def int_hexagon_vmpy_uh_uh: Hexagon_widen_vec_scalar_Intrinsic;
+def int_hexagon_vmpy_h_h: Hexagon_widen_vec_scalar_Intrinsic;
 
 // Intrinsic for instrumentation based profiling using a custom handler. The
 // name of the handler is passed as the first operand to the intrinsic. The
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
index dde4132791f06..2a673603e4e03 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
@@ -491,20 +491,6 @@ class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B<
        [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty],
        intr_properties>;
 
-// tag : V6_vaddcarryo
-class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic<
-      list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_NonGCC_Intrinsic<
-       [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
-       intr_properties>;
-
-// tag : V6_vaddcarryo
-class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B<
-      list<IntrinsicProperty> intr_properties = [IntrNoMem]>
-  : Hexagon_NonGCC_Intrinsic<
-       [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
-       intr_properties>;
-
 // tag : V6_vaddcarrysat
 class Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic<string GCCIntSuffix,
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index 1a5f09642ea66..eddab5a235dab 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -37,6 +37,8 @@ add_llvm_target(HexagonCodeGen
   HexagonGenMemAbsolute.cpp
   HexagonGenMux.cpp
   HexagonGenPredicate.cpp
+  HexagonGenWideningVecFloatInstr.cpp
+  HexagonGenWideningVecInstr.cpp
   HexagonHardwareLoops.cpp
   HexagonHazardRecognizer.cpp
   HexagonInstrInfo.cpp
@@ -53,6 +55,7 @@ add_llvm_target(HexagonCodeGen
   HexagonNewValueJump.cpp
   HexagonOptAddrMode.cpp
   HexagonOptimizeSZextends.cpp
+  HexagonOptShuffleVector.cpp
   HexagonPeephole.cpp
   HexagonQFPOptimizer.cpp
   HexagonRDFOpt.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 422ab20891b94..b98369d1b3e30 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -92,6 +92,9 @@ FunctionPass *createHexagonGenInsert();
 FunctionPass *createHexagonGenMemAbsolute();
 FunctionPass *createHexagonGenMux();
 FunctionPass *createHexagonGenPredicate();
+FunctionPass *
+createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);
+FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &);
 FunctionPass *createHexagonHardwareLoops();
 FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
                                    CodeGenOptLevel OptLevel);
@@ -102,6 +105,7 @@ FunctionPass *createHexagonMergeActivateWeight();
 FunctionPass *createHexagonNewValueJump();
 FunctionPass *createHexagonOptAddrMode();
 FunctionPass *createHexagonOptimizeSZextends();
+FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &);
 FunctionPass *createHexagonPacketizer(bool Minimal);
 FunctionPass *createHexagonPeephole();
 FunctionPass *createHexagonRDFOpt();
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 5344ed8446efc..412d58743df94 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -51,8 +51,7 @@ struct PrintRegister {
 };
 
 [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
-                                         const PrintRegister &PR);
-raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
+                                         const PrintRegister &PR) {
   return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
 }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp
new file mode 100644
index 0000000000000..7271f1f839d69
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp
@@ -0,0 +1,565 @@
+//===------------------- HexagonGenWideningVecFloatInstr.cpp --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace widening vector float operations with hexagon intrinsics.
+//
+//===----------------------------------------------------------------------===//
+//
+// Brief overview of how the GenWideningVecFloatInstr pass works.
+// This pass is a replica of the already existing pass (which replaces
+// widening vector integer operations with their respective intrinsics). In this
+// pass we generate hexagon intrinsics for widening vector float instructions.
+//
+// Example1(64 vector-width widening):
+// %wide.load = load <64 x half>, <64 x half>* %0, align 2
+// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
+// %1 = fpext <64 x half> %wide.load to <64 x float>
+// %3 = fpext <64 x half> %wide.load53 to <64 x float>
+// %4 = fmul <64 x float> %1, %3
+//
+// If we run this pass on the above example, it will first find the fmul
+// instruction, and then check whether the operands of the fmul instruction
+// (%1 and %3) belong to one of these categories: [%1 ->fpext, %3 ->fpext]
+// or [%1 ->fpext, %3 ->constant_vector] or [%1 ->constant_vector, %3 ->fpext].
+// If it sees such a pattern, then this pass will replace the pattern with the
+// appropriate hexagon intrinsics.
+//
+// After replacement:
+// %wide.load = load <64 x half>, <64 x half>* %0, align 2
+// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
+// %3 = bitcast <64 x half> %wide.load to <32 x i32>
+// %4 = bitcast <64 x half> %wide.load53 to <32 x i32>
+// %5 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %4)
+// %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> ShuffMask1
+// %7 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %6)
+// %8 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %6)
+// %9 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %7)
+// %10 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %8)
+// %11 = bitcast <32 x i32> %9 to <32 x float>
+// %12 = bitcast <32 x i32> %10 to <32 x float>
+// %13 = shufflevector <32 x float> %12, <32 x float> %11, <64 x i32> ShuffMask2
+//
+//
+//
+// Example2(128 vector-width widening):
+// %0 = bitcast half* %a to <128 x half>*
+// %wide.load = load <128 x half>, <128 x half>* %0, align 2
+// %1 = fpext <128 x half> %wide.load to <128 x float>
+// %2 = bitcast half* %b to <128 x half>*
+// %wide.load2 = load <128 x half>, <128 x half>* %2, align 2
+// %3 = fpext <128 x half> %wide.load2 to <128 x float>
+// %4 = fmul <128 x float> %1, %3
+//
+// After replacement:
+// %0 = bitcast half* %a to <128 x half>*
+// %wide.load = load <128 x half>, <128 x half>* %0, align 2
+// %1 = bitcast half* %b to <128 x half>*
+// %wide.load2 = load <128 x half>, <128 x half>* %1, align 2
+// %2 = bitcast <128 x half> %wide.load to <64 x i32>
+// %3 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %2)
+// %4 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %2)
+// %5 = bitcast <128 x half> %wide.load2 to <64 x i32>
+// %6 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %5)
+// %7 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %5)
+// %8 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %6)
+// %9 = shufflevector <64 x i32> %8, <64 x i32> poison, <64 x i32> Mask1
+// %10 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %9)
+// %11 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %9)
+// %12 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %10)
+// %13 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %11)
+// %14 = bitcast <32 x i32> %12 to <32 x float>
+// %15 = bitcast <32 x i32> %13 to <32 x float>
+// %16 = shufflevector <32 x float> %15, <32 x float> %14, <64 x i32> Mask2
+// %17 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%4, %7)
+// %18 = shufflevector <64 x i32> %17, <64 x i32> poison, <64 x i32> Mask1
+// %19 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %18)
+// %20 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %18)
+// %21 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %19)
+// %22 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %20)
+// %23 = bitcast <32 x i32> %21 to <32 x float>
+// %24 = bitcast <32 x i32> %22 to <32 x float>
+// %25 = shufflevector <32 x float> %24, <32 x float> %23, <64 x i32> Mask2
+// %26 = shufflevector <64 x float> %25, <64 x float> %16, <128 x i32> Mask3
+//
+//
+//===----------------------------------------------------------------------===//
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+#include <utility>
+
+using namespace llvm;
+
+namespace llvm {
+void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &);
+FunctionPass *
+createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);
+} // end namespace llvm
+
+namespace {
+
+class HexagonGenWideningVecFloatInstr : public FunctionPass {
+public:
+  static char ID;
+
+  HexagonGenWideningVecFloatInstr() : FunctionPass(ID) {
+    initializeHexagonGenWideningVecFloatInstrPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  HexagonGenWideningVecFloatInstr(const HexagonTargetMachine *TM)
+      : FunctionPass(ID), TM(TM) {
+    initializeHexagonGenWideningVecFloatInstrPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Hexagon generate widening vector float instructions";
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    FunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  Module *M = nullptr;
+  const HexagonTargetMachine *TM = nullptr;
+  const HexagonSubtarget *HST = nullptr;
+  unsigned HwVLen;
+  unsigned NumHalfEltsInFullVec;
+
+  struct OPInfo {
+    Value *OP;
+    Value *ExtInOP;
+    unsigned ExtInSize;
+  };
+
+  bool visitBlock(BasicBlock *B);
+  bool processInstruction(Instruction *Inst);
+  bool replaceWithIntrinsic(Instruction *Inst, OPInfo &OP1Info,
+                            OPInfo &OP2Info);
+
+  bool getOperandInfo(Value *V, OPInfo &OPI);
+  bool isExtendedConstant(Constant *C);
+  unsigned getElementSizeInBits(Value *V);
+  Type *getElementTy(unsigned size, IRBuilder<> &IRB);
+
+  Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
+                              unsigned NewEltsize, unsigned NumElts);
+
+  std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst);
+
+  Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
+                         Value *NewOP2, FixedVectorType *ResType,
+                         unsigned NumElts, bool BitCastOp);
+};
+
+} // end anonymous namespace
+
+char HexagonGenWideningVecFloatInstr::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonGenWideningVecFloatInstr, "widening-vec-float",
+                      "Hexagon generate "
+                      "widening vector float instructions",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonGenWideningVecFloatInstr, "widening-vec-float",
+                    "Hexagon generate "
+                    "widening vector float instructions",
+                    false, false)
+
+bool HexagonGenWideningVecFloatInstr::isExtendedConstant(Constant *C) {
+  if (Value *SplatV = C->getSplatValue()) {
+    if (auto *CFP = dyn_cast<ConstantFP>(SplatV)) {
+      bool Ignored;
+      APFloat APF = CFP->getValueAPF();
+      APFloat::opStatus sts = APF.convert(
+          APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+      if (sts == APFloat::opStatus::opOK || sts == APFloat::opStatus::opInexact)
+        return true;
+    }
+    return false;
+  }
+  unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+  for (unsigned i = 0, e = NumElts; i != e; ++i) {
+    if (auto *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(i))) {
+      bool Ignored;
+      APFloat APF = CFP->getValueAPF();
+      APFloat::opStatus sts = APF.convert(
+          APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+      if (sts != APFloat::opStatus::opOK && sts != APFloat::opStatus::opInexact)
+        return false;
+      continue;
+    }
+    return false;
+  }
+  return true;
+}
+
+unsigned HexagonGenWideningVecFloatInstr::getElementSizeInBits(Value *V) {
+  Type *ValTy = V->getType();
+  Type *EltTy = ValTy;
+  if (dyn_cast<Constant>(V)) {
+    unsigned EltSize =
+        cast<VectorType>(EltTy)->getElementType()->getPrimitiveSizeInBits();
+    unsigned ReducedSize = EltSize / 2;
+
+    return ReducedSize;
+  }
+
+  if (ValTy->isVectorTy())
+    EltTy = cast<VectorType>(ValTy)->getElementType();
+  return EltTy->getPrimitiveSizeInBits();
+}
+
+bool HexagonGenWideningVecFloatInstr::getOperandInfo(Value *V, OPInfo &OPI) {
+  using namespace PatternMatch;
+  OPI.OP = V;
+  Value *ExtV = nullptr;
+  Constant *C = nullptr;
+
+  if (match(V, (m_FPExt(m_Value(ExtV)))) ||
+      match(V,
+            m_Shuffle(m_InsertElt(m_Poison(), m_FPExt(m_Value(ExtV)), m_Zero()),
+                      m_Poison(), m_ZeroMask()))) {
+
+    if (auto *ExtVType = dyn_cast<VectorType>(ExtV->getType())) {
+      // Matches the first branch.
+      if (ExtVType->getElementType()->isBFloatTy())
+        // do not confuse bf16 with ieee-fp16.
+        return false;
+    } else {
+      // Matches the second branch (insert element branch)
+      if (ExtV->getType()->isBFloatTy())
+        return false;
+    }
+
+    OPI.ExtInOP = ExtV;
+    OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
+    return true;
+  }
+
+  if (match(V, m_Constant(C))) {
+    if (!isExtendedConstant(C))
+      return false;
+    OPI.ExtInOP = C;
+    OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
+    return true;
+  }
+
+  return false;
+}
+
+Type *HexagonGenWideningVecFloatInstr::getElementTy(unsigned size,
+                                                    IRBuilder<> &IRB) {
+  switch (size) {
+  case 16:
+    return IRB.getHalfTy();
+  case 32:
+    return IRB.getFloatTy();
+  default:
+    llvm_unreachable("Unhandled Element size");
+  }
+}
+
+Value *HexagonGenWideningVecFloatInstr::adjustExtensionForOp(
+    OPInfo &OPI, IRBuilder<> &IRB, unsigned NewExtSize, unsigned NumElts) {
+  Value *V = OPI.ExtInOP;
+  unsigned EltSize = getElementSizeInBits(OPI.ExtInOP);
+  assert(NewExtSize >= EltSize);
+  Type *EltType = getElementTy(NewExtSize, IRB);
+  auto *NewOpTy = FixedVectorType::get(EltType, NumElts);
+
+  if (auto *C = dyn_cast<Constant>(V))
+    return IRB.CreateFPTrunc(C, NewOpTy);
+
+  if (V->getType()->isVectorTy())
+    if (NewExtSize == EltSize)
+      return V;
+
+  return nullptr;
+}
+
+std::pair<Value *, Value *>
+HexagonGenWideningVecFloatInstr::opSplit(Value *OP, Instruction *Inst) {
+  Type *InstTy = Inst->getType();
+  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+  IRBuilder<> IRB(Inst);
+  Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B;
+  Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B;
+  Function *ExtFHi = Intrinsic::getOrInsertDeclaration(M, IntHi);
+  Function *ExtFLo = Intrinsic::getOrInsertDeclaration(M, IntLo);
+  if (NumElts == 128) {
+    auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64);
+    OP = IRB.CreateBitCast(OP, InType);
+  }
+  Value *OP1Hi = IRB.CreateCall(ExtFHi, {OP});
+  Value *OP1Lo = IRB.CreateCall(ExtFLo, {OP});
+  return std::pair<Value *, Value *>(OP1Hi, OP1Lo);
+}
+
+Value *HexagonGenWideningVecFloatInstr::createIntrinsic(
+    Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
+    FixedVectorType *ResType, unsigned NumElts, bool BitCastO...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/169559


More information about the llvm-commits mailing list