[llvm] Passes for widening vector operations and shuffle opt (PR #169559)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 28 15:15:10 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-hexagon
Author: Fateme Hosseini (fhossein-quic)
<details>
<summary>Changes</summary>
Introduce Hexagon-specific passes to generate widening vector instructions for integer and floating-point operations using generic LLVM intrinsics. This enables widening operations for short vectors and improves type legalization by allowing operands to be widened to appropriate types. The patch also includes a shuffle optimization pass to relocate and validate shufflevector instructions during widening legalization.
Co-authored-by: Jyotsna Verma <jverma@<!-- -->qti.qualcomm.com>
Co-authored-by: Yashas Andaluri <yandalur@<!-- -->qti.qualcomm.com>
Co-authored-by: Fateme Hosseini <fhossein@<!-- -->qti.qualcomm.com>
Co-authored-by: Muntasir Mallick <mallick@<!-- -->qti.qualcomm.com>
Co-authored-by: Tatiana Larina <larina@<!-- -->qti.qualcomm.com>
Co-authored-by: Kaushik Kulkarni <kauskulk@<!-- -->qti.qualcomm.com>
Change-Id: I1f6c146bd70ffd1ea42b614fa22fad04d16d6c35
---
Patch is 255.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169559.diff
37 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsHexagon.td (+79-1)
- (modified) llvm/include/llvm/IR/IntrinsicsHexagonDep.td (-14)
- (modified) llvm/lib/Target/Hexagon/CMakeLists.txt (+3)
- (modified) llvm/lib/Target/Hexagon/Hexagon.h (+4)
- (modified) llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp (+1-2)
- (added) llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp (+565)
- (added) llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp (+1184)
- (modified) llvm/lib/Target/Hexagon/HexagonISelLowering.h (+1)
- (modified) llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp (+110)
- (modified) llvm/lib/Target/Hexagon/HexagonIntrinsics.td (+114)
- (modified) llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp (+1-1)
- (added) llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp (+713)
- (modified) llvm/lib/Target/Hexagon/HexagonPatternsHVX.td (+12)
- (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp (+17)
- (modified) llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp (+21-22)
- (modified) llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp (+5)
- (modified) llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll (+11-15)
- (modified) llvm/test/CodeGen/Hexagon/autohvx/widen-setcc.ll (+1-3)
- (added) llvm/test/CodeGen/Hexagon/bug54537-vavg.ll (+20)
- (added) llvm/test/CodeGen/Hexagon/extend-multiply-for-output-fpext.ll (+16)
- (added) llvm/test/CodeGen/Hexagon/no_widening_of_bf16_vecmul.ll (+60)
- (added) llvm/test/CodeGen/Hexagon/shortvec-vasrsat.ll (+68)
- (added) llvm/test/CodeGen/Hexagon/shortvec-vavg.ll (+20)
- (added) llvm/test/CodeGen/Hexagon/shortvec-vmpy.ll (+27)
- (added) llvm/test/CodeGen/Hexagon/vadd-const.ll (+114)
- (added) llvm/test/CodeGen/Hexagon/vasr-sat.ll (+66)
- (added) llvm/test/CodeGen/Hexagon/vavg.ll (+33)
- (added) llvm/test/CodeGen/Hexagon/vec-shuff-invalid-operand.ll (+32)
- (added) llvm/test/CodeGen/Hexagon/vec-shuff-multi-uses.ll (+290)
- (added) llvm/test/CodeGen/Hexagon/vec-shuff2.ll (+106)
- (added) llvm/test/CodeGen/Hexagon/vmpa.ll (+64)
- (added) llvm/test/CodeGen/Hexagon/vmpy-const.ll (+273)
- (added) llvm/test/CodeGen/Hexagon/vmpy-qfp-const.ll (+71)
- (added) llvm/test/CodeGen/Hexagon/vsub-const.ll (+112)
- (added) llvm/test/CodeGen/Hexagon/widening-float-vec.ll (+15)
- (added) llvm/test/CodeGen/Hexagon/widening-vec.ll (+96)
- (added) llvm/test/CodeGen/Hexagon/widening-vec2.ll (+23)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
index 20ba51ade35a7..2c945d2399b25 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -14,7 +14,7 @@
//
// All Hexagon intrinsics start with "llvm.hexagon.".
let TargetPrefix = "hexagon" in {
- /// Hexagon_Intrinsic - Base class for the majority of Hexagon intrinsics.
+ /// Hexagon_Intrinsic - Base class for majority of Hexagon intrinsics.
class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
@@ -435,6 +435,84 @@ def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B;
def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B;
def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B;
+// Carryo
+// The script can't autogenerate clang builtins for vaddcarryo/vsubcarryo,
+// and they are marked in HexagonIset.py as not having intrinsics at all.
+// The script could generate intrinsics, but instead of doing intrinsics
+// without builtins, just put the intrinsics here.
+
+// tag : V6_vaddcarryo
+class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic<
+ list<IntrinsicProperty> intr_properties = [IntrNoMem]>
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ intr_properties>;
+
+// tag : V6_vaddcarryo
+class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B<
+ list<IntrinsicProperty> intr_properties = [IntrNoMem]>
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ intr_properties>;
+
+// Pseudo intrinsics for widening vector instructions that
+// get replaced with the real Hexagon instructions during
+// instruction lowering.
+class Hexagon_widenvec_Intrinsic
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
+ [IntrNoMem]>;
+
+class Hexagon_non_widenvec_Intrinsic
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+// Widening vector add
+def int_hexagon_vadd_su: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vadd_uu: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vadd_ss: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vadd_us: Hexagon_widenvec_Intrinsic;
+
+
+// Widening vector subtract
+def int_hexagon_vsub_su: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vsub_uu: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vsub_ss: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vsub_us: Hexagon_widenvec_Intrinsic;
+
+// Widening vector multiply
+def int_hexagon_vmpy_su: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vmpy_uu: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vmpy_ss: Hexagon_widenvec_Intrinsic;
+def int_hexagon_vmpy_us: Hexagon_widenvec_Intrinsic;
+
+def int_hexagon_vavgu: Hexagon_non_widenvec_Intrinsic;
+def int_hexagon_vavgs: Hexagon_non_widenvec_Intrinsic;
+
+class Hexagon_vasr_Intrinsic
+ : Hexagon_NonGCC_Intrinsic<
+ [LLVMSubdivide2VectorType<0>],
+ [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem]>;
+
+def int_hexagon_vasrsat_su: Hexagon_vasr_Intrinsic;
+def int_hexagon_vasrsat_uu: Hexagon_vasr_Intrinsic;
+def int_hexagon_vasrsat_ss: Hexagon_vasr_Intrinsic;
+
+class Hexagon_widen_vec_scalar_Intrinsic
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_anyvector_ty],
+ [LLVMTruncatedType<0>, llvm_i32_ty],
+ [IntrNoMem]>;
+
+// Widening vector scalar multiply
+def int_hexagon_vmpy_ub_b: Hexagon_widen_vec_scalar_Intrinsic;
+def int_hexagon_vmpy_ub_ub: Hexagon_widen_vec_scalar_Intrinsic;
+def int_hexagon_vmpy_uh_uh: Hexagon_widen_vec_scalar_Intrinsic;
+def int_hexagon_vmpy_h_h: Hexagon_widen_vec_scalar_Intrinsic;
// Intrinsic for instrumentation based profiling using a custom handler. The
// name of the handler is passed as the first operand to the intrinsic. The
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
index dde4132791f06..2a673603e4e03 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td
@@ -491,20 +491,6 @@ class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B<
[llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v128i1_ty],
intr_properties>;
-// tag : V6_vaddcarryo
-class Hexagon_custom_v16i32v64i1_v16i32v16i32_Intrinsic<
- list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_NonGCC_Intrinsic<
- [llvm_v16i32_ty,llvm_v64i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- intr_properties>;
-
-// tag : V6_vaddcarryo
-class Hexagon_custom_v32i32v128i1_v32i32v32i32_Intrinsic_128B<
- list<IntrinsicProperty> intr_properties = [IntrNoMem]>
- : Hexagon_NonGCC_Intrinsic<
- [llvm_v32i32_ty,llvm_v128i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- intr_properties>;
-
// tag : V6_vaddcarrysat
class Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic<string GCCIntSuffix,
list<IntrinsicProperty> intr_properties = [IntrNoMem]>
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index 1a5f09642ea66..eddab5a235dab 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -37,6 +37,8 @@ add_llvm_target(HexagonCodeGen
HexagonGenMemAbsolute.cpp
HexagonGenMux.cpp
HexagonGenPredicate.cpp
+ HexagonGenWideningVecFloatInstr.cpp
+ HexagonGenWideningVecInstr.cpp
HexagonHardwareLoops.cpp
HexagonHazardRecognizer.cpp
HexagonInstrInfo.cpp
@@ -53,6 +55,7 @@ add_llvm_target(HexagonCodeGen
HexagonNewValueJump.cpp
HexagonOptAddrMode.cpp
HexagonOptimizeSZextends.cpp
+ HexagonOptShuffleVector.cpp
HexagonPeephole.cpp
HexagonQFPOptimizer.cpp
HexagonRDFOpt.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 422ab20891b94..b98369d1b3e30 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -92,6 +92,9 @@ FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMemAbsolute();
FunctionPass *createHexagonGenMux();
FunctionPass *createHexagonGenPredicate();
+FunctionPass *
+createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);
+FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &);
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
CodeGenOptLevel OptLevel);
@@ -102,6 +105,7 @@ FunctionPass *createHexagonMergeActivateWeight();
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptAddrMode();
FunctionPass *createHexagonOptimizeSZextends();
+FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &);
FunctionPass *createHexagonPacketizer(bool Minimal);
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonRDFOpt();
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 5344ed8446efc..412d58743df94 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -51,8 +51,7 @@ struct PrintRegister {
};
[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
- const PrintRegister &PR);
-raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
+ const PrintRegister &PR) {
return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp
new file mode 100644
index 0000000000000..7271f1f839d69
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp
@@ -0,0 +1,565 @@
+//===------------------- HexagonGenWideningVecFloatInstr.cpp --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace widening vector float operations with hexagon intrinsics.
+//
+//===----------------------------------------------------------------------===//
+//
+// Brief overview of how the GenWideningVecFloatInstr pass works.
+// This pass is a replica of the already existing pass (which replaces
+// widening vector integer operations with their respective intrinsics). In
+// this pass we generate Hexagon intrinsics for widening vector float
+// instructions.
+//
+// Example1(64 vector-width widening):
+// %wide.load = load <64 x half>, <64 x half>* %0, align 2
+// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
+// %1 = fpext <64 x half> %wide.load to <64 x float>
+// %3 = fpext <64 x half> %wide.load53 to <64 x float>
+// %4 = fmul <64 x float> %1, %3
+//
+// If we run this pass on the above example, it will first find the fmul
+// instruction, and then check whether the operands of the fmul instruction
+// (%1 and %3) belong to one of these categories: [%1 ->fpext, %3 ->fpext],
+// [%1 ->fpext, %3 ->constant_vector] or [%1 ->constant_vector, %3 ->fpext].
+// If it sees such a pattern, the pass will replace it with the
+// appropriate Hexagon intrinsics.
+//
+// After replacement:
+// %wide.load = load <64 x half>, <64 x half>* %0, align 2
+// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
+// %3 = bitcast <64 x half> %wide.load to <32 x i32>
+// %4 = bitcast <64 x half> %wide.load53 to <32 x i32>
+// %5 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %4)
+// %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> ShuffMask1
+// %7 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %6)
+// %8 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %6)
+// %9 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %7)
+// %10 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %8)
+// %11 = bitcast <32 x i32> %9 to <32 x float>
+// %12 = bitcast <32 x i32> %10 to <32 x float>
+// %13 = shufflevector <32 x float> %12, <32 x float> %11, <64 x i32> ShuffMask2
+//
+//
+//
+// Example2(128 vector-width widening):
+// %0 = bitcast half* %a to <128 x half>*
+// %wide.load = load <128 x half>, <128 x half>* %0, align 2
+// %1 = fpext <128 x half> %wide.load to <128 x float>
+// %2 = bitcast half* %b to <128 x half>*
+// %wide.load2 = load <128 x half>, <128 x half>* %2, align 2
+// %3 = fpext <128 x half> %wide.load2 to <128 x float>
+// %4 = fmul <128 x float> %1, %3
+//
+// After replacement:
+// %0 = bitcast half* %a to <128 x half>*
+// %wide.load = load <128 x half>, <128 x half>* %0, align 2
+// %1 = bitcast half* %b to <128 x half>*
+// %wide.load2 = load <128 x half>, <128 x half>* %1, align 2
+// %2 = bitcast <128 x half> %wide.load to <64 x i32>
+// %3 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %2)
+// %4 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %2)
+// %5 = bitcast <128 x half> %wide.load2 to <64 x i32>
+// %6 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %5)
+// %7 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %5)
+// %8 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %6)
+// %9 = shufflevector <64 x i32> %8, <64 x i32> poison, <64 x i32> Mask1
+// %10 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %9)
+// %11 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %9)
+// %12 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %10)
+// %13 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %11)
+// %14 = bitcast <32 x i32> %12 to <32 x float>
+// %15 = bitcast <32 x i32> %13 to <32 x float>
+// %16 = shufflevector <32 x float> %15, <32 x float> %14, <64 x i32> Mask2
+// %17 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%4, %7)
+// %18 = shufflevector <64 x i32> %17, <64 x i32> poison, <64 x i32> Mask1
+// %19 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %18)
+// %20 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %18)
+// %21 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %19)
+// %22 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %20)
+// %23 = bitcast <32 x i32> %21 to <32 x float>
+// %24 = bitcast <32 x i32> %22 to <32 x float>
+// %25 = shufflevector <32 x float> %24, <32 x float> %23, <64 x i32> Mask2
+// %26 = shufflevector <64 x float> %25, <64 x float> %16, <128 x i32> Mask3
+//
+//
+//===----------------------------------------------------------------------===//
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+#include <utility>
+
+using namespace llvm;
+
+// Forward declarations of the pass-registration hook and the factory function
+// for this pass, both placed in the llvm namespace.
+namespace llvm {
+// Registration entry point; invoked from the pass constructors below.
+void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &);
+// Factory for the pass; takes the Hexagon target machine by const reference.
+FunctionPass *
+createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);
+} // end namespace llvm
+
+namespace {
+
+/// Function pass that replaces widening vector float operations with Hexagon
+/// intrinsics (worked examples are in the comment at the top of this file).
+class HexagonGenWideningVecFloatInstr : public FunctionPass {
+public:
+  static char ID;
+
+  /// Construct the pass without a target machine; \c TM stays null.
+  HexagonGenWideningVecFloatInstr() : FunctionPass(ID) {
+    initializeHexagonGenWideningVecFloatInstrPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  /// Construct the pass for the given target machine. Marked explicit so a
+  /// target-machine pointer never converts to a pass object implicitly.
+  explicit HexagonGenWideningVecFloatInstr(const HexagonTargetMachine *TM)
+      : FunctionPass(ID), TM(TM) {
+    initializeHexagonGenWideningVecFloatInstrPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Hexagon generate widening vector float instructions";
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  /// No analyses are requested beyond what the base class declares.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    FunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  Module *M = nullptr;
+  const HexagonTargetMachine *TM = nullptr;
+  const HexagonSubtarget *HST = nullptr;
+  // Zero-initialized so that a read before the pass assigns them is
+  // deterministic rather than an uninitialized load.
+  // NOTE(review): presumably the HVX vector length and the number of half
+  // elements per full vector -- confirm against runOnFunction.
+  unsigned HwVLen = 0;
+  unsigned NumHalfEltsInFullVec = 0;
+
+  /// Per-operand description filled in by getOperandInfo().
+  struct OPInfo {
+    Value *OP;          // The operand exactly as it appears on the instruction.
+    Value *ExtInOP;     // The fpext source value, or the constant itself.
+    unsigned ExtInSize; // Element size in bits reported for ExtInOP.
+  };
+
+  bool visitBlock(BasicBlock *B);
+  bool processInstruction(Instruction *Inst);
+  bool replaceWithIntrinsic(Instruction *Inst, OPInfo &OP1Info,
+                            OPInfo &OP2Info);
+
+  bool getOperandInfo(Value *V, OPInfo &OPI);
+  bool isExtendedConstant(Constant *C);
+  unsigned getElementSizeInBits(Value *V);
+  Type *getElementTy(unsigned size, IRBuilder<> &IRB);
+
+  // Parameter name matches the out-of-line definition (NewExtSize).
+  Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
+                              unsigned NewExtSize, unsigned NumElts);
+
+  std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst);
+
+  Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
+                         Value *NewOP2, FixedVectorType *ResType,
+                         unsigned NumElts, bool BitCastOp);
+};
+
+} // end anonymous namespace
+
+char HexagonGenWideningVecFloatInstr::ID = 0;
+
+// The pass requests no analyses in getAnalysisUsage(), so it is registered
+// without any analysis dependency (the former DominatorTreeWrapperPass
+// dependency was never consumed).
+INITIALIZE_PASS(HexagonGenWideningVecFloatInstr, "widening-vec-float",
+                "Hexagon generate "
+                "widening vector float instructions",
+                false, false)
+
+/// Return true if the constant \p C can be narrowed to IEEE half.
+///
+/// A splat constant is judged by its splat value alone; otherwise every
+/// element of the fixed vector must qualify. An element qualifies when it is
+/// a ConstantFP whose conversion to half (round-to-nearest-even) reports
+/// exactly opOK or exactly opInexact. Non-ConstantFP elements, elements that
+/// cannot be extracted, and constants that are not fixed vectors yield false.
+bool HexagonGenWideningVecFloatInstr::isExtendedConstant(Constant *C) {
+  // Shared check for a single element. Accepts null so that a failed
+  // getAggregateElement() lookup is rejected instead of tripping dyn_cast.
+  auto FitsInHalf = [](const Value *Elt) {
+    const auto *CFP = dyn_cast_or_null<ConstantFP>(Elt);
+    if (!CFP)
+      return false;
+    bool LosesInfo;
+    APFloat APF = CFP->getValueAPF();
+    APFloat::opStatus Status = APF.convert(
+        APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
+    return Status == APFloat::opStatus::opOK ||
+           Status == APFloat::opStatus::opInexact;
+  };
+
+  if (Value *SplatV = C->getSplatValue())
+    return FitsInHalf(SplatV);
+
+  // Only fixed vectors can be walked element by element.
+  auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
+  if (!VecTy)
+    return false;
+
+  for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I)
+    if (!FitsInHalf(C->getAggregateElement(I)))
+      return false;
+  return true;
+}
+
+/// Return the element size in bits associated with \p V.
+///
+/// For a Constant the result is half the scalar bit width of its type;
+/// for any other value it is the scalar bit width of its type (the element
+/// type for vectors, the type itself for scalars).
+unsigned HexagonGenWideningVecFloatInstr::getElementSizeInBits(Value *V) {
+  // getScalarSizeInBits() handles both vector and scalar types, so no
+  // unchecked vector cast is needed on the constant path.
+  const unsigned EltSize = V->getType()->getScalarSizeInBits();
+  if (isa<Constant>(V))
+    return EltSize / 2;
+  return EltSize;
+}
+
+/// Describe operand \p V in \p OPI and report whether it has a supported
+/// shape.
+///
+/// Supported shapes are: an fpext, a zero-mask shuffle of an fpext inserted
+/// at lane 0 of a poison vector, or a constant accepted by
+/// isExtendedConstant(). An fpext whose source is bfloat (scalar or vector
+/// element) is rejected. OPI.OP is always set; OPI.ExtInOP and OPI.ExtInSize
+/// are set only when true is returned.
+bool HexagonGenWideningVecFloatInstr::getOperandInfo(Value *V, OPInfo &OPI) {
+  using namespace PatternMatch;
+  OPI.OP = V;
+
+  Value *Narrow = nullptr;
+  const bool IsExtended =
+      match(V, m_FPExt(m_Value(Narrow))) ||
+      match(V, m_Shuffle(
+                   m_InsertElt(m_Poison(), m_FPExt(m_Value(Narrow)), m_Zero()),
+                   m_Poison(), m_ZeroMask()));
+
+  if (IsExtended) {
+    // Do not confuse bf16 with ieee-fp16. getScalarType() yields the element
+    // type for the vector form and the type itself for the splat form.
+    if (Narrow->getType()->getScalarType()->isBFloatTy())
+      return false;
+
+    OPI.ExtInOP = Narrow;
+    OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
+    return true;
+  }
+
+  Constant *ConstOp = nullptr;
+  if (!match(V, m_Constant(ConstOp)) || !isExtendedConstant(ConstOp))
+    return false;
+
+  OPI.ExtInOP = ConstOp;
+  OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
+  return true;
+}
+
+/// Map an element width in bits to the matching floating-point type:
+/// 16 -> half, 32 -> float. Any other width is unreachable.
+Type *HexagonGenWideningVecFloatInstr::getElementTy(unsigned size,
+                                                    IRBuilder<> &IRB) {
+  if (size == 16)
+    return IRB.getHalfTy();
+  if (size == 32)
+    return IRB.getFloatTy();
+  llvm_unreachable("Unhandled Element size");
+}
+
+/// Produce the operand to feed the intrinsic for \p OPI at element width
+/// \p NewExtSize with \p NumElts elements.
+///
+/// A constant source is fptrunc'ed to the requested vector type. A vector
+/// source whose element size already equals \p NewExtSize is returned as is.
+/// Every other case returns nullptr.
+Value *HexagonGenWideningVecFloatInstr::adjustExtensionForOp(
+    OPInfo &OPI, IRBuilder<> &IRB, unsigned NewExtSize, unsigned NumElts) {
+  Value *Src = OPI.ExtInOP;
+  unsigned EltSize = getElementSizeInBits(Src);
+  assert(NewExtSize >= EltSize);
+
+  // Built up front so an unsupported width is diagnosed by getElementTy()
+  // regardless of which path is taken below.
+  auto *TargetTy =
+      FixedVectorType::get(getElementTy(NewExtSize, IRB), NumElts);
+
+  if (auto *ConstSrc = dyn_cast<Constant>(Src))
+    return IRB.CreateFPTrunc(ConstSrc, TargetTy);
+
+  const bool AlreadyRequestedWidth =
+      Src->getType()->isVectorTy() && NewExtSize == EltSize;
+  return AlreadyRequestedWidth ? Src : nullptr;
+}
+
+/// Emit calls to the V6_hi_128B and V6_lo_128B intrinsics on \p OP, inserted
+/// before \p Inst, and return them as the pair (hi, lo). When \p Inst
+/// produces a 128-element vector, \p OP is first bitcast to <64 x i32>.
+std::pair<Value *, Value *>
+HexagonGenWideningVecFloatInstr::opSplit(Value *OP, Instruction *Inst) {
+  const unsigned ResultElts =
+      cast<FixedVectorType>(Inst->getType())->getNumElements();
+  IRBuilder<> Builder(Inst);
+
+  Function *HiDecl =
+      Intrinsic::getOrInsertDeclaration(M, Intrinsic::hexagon_V6_hi_128B);
+  Function *LoDecl =
+      Intrinsic::getOrInsertDeclaration(M, Intrinsic::hexagon_V6_lo_128B);
+
+  Value *Src = OP;
+  if (ResultElts == 128)
+    Src = Builder.CreateBitCast(
+        Src, FixedVectorType::get(Builder.getInt32Ty(), 64));
+
+  Value *HiPart = Builder.CreateCall(HiDecl, {Src});
+  Value *LoPart = Builder.CreateCall(LoDecl, {Src});
+  return {HiPart, LoPart};
+}
+
+Value *HexagonGenWideningVecFloatInstr::createIntrinsic(
+ Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
+ FixedVectorType *ResType, unsigned NumElts, bool BitCastO...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169559
More information about the llvm-commits
mailing list