[llvm] e8b255d - Hexagon QFP Optimizer (#163843)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 13:59:07 PDT 2025
Author: Fateme Hosseini
Date: 2025-10-20T15:59:03-05:00
New Revision: e8b255df1bb41411c3908b205779ba28264d5c2e
URL: https://github.com/llvm/llvm-project/commit/e8b255df1bb41411c3908b205779ba28264d5c2e
DIFF: https://github.com/llvm/llvm-project/commit/e8b255df1bb41411c3908b205779ba28264d5c2e.diff
LOG: Hexagon QFP Optimizer (#163843)
Co-authored-by: Rahul Utkoor <quic_rutkoor at quicinc.com>
Co-authored-by: Brendon Cahoon <bcahoon at quicinc.com>
Co-authored-by: abhikran <abhikran at codeaurora.org>
Co-authored-by: Sumanth Gundapaneni <sgundapa at quicinc.com>
Co-authored-by: Ikhlas Ajbar <iajbar at quicinc.com>
Co-authored-by: Anirudh Sundar <quic_sanirudh at quicinc.com>
Co-authored-by: Yashas Andaluri <quic_yandalur at quicinc.com>
Co-authored-by: quic-santdas <quic_santdas at quicinc.com>
Added:
llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
llvm/test/CodeGen/Hexagon/qfp-conv.ll
llvm/test/CodeGen/Hexagon/qfp-enabled.ll
llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir
llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir
llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir
llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir
llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll
Modified:
llvm/lib/Target/Hexagon/CMakeLists.txt
llvm/lib/Target/Hexagon/Hexagon.h
llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index d758260a8ab5d..1a5f09642ea66 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
HexagonOptAddrMode.cpp
HexagonOptimizeSZextends.cpp
HexagonPeephole.cpp
+ HexagonQFPOptimizer.cpp
HexagonRDFOpt.cpp
HexagonRegisterInfo.cpp
HexagonSelectionDAGInfo.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 109aba53b6e3e..422ab20891b94 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
void initializeHexagonVectorPrintPass(PassRegistry &);
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
Pass *createHexagonLoopIdiomPass();
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
FunctionPass *createHexagonVectorPrint();
FunctionPass *createHexagonVExtract();
FunctionPass *createHexagonExpandCondsets();
+FunctionPass *createHexagonQFPOptimizer();
} // end namespace llvm;
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
new file mode 100644
index 0000000000000..479ac90b7d526
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -0,0 +1,334 @@
+//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
+// optimizer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic infrastructure for optimizing intermediate conversion instructions
+// generated while performing vector floating point operations.
+// Currently run at the start of code generation for Hexagon; bypasses
+// redundant conversion instructions by replacing uses of a conversion result
+// with the appropriate machine operand. Liveness is preserved after this pass.
+//
+// @note: The redundant conversion instructions are not eliminated in this pass.
+// In this pass, we are only trying to replace the uses of conversion
+// instructions with its appropriate QFP instruction. We are leaving the job to
+// Dead instruction Elimination pass to remove redundant conversion
+// instructions.
+//
+// Brief overview of working of this QFP optimizer.
+// This version of the Hexagon QFP optimizer iterates over each instruction
+// and checks whether it belongs to the Hexagon floating-point HVX arithmetic
+// instruction category (Add, Sub, Mul). It then finds the unique definition
+// of each source machine operand of that instruction.
+//
+// Example:
+// Let MachineInstr *MI be the HVX vadd instruction
+// MI -> $v0 = V6_vadd_sf $v1, $v2
+// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+//
+// In the above example, DefMI1 and DefMI2 give the unique definitions
+// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
+//
+// If neither definition is a conversion instruction (V6_vconv_sf_qf32,
+// V6_vconv_hf_qf16), then the pass skips the current instruction and
+// moves on to the next one.
+//
+// If one of the definitions is a conversion instruction, then the pass
+// replaces the arithmetic instruction with its corresponding mix variant.
+// In the above example, if $v1 is defined by a conversion instruction
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
+//
+// If both the definitions are conversion instructions then the instruction will
+// be replaced with its qf variant
+// In the above example, if $v1 and $v2 are conversion instructions
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
+// with $v4)
+//
+// Currently, in this pass, we are not handling the case when the definitions
+// are PHI inst.
+//
+//===----------------------------------------------------------------------===//
+#include <unordered_set>
+#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <vector>
+
+#define DEBUG_TYPE "hexagon-qfp-optimizer"
+
+using namespace llvm;
+
+// Command-line switch (-disable-qfp-opt) that turns this pass into a no-op.
+// The option is only read inside this file, so it gets internal linkage to
+// avoid exporting a global symbol from the Hexagon backend.
+static cl::opt<bool>
+    DisableQFOptimizer("disable-qfp-opt", cl::init(false),
+                       cl::desc("Disable optimization of Qfloat operations."));
+
+namespace {
+// Opcode rewrite table consulted by optimizeQfp(): each key maps to the opcode
+// that replaces it when one more source operand is taken directly in qf form.
+// Entries chain: an IEEE (sf/hf) opcode maps to its "mix" variant, and that
+// "mix" variant maps to the all-qf variant, so optimizeQfp() looks the opcode
+// up twice when both operands come from conversions. V6_vmpy_qf32_sf has no
+// "mix" entry and maps straight to V6_vmpy_qf32.
+const std::map<unsigned short, unsigned short> QFPInstMap{
+ {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
+ {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
+ {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
+ {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
+ {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
+ {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
+ {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
+ {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
+ {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
+ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
+ {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
+ {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+} // namespace
+
+namespace llvm {
+
+// Pass factory and pass-registry initializer for the QFP optimizer. The same
+// two declarations are also added to Hexagon.h by this change.
+FunctionPass *createHexagonQFPOptimizer();
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
+} // namespace llvm
+
+namespace {
+
+// Machine-function pass that rewrites HVX floating-point arithmetic whose
+// source operands are produced by qf->sf/hf conversions so that the arithmetic
+// consumes the qf values directly (see the file header for the rewrite rules).
+struct HexagonQFPOptimizer : public MachineFunctionPass {
+public:
+ // Pass identification; only its address is used.
+ static char ID;
+
+ HexagonQFPOptimizer() : MachineFunctionPass(ID) {}
+
+ // Entry point: visits every instruction of MF and calls optimizeQfp() on the
+ // ones whose opcode is a key of QFPInstMap. Returns true if MF was changed.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ // Tries to insert a qf/mix replacement for MI in front of it. Returns true
+ // when a replacement was inserted; the caller then erases MI.
+ bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
+
+ // The pass only replaces instructions in place, so the CFG is preserved.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ // Cached per-function state; (re)initialized in runOnMachineFunction().
+ const HexagonSubtarget *HST = nullptr;
+ const HexagonInstrInfo *HII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+};
+
+char HexagonQFPOptimizer::ID = 0;
+} // namespace
+
+// Register the pass with the PassRegistry under the command-line name
+// "hexagon-qfp-optimizer" (the name used by -run-pass in the MIR tests).
+INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
+ HEXAGON_QFP_OPTIMIZER, false, false)
+
+// Factory used by the Hexagon pass pipeline (HexagonPassConfig) to create a
+// fresh instance of the pass. Ownership of the returned object passes to the
+// caller.
+FunctionPass *llvm::createHexagonQFPOptimizer() {
+ return new HexagonQFPOptimizer();
+}
+
+/// Try to make \p MI consume qf operands directly instead of the sf/hf results
+/// of V6_vconv_sf_qf32 / V6_vconv_hf_qf16.
+///
+/// When a rewrite applies, the replacement instruction (opcode taken from
+/// QFPInstMap) is inserted immediately before \p MI with the same destination
+/// register, and true is returned; the caller is expected to erase \p MI. The
+/// conversion instructions themselves are left in place; only the kill flag on
+/// their source operand is moved to the new instruction.
+///
+/// \param MI  Candidate arithmetic instruction.
+/// \param MBB Basic block that contains \p MI.
+/// \returns true if a replacement for \p MI was inserted, false otherwise.
+bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
+                                      MachineBasicBlock *MBB) {
+  // QFP arithmetic has one destination and two sources, and only opcodes
+  // listed in QFPInstMap can be rewritten.
+  if (MI->getNumOperands() < 3)
+    return false;
+  const unsigned Opc = MI->getOpcode();
+  auto MapIt = QFPInstMap.find(Opc);
+  if (MapIt == QFPInstMap.end())
+    return false;
+  // Opcode to use when exactly one operand is taken in qf form.
+  const unsigned short MixTy = MapIt->second;
+
+  // Single definition of a virtual-register operand, or nullptr otherwise.
+  // getVRegDef() assumes at most one definition, which only holds for virtual
+  // registers in SSA form, so anything else is rejected up front.
+  auto VirtRegDef = [this](const MachineOperand &MO) -> MachineInstr * {
+    if (!MO.isReg() || !MO.getReg().isVirtual())
+      return nullptr;
+    return MRI->getVRegDef(MO.getReg());
+  };
+
+  // True if the first operand of Def is a virtual register of the HvxWR
+  // class. Every access is guarded, so a missing instruction, an instruction
+  // without operands, a non-register operand or a physical register all count
+  // as "not a W register" instead of being dereferenced blindly.
+  auto DefinesWReg = [this](const MachineInstr *Def) {
+    if (!Def || Def->getNumOperands() == 0)
+      return false;
+    const MachineOperand &Dst = Def->getOperand(0);
+    return Dst.isReg() && Dst.getReg().isVirtual() &&
+           MRI->getRegClass(Dst.getReg()) == &Hexagon::HvxWRRegClass;
+  };
+
+  // Reaching definitions of the two source operands of MI.
+  MachineInstr *DefMI1 = VirtRegDef(MI->getOperand(1));
+  MachineInstr *DefMI2 = VirtRegDef(MI->getOperand(2));
+  if (!DefMI1 || !DefMI2)
+    return false;
+  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
+             DefMI2->dump());
+
+  // Definitions of the first source operand of DefMI1 / DefMI2, i.e. of the
+  // value a conversion would be converting.
+  MachineInstr *Inst1 = DefMI1->getNumOperands() > 1
+                            ? VirtRegDef(DefMI1->getOperand(1))
+                            : nullptr;
+  MachineInstr *Inst2 = DefMI2->getNumOperands() > 1
+                            ? VirtRegDef(DefMI2->getOperand(1))
+                            : nullptr;
+
+  const unsigned Def1OP = DefMI1->getOpcode();
+  const unsigned Def2OP = DefMI2->getOpcode();
+  const bool Def1IsSfConv = Def1OP == Hexagon::V6_vconv_sf_qf32;
+  const bool Def1IsHfConv = Def1OP == Hexagon::V6_vconv_hf_qf16;
+  const bool Def2IsSfConv = Def2OP == Hexagon::V6_vconv_sf_qf32;
+  const bool Def2IsHfConv = Def2OP == Hexagon::V6_vconv_hf_qf16;
+  // V6_vmpy_qf32_sf has no "mix" form in QFPInstMap; it is only rewritten
+  // when both operands are conversions.
+  const bool IsMpyQf32Sf = Opc == Hexagon::V6_vmpy_qf32_sf;
+
+  MachineOperand &Res = MI->getOperand(0);
+
+  // Inserts "Res = NewOpc A, B" in front of MI, carrying over the sub-register
+  // index of each source and applying the given register-state flags.
+  auto EmitReplacement = [&](unsigned NewOpc, const MachineOperand &A,
+                             unsigned AFlags, const MachineOperand &B,
+                             unsigned BFlags) {
+    MachineInstrBuilder MIB =
+        BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(NewOpc), Res.getReg())
+            .addReg(A.getReg(), AFlags, A.getSubReg())
+            .addReg(B.getReg(), BFlags, B.getSubReg());
+    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+    (void)MIB;
+  };
+
+  // Case 1: Both reaching defs of MI are qf to sf/hf conversions.
+  if ((Def1IsSfConv && Def2IsSfConv) || (Def1IsHfConv && Def2IsHfConv)) {
+    // If the reaching defs of the conversions are W register type, we return.
+    if (DefinesWReg(Inst1) || DefinesWReg(Inst2))
+      return false;
+
+    // Resolve the all-qf opcode before touching any operand flags, so that
+    // bailing out leaves the conversions exactly as they were.
+    unsigned short QfTy = MixTy;
+    if (!IsMpyQf32Sf) {
+      auto QfIt = QFPInstMap.find(MixTy);
+      if (QfIt == QFPInstMap.end())
+        return false;
+      QfTy = QfIt->second;
+    }
+
+    // The new instruction uses the conversion sources after the conversions
+    // do, so any kill flag has to move from the conversion to the new use.
+    MachineOperand &Src1 = DefMI1->getOperand(1);
+    MachineOperand &Src2 = DefMI2->getOperand(1);
+
+    const unsigned Op0F = getKillRegState(Src1.isKill());
+    Src1.setIsKill(false);
+
+    const unsigned Op1F = getKillRegState(Src2.isKill());
+    Src2.setIsKill(false);
+
+    EmitReplacement(QfTy, Src1, Op0F, Src2, Op1F);
+    return true;
+  }
+
+  // Case 2: Only the left operand is a conversion to sf/hf.
+  if (((Def1IsSfConv && !Def2IsSfConv) || (Def1IsHfConv && !Def2IsHfConv)) &&
+      !DefMI2->isPHI() && !IsMpyQf32Sf) {
+    if (DefinesWReg(Inst1))
+      return false;
+
+    MachineOperand &Src1 = DefMI1->getOperand(1);
+    MachineOperand &Src2 = MI->getOperand(2);
+
+    const unsigned Op0F = getKillRegState(Src1.isKill());
+    Src1.setIsKill(false);
+    const unsigned Op1F = getKillRegState(Src2.isKill());
+
+    EmitReplacement(MixTy, Src1, Op0F, Src2, Op1F);
+    return true;
+  }
+
+  // Case 3: Only the right operand is a conversion to sf/hf.
+  if (((!Def1IsSfConv && Def2IsSfConv) || (!Def1IsHfConv && Def2IsHfConv)) &&
+      !DefMI1->isPHI() && !IsMpyQf32Sf) {
+    // The second operand of the original instruction is converted.
+    // In "mix" instructions, the "qf" operand is always the first operand, so
+    // the operands have to be swapped.
+
+    // Caveat: vsub is not commutative w.r.t operands.
+    if (MixTy == Hexagon::V6_vsub_qf16_mix ||
+        MixTy == Hexagon::V6_vsub_qf32_mix)
+      return false;
+
+    if (DefinesWReg(Inst2))
+      return false;
+
+    MachineOperand &Src1 = MI->getOperand(1);
+    MachineOperand &Src2 = DefMI2->getOperand(1);
+
+    const unsigned Op1F = getKillRegState(Src2.isKill());
+    Src2.setIsKill(false);
+    const unsigned Op0F = getKillRegState(Src1.isKill());
+
+    // Notice the operands are flipped: the qf source goes first.
+    EmitReplacement(MixTy, Src2, Op1F, Src1, Op0F);
+    return true;
+  }
+
+  return false;
+}
+
+/// Run the QFP optimizer over \p MF.
+///
+/// Does nothing when -disable-qfp-opt is given, when the subtarget lacks HVX
+/// v68 operations or packet support, or when the function is to be skipped.
+/// Otherwise every instruction whose opcode is a key of QFPInstMap is handed
+/// to optimizeQfp(); if that inserts a replacement, the original instruction
+/// is erased.
+///
+/// \returns true if any instruction was replaced.
+bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  if (DisableQFOptimizer)
+    return false;
+
+  HST = &MF.getSubtarget<HexagonSubtarget>();
+  if (!HST->useHVXV68Ops() || !HST->usePackets() ||
+      skipFunction(MF.getFunction()))
+    return false;
+  HII = HST->getInstrInfo();
+  MRI = &MF.getRegInfo();
+
+  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimizer Pass for : " << MF.getName()
+                    << " Optimize intermediate conversions ===\n");
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF) {
+    // Use instr_iterator for both ends of the walk rather than mixing it with
+    // the bundle-aware MachineBasicBlock::iterator.
+    for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+                                           MIE = MBB.instr_end();
+         MII != MIE;) {
+      MachineInstr *MI = &*MII;
+      ++MII; // Advance first, as MI might be removed below.
+
+      // Only opcodes that are keys of QFPInstMap are candidates. The
+      // conversion opcodes are not keys, so they need no separate exclusion.
+      if (!QFPInstMap.count(MI->getOpcode()))
+        continue;
+
+      LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+      if (optimizeQfp(MI, &MBB)) {
+        // The replacement was inserted in front of MI; drop the original.
+        MI->eraseFromParent();
+        LLVM_DEBUG(dbgs() << "\t....Removing....");
+        Changed = true;
+      }
+    }
+  }
+  return Changed;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index f5d8b696733ba..d9824a3154093 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() {
initializeHexagonPeepholePass(PR);
initializeHexagonSplitConst32AndConst64Pass(PR);
initializeHexagonVectorPrintPass(PR);
+ initializeHexagonQFPOptimizerPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonGenInsert());
if (EnableEarlyIf)
addPass(createHexagonEarlyIfConversion());
+ addPass(createHexagonQFPOptimizer());
}
return false;
diff --git a/llvm/test/CodeGen/Hexagon/qfp-conv.ll b/llvm/test/CodeGen/Hexagon/qfp-conv.ll
new file mode 100644
index 0000000000000..d2d393e1a859d
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfp-conv.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=hexagon -mattr=+hvxv68,+hvx,+hvx-length128b < %s | FileCheck %s
+
+; Test that the Qfloat optimization pass doesn't crash due to invalid
+; instructions.
+
+; CHECK: v{{[0-9]+}}.hf = v{{[0-9]:[0-9]}}.qf32
+
+define void @test(
+ <32 x i32>* %optr,
+ <64 x i32> %in64,
+ <32 x i32> %va,
+ <32 x i32> %vb
+) local_unnamed_addr #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %optr.068 = phi <32 x i32>* [ %optr, %entry ], [ %incdec.ptr6, %for.body ]
+ %0 = tail call <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32> %in64) #2
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32> %0) #2
+ %2 = tail call <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32> %va, <32 x i32> %1) #2
+ %3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %2, <32 x i32> %va, <32 x i32> %vb) #2
+ %4 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %3, <32 x i32> %vb) #2
+ %5 = tail call <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32> %va, <32 x i32> %4) #2
+ store <32 x i32> %5, <32 x i32>* %optr.068, align 1
+ %incdec.ptr6 = getelementptr inbounds <32 x i32>, <32 x i32>* %optr.068, i32 1
+ br label %for.body
+}
+
+declare <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/qfp-enabled.ll b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll
new file mode 100644
index 0000000000000..a5cc5fa43167e
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll
@@ -0,0 +1,19 @@
+; Tests if the flag to disable qfp optimizer pass works or not.
+
+; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \
+; RUN: < %s -o -| FileCheck %s --check-prefix=ENABLED
+; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \
+; RUN: -disable-qfp-opt < %s -o -| FileCheck %s --check-prefix=DISABLED
+
+define dso_local <32 x i32> @conv1_qf32(<32 x i32> noundef %input1, <32 x i32> noundef %input2) local_unnamed_addr {
+entry:
+; DISABLED: [[V2:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf)
+; DISABLED: [[V3:v[0-9]+]].sf = [[V2]].qf32
+; DISABLED: qf32 = vadd(v0.sf,[[V3]].sf)
+; ENABLED: [[V4:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf)
+; ENABLED: qf32 = vadd([[V4]].qf32,v0.sf)
+ %0 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %input2)
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %0)
+ %2 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %1)
+ ret <32 x i32> %2
+}
diff --git a/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir
new file mode 100644
index 0000000000000..d8dde7d70885b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir
@@ -0,0 +1,95 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer -run-pass machineverifier %s -o - | FileCheck %s
+
+# Test that the killed RegState is removed from the DefMI operands and that
+# the killed RegState is set on the corresponding operands of the new MI.
+# CHECK-LABEL: name: qfpAdd
+# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]]
+# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32 killed %[[REG1]], killed %[[REG2]]
+# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG3:([0-9]+)]]
+# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG4:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32 killed %[[REG3]], killed %[[REG4]]
+
+---
+name: qfpAdd
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %7:hvxvr = V6_vL32Ub_ai %3:intregs, 0
+ %8:hvxvr = V6_vconv_sf_qf32 killed %4:hvxvr
+ %9:hvxvr = V6_vconv_sf_qf32 killed %5:hvxvr
+ %10:hvxvr = V6_vadd_sf %8:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr
+ %12:hvxvr = V6_vconv_sf_qf32 killed %7:hvxvr
+ %13:hvxvr = V6_vadd_sf killed %11:hvxvr, killed %12:hvxvr
+...
+
+
+# Test that the killed RegState is removed from the DefMI operands.
+# CHECK-LABEL: name: qfpAddMix
+# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG1]], %{{[0-9]+}}
+# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG2]], %{{[0-9]+}}
+
+---
+name: qfpAddMix
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %6:hvxvr = V6_vmpy_qf32_sf %4, %5
+ %7:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr
+ %8:hvxvr = V6_vadd_sf %3:hvxvr, %7:hvxvr
+ %9:hvxvr = V6_vmpy_qf32_sf %4, %5
+ %10:hvxvr = V6_vconv_sf_qf32 killed %9:hvxvr
+ %11:hvxvr = V6_vadd_sf %3:hvxvr, killed %10:hvxvr
+...
+
+
+# Test that V6_vadd_qf32_mix is generated without swapping the operands.
+# V6_vadd_qf32_mix takes qf32 as its first operand. In the test the converted
+# value is already the first operand, so V6_vadd_qf32_mix must be generated.
+# CHECK-LABEL: name: qfpAddSwapMix
+# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG1]], %{{[0-9]+}}
+# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG2]], %{{[0-9]+}}
+
+---
+name: qfpAddSwapMix
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %6:hvxvr = V6_vmpy_qf32_sf %4, %5
+ %7:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr
+ %8:hvxvr = V6_vadd_sf %7:hvxvr, %3:hvxvr
+ %9:hvxvr = V6_vmpy_qf32_sf %4, %5
+ %10:hvxvr = V6_vconv_sf_qf32 killed %9:hvxvr
+ %11:hvxvr = V6_vadd_sf killed %10:hvxvr, %3:hvxvr
+...
diff --git a/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir b/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir
new file mode 100644
index 0000000000000..1d78203cf5d5a
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir
@@ -0,0 +1,33 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s
+
+# CHECK: V6_vshuffvdd
+# CHECK: V6_vadd_sf
+# CHECK: V6_vadd_qf32_mix{{.*}}vsub_lo
+# CHECK: V6_vadd_qf32_mix{{.*}}vsub_hi
+
+---
+name: qfp_subreg_fix
+alignment: 16
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ %10:intregs = IMPLICIT_DEF
+ %9:hvxvr = V6_vL32Ub_ai %10, 0 :: (load (s1024) from `ptr undef`, align 4)
+ %11:intregs = A2_tfrsi 15360
+ %12:hvxvr = V6_lvsplath %11
+ %13:hvxwr = V6_vmpy_qf32_hf %9, %12
+ %15:hvxvr = V6_vconv_sf_qf32 %13.vsub_lo
+ %17:hvxvr = V6_vconv_sf_qf32 %13.vsub_hi
+ %18:intregslow8 = A2_tfrsi -4
+ %19:hvxwr = V6_vshuffvdd %17, %15, %18
+ %21:hvxvr = V6_vadd_sf %19.vsub_hi, %19.vsub_hi
+ %22:hvxvr = V6_vconv_sf_qf32 %21
+ %24:hvxvr = V6_vadd_sf %19.vsub_lo, %19.vsub_lo
+ %25:hvxvr = V6_vconv_sf_qf32 %24
+ %26:hvxvr = V6_vadd_sf %25, %19.vsub_lo
+ %27:hvxvr = V6_vconv_sf_qf32 %26
+ %28:hvxvr = V6_vadd_sf %22, %19.vsub_hi
+ %29:hvxvr = V6_vconv_sf_qf32 %28
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
new file mode 100644
index 0000000000000..c16370c3b907d
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
@@ -0,0 +1,21 @@
+; Tests if generated vadd instruction takes in qf32
+; type as first parameter instead of a sf type without
+; any conversion instruction of type sf = qf32
+
+; RUN: llc -mtriple=hexagon < %s -o - | FileCheck %s
+
+; CHECK: [[V2:v[0-9]+]] = vxor([[V2]],[[V2]])
+; CHECK: [[V0:v[0-9]+]].qf32 = vmpy([[V0]].sf,[[V2]].sf)
+; CHECK: [[V1:v[0-9]+]].qf32 = vmpy([[V1]].sf,[[V2]].sf)
+; CHECK: [[V4:v[0-9]+]].qf32 = vadd([[V0]].qf32,[[V2]].sf)
+; CHECK: [[V5:v[0-9]+]].qf32 = vadd([[V1]].qf32,[[V2]].sf)
+
+define void @_Z19compute_ripple_geluIDF16_EviPT_PKS0_(ptr %out_ptr, <64 x float> %conv14.ripple.vectorized) #0 {
+entry:
+ %mul16.ripple.vectorized = fmul <64 x float> %conv14.ripple.vectorized, zeroinitializer
+ %conv17.ripple.vectorized = fptrunc <64 x float> %mul16.ripple.vectorized to <64 x half>
+ store <64 x half> %conv17.ripple.vectorized, ptr %out_ptr, align 2
+ ret void
+}
+
+attributes #0 = { "target-features"="+hvx-length128b,+hvxv75,+v75,-long-calls,-small-data" }
diff --git a/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir b/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir
new file mode 100644
index 0000000000000..9a9e938f35d85
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir
@@ -0,0 +1,79 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s
+
+
+# Test that the operands are swapped for Add if the second operand
+# is a qf32 to sf conversion. V6_vadd_qf32_mix supports first operand
+# as qf32.
+# CHECK-LABEL: name: qfpAddMix
+# CHECK: %[[REG:([0-9]+)]]:hvxvr = V6_vmpy_qf32_sf
+# CHECK: V6_vadd_qf32_mix %[[REG]]
+
+---
+name: qfpAddMix
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %6:hvxvr = V6_vmpy_qf32_sf %4, %5
+ %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+ %8:hvxvr = V6_vadd_sf %3:hvxvr, %7:hvxvr
+...
+
+
+# Test that we do not generate V6_vsub_qf32_mix for the below test.
+# V6_vsub_qf32_mix only allows qf32 as its first operand. In the test qf32
+# is passed as the second operand. As sub is not commutative, we should not
+# generate the mix instruction.
+# CHECK-LABEL: name: qfpSubNoMix
+# CHECK-NOT: V6_vsub_qf32_mix
+
+---
+name: qfpSubNoMix
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %6:hvxvr = V6_vmpy_qf32_sf %4, %5
+ %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+ %8:hvxvr = V6_vsub_sf %3:hvxvr, %7:hvxvr
+...
+
+
+# Test that we do generate V6_vsub_qf32_mix for the below test.
+# V6_vsub_qf32_mix only allows qf32 as its first operand. In the test qf32
+# is passed as the first operand, so V6_vsub_qf32_mix must be generated.
+# CHECK-LABEL: name: qfpSubMix
+# CHECK: V6_vsub_qf32_mix
+
+---
+name: qfpSubMix
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %4:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %6:hvxvr = V6_vmpy_qf32_sf %4, %5
+ %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+ %8:hvxvr = V6_vsub_sf %7:hvxvr, %3:hvxvr
+...
diff --git a/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir b/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir
new file mode 100644
index 0000000000000..f0b1d3c96bbb3
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir
@@ -0,0 +1,23 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s
+
+# CHECK-LABEL: name: qfpAdd32
+# CHECK: V6_vd0
+# CHECK-NEXT: V6_vL32Ub_ai
+# CHECK-NEXT: V6_vadd_sf
+# CHECK-NEXT: V6_vconv_sf_qf32
+# CHECK-NEXT: V6_vS32Ub_ai
+---
+name: qfpAdd32
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %3:hvxvr = V6_vd0
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vadd_sf %3:hvxvr, %4:hvxvr
+ %6:hvxvr = V6_vconv_sf_qf32 %5:hvxvr
+ V6_vS32Ub_ai %1:intregs, 0, %6:hvxvr
+...
diff --git a/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll b/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll
new file mode 100644
index 0000000000000..2d46da7a039bc
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; REQUIRES: hexagon
+
+; This test was asserting because getVRegDef() was called on a register with
+; multiple defs.
+; Checks that the test does not assert and vsub is generated.
+; CHECK: vsub
+
+target triple = "hexagon"
+
+ at v = common dso_local local_unnamed_addr global <32 x i32> zeroinitializer, align 128
+
+; Function Attrs: nounwind
+define dso_local void @hvx_twoSum(<32 x i32>* nocapture noundef writeonly %s2lo) local_unnamed_addr #0 {
+entry:
+ %0 = load <32 x i32>, <32 x i32>* @v, align 128
+ %call = tail call inreg <32 x i32> @MY_Vsf_equals_Vqf32(<32 x i32> noundef %0) #3
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.vsub.sf.128B(<32 x i32> %call, <32 x i32> %call)
+ store <32 x i32> %1, <32 x i32>* @v, align 128
+ store <32 x i32> %1, <32 x i32>* %s2lo, align 128
+ ret void
+}
+
+declare dso_local inreg <32 x i32> @MY_Vsf_equals_Vqf32(<32 x i32> noundef) local_unnamed_addr #1
+
+; Function Attrs: nofree nosync nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsub.sf.128B(<32 x i32>, <32 x i32>) #2
+
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv73" "target-features"="+hvx-length128b,+hvxv73,+v73,-long-calls" }
+attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv73" "target-features"="+hvx-length128b,+hvxv73,+v73,-long-calls" }
+attributes #2 = { nofree nosync nounwind readnone }
+attributes #3 = { nounwind }
More information about the llvm-commits
mailing list