[llvm] Hexagon QFP Optimizer (PR #163843)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 09:02:28 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-hexagon
Author: Fateme Hosseini (fhossein-quic)
<details>
<summary>Changes</summary>
---
Patch is 29.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163843.diff
12 Files Affected:
- (modified) llvm/lib/Target/Hexagon/CMakeLists.txt (+1)
- (modified) llvm/lib/Target/Hexagon/Hexagon.h (+3)
- (added) llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp (+343)
- (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp (+2)
- (added) llvm/test/CodeGen/Hexagon/qfp-conv.ll (+35)
- (added) llvm/test/CodeGen/Hexagon/qfp-enabled.ll (+19)
- (added) llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir (+95)
- (added) llvm/test/CodeGen/Hexagon/qfp-subreg-bug.mir (+33)
- (added) llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll (+21)
- (added) llvm/test/CodeGen/Hexagon/vect/qfp-mix.mir (+79)
- (added) llvm/test/CodeGen/Hexagon/vect/qfp-zeroinit.mir (+23)
- (added) llvm/test/CodeGen/Hexagon/vect/unique-vreg-def.ll (+32)
``````````diff
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index d758260a8ab5d..1a5f09642ea66 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
HexagonOptAddrMode.cpp
HexagonOptimizeSZextends.cpp
HexagonPeephole.cpp
+ HexagonQFPOptimizer.cpp
HexagonRDFOpt.cpp
HexagonRegisterInfo.cpp
HexagonSelectionDAGInfo.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 109aba53b6e3e..44c7a8fcafcdc 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
void initializeHexagonVectorPrintPass(PassRegistry &);
+void initializeHexagonQFPoptimizerPass(PassRegistry &);
+
Pass *createHexagonLoopIdiomPass();
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
FunctionPass *createHexagonVectorPrint();
FunctionPass *createHexagonVExtract();
FunctionPass *createHexagonExpandCondsets();
+FunctionPass *createHexagonQFPoptimizer();
} // end namespace llvm;
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
new file mode 100644
index 0000000000000..196ad3dd25824
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -0,0 +1,343 @@
+//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
+// optimizer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic infrastructure for optimizing intermediate conversion instructions
+// generated while performing vector floating point operations.
+// Currently run at the start of code generation for Hexagon, it cleans up
+// redundant conversion instructions by replacing the uses of a conversion
+// with the appropriate machine operand. Liveness is preserved after this pass.
+//
+// @note: The redundant conversion instructions are not eliminated in this pass.
+// In this pass, we are only trying to replace the uses of conversion
+// instructions with its appropriate QFP instruction. We are leaving the job to
+// Dead instruction Elimination pass to remove redundant conversion
+// instructions.
+//
+// Brief overview of working of this QFP optimizer.
+// This version of the Hexagon QFP optimizer iterates over each instruction
+// and checks whether it belongs to the Hexagon floating point HVX arithmetic
+// instruction category (Add, Sub, Mul). It then finds the unique definition
+// of each machine operand of that instruction.
+//
+// Example:
+// Let MachineInstr *MI be the HVX vadd instruction
+// MI -> $v0 = V6_vadd_sf $v1, $v2
+// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+//
+// In the above example, DefMI1 and DefMI2 give the unique definitions
+// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
+//
+// If neither definition is a conversion instruction (V6_vconv_sf_qf32,
+// V6_vconv_hf_qf16), the pass skips the current instruction and moves on to
+// the next one.
+//
+// If one of the definitions is a conversion instruction, the pass replaces
+// the arithmetic instruction with its corresponding mix variant.
+// In the above example, if $v1 is defined by a conversion instruction
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
+//
+// If both the definitions are conversion instructions then the instruction will
+// be replaced with its qf variant
+// In the above example, if $v1 and $v2 are conversion instructions
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
+// with $v4)
+//
+// Currently, in this pass, we are not handling the case when the definitions
+// are PHI inst.
+//
+//===----------------------------------------------------------------------===//
+#include <unordered_set>
+#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <vector>
+
+#define DEBUG_TYPE "hexagon-qfp-optimizer"
+
+using namespace llvm;
+
+cl::opt<bool>
+ DisableQFOptimizer("disable-qfp-opt", cl::init(false),
+ cl::desc("Disable optimization of Qfloat operations."));
+
+namespace {
+const std::map<unsigned short, unsigned short> QFPInstMap{
+ {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
+ {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
+ {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
+ {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
+ {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
+ {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
+ {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
+ {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
+ {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
+ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
+ {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
+ {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+} // namespace
+
+namespace llvm {
+
+FunctionPass *createHexagonQFPoptimizer();
+void initializeHexagonQFPoptimizerPass(PassRegistry &);
+
+} // namespace llvm
+
+namespace {
+
+struct HexagonQFPoptimizer : public MachineFunctionPass {
+public:
+ static char ID;
+
+ HexagonQFPoptimizer() : MachineFunctionPass(ID) {
+ for (const auto &entry : QFPInstMap) {
+ QFPInstSet.insert(entry.first);
+ QFPInstSet.insert(entry.second);
+ }
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ const HexagonSubtarget *HST = nullptr;
+ const HexagonInstrInfo *HII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ std::unordered_set<unsigned short> QFPInstSet;
+};
+
+char HexagonQFPoptimizer::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(HexagonQFPoptimizer, "hexagon-qfp-optimizer",
+ HEXAGON_QFP_OPTIMIZER, false, false)
+
+FunctionPass *llvm::createHexagonQFPoptimizer() {
+ return new HexagonQFPoptimizer();
+}
+
+bool HexagonQFPoptimizer::optimizeQfp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+
+ // Early exit:
+ // - if instruction is invalid or has too few operands (QFP ops need 2 sources
+ // + 1 dest),
+ // - is not part of the QFP instruction set,
+ // - or does not have a transformation mapping.
+ if (MI->getNumOperands() < 3)
+ return false;
+ if (!QFPInstSet.count(MI->getOpcode()))
+ return false;
+ auto It = QFPInstMap.find(MI->getOpcode());
+ if (It == QFPInstMap.end())
+ return false;
+ unsigned short InstTy = It->second;
+
+ unsigned Op0F = 0;
+ unsigned Op1F = 0;
+ // Get the reaching defs of MI, DefMI1 and DefMI2
+ MachineInstr *DefMI1 = nullptr;
+ MachineInstr *DefMI2 = nullptr;
+
+ if (MI->getOperand(1).isReg())
+ DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (MI->getOperand(2).isReg())
+ DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+ if (!DefMI1 || !DefMI2)
+ return false;
+
+ MachineOperand &Res = MI->getOperand(0);
+ MachineInstr *Inst1 = nullptr;
+ MachineInstr *Inst2 = nullptr;
+ LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
+ DefMI2->dump());
+
+ // Get the reaching defs of DefMI
+ if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() &&
+ DefMI1->getOperand(1).getReg().isVirtual())
+ Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg());
+
+ if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() &&
+ DefMI2->getOperand(1).getReg().isVirtual())
+ Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg());
+
+ unsigned Def1OP = DefMI1->getOpcode();
+ unsigned Def2OP = DefMI2->getOpcode();
+
+ MachineInstrBuilder MIB;
+ // Case 1: Both reaching defs of MI are qf to sf/hf conversions
+ if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP == Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP == Hexagon::V6_vconv_hf_qf16)) {
+
+ // If the reaching defs of DefMI are W register type, we return
+ if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() &&
+ MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass) ||
+ (Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(0).isReg() &&
+ MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass))
+ return false;
+
+ // Analyze the use operands of the conversion to get their KILL status
+ MachineOperand &Src1 = DefMI1->getOperand(1);
+ MachineOperand &Src2 = DefMI2->getOperand(1);
+
+ Op0F = getKillRegState(Src1.isKill());
+ Src1.setIsKill(false);
+
+ Op1F = getKillRegState(Src2.isKill());
+ Src2.setIsKill(false);
+
+ if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) {
+ auto OuterIt = QFPInstMap.find(MI->getOpcode());
+ if (OuterIt == QFPInstMap.end())
+ return false;
+ auto InnerIt = QFPInstMap.find(OuterIt->second);
+ if (InnerIt == QFPInstMap.end())
+ return false;
+ InstTy = InnerIt->second;
+ }
+
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+
+ // Case 2: Left operand is conversion to sf/hf
+ } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP != Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
+ !DefMI2->isPHI() &&
+ (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
+
+ if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ MachineOperand &Src1 = DefMI1->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+
+ Op0F = getKillRegState(Src1.isKill());
+ Src1.setIsKill(false);
+ Op1F = getKillRegState(Src2.isKill());
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+
+ // Case 3: Right operand is conversion to sf/hf
+ } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP == Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
+ !DefMI1->isPHI() &&
+ (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
+ // The second operand of original instruction is converted.
+ // In "mix" instructions, "qf" operand is always the first operand.
+
+ // Caveat: vsub is not commutative w.r.t operands.
+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
+ InstTy == Hexagon::V6_vsub_qf32_mix)
+ return false;
+
+ if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = DefMI2->getOperand(1);
+
+ Op1F = getKillRegState(Src2.isKill());
+ Src2.setIsKill(false);
+ Op0F = getKillRegState(Src1.isKill());
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src2.getReg(), Op1F,
+ Src2.getSubReg()) // Notice the operands are flipped.
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+ }
+
+ return false;
+}
+
+bool HexagonQFPoptimizer::runOnMachineFunction(MachineFunction &MF) {
+
+ bool Changed = false;
+
+ if (DisableQFOptimizer)
+ return Changed;
+
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ if (!HST->useHVXV68Ops() || !HST->usePackets() ||
+ skipFunction(MF.getFunction()))
+ return false;
+ HII = HST->getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ MachineFunction::iterator MBBI = MF.begin();
+ LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
+ << " Optimize intermediate conversions ===\n");
+ while (MBBI != MF.end()) {
+ MachineBasicBlock *MBB = &*MBBI;
+ MachineBasicBlock::iterator MII = MBBI->instr_begin();
+ while (MII != MBBI->instr_end()) {
+ MachineInstr *MI = &*MII;
+ ++MII; // As MI might be removed.
+
+ if (QFPInstSet.count(MI->getOpcode()) &&
+ MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
+ MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
+ LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+ if (optimizeQfp(MI, MBB)) {
+ MI->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "\t....Removing....");
+ Changed = true;
+ }
+ }
+ }
+ ++MBBI;
+ }
+ return Changed;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index f5d8b696733ba..048f63a3454f4 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() {
initializeHexagonPeepholePass(PR);
initializeHexagonSplitConst32AndConst64Pass(PR);
initializeHexagonVectorPrintPass(PR);
+ initializeHexagonQFPoptimizerPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonGenInsert());
if (EnableEarlyIf)
addPass(createHexagonEarlyIfConversion());
+ addPass(createHexagonQFPoptimizer());
}
return false;
diff --git a/llvm/test/CodeGen/Hexagon/qfp-conv.ll b/llvm/test/CodeGen/Hexagon/qfp-conv.ll
new file mode 100644
index 0000000000000..d2d393e1a859d
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfp-conv.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=hexagon -mattr=+hvxv68,+hvx,+hvx-length128b < %s | FileCheck %s
+
+; Test that the Qfloat optimization pass doesn't crash due to invalid
+; instructions.
+
+; CHECK: v{{[0-9]+}}.hf = v{{[0-9]:[0-9]}}.qf32
+
+define void @test(
+ <32 x i32>* %optr,
+ <64 x i32> %in64,
+ <32 x i32> %va,
+ <32 x i32> %vb
+) local_unnamed_addr #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %optr.068 = phi <32 x i32>* [ %optr, %entry ], [ %incdec.ptr6, %for.body ]
+ %0 = tail call <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32> %in64) #2
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32> %0) #2
+ %2 = tail call <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32> %va, <32 x i32> %1) #2
+ %3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %2, <32 x i32> %va, <32 x i32> %vb) #2
+ %4 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %3, <32 x i32> %vb) #2
+ %5 = tail call <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32> %va, <32 x i32> %4) #2
+ store <32 x i32> %5, <32 x i32>* %optr.068, align 1
+ %incdec.ptr6 = getelementptr inbounds <32 x i32>, <32 x i32>* %optr.068, i32 1
+ br label %for.body
+}
+
+declare <32 x i32> @llvm.hexagon.V6.vdealh.128B(<32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vconv.hf.qf32.128B(<64 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vpackhub.sat.128B(<32 x i32>, <32 x i32>) #1
+declare <128 x i1> @llvm.hexagon.V6.vgth.128B(<32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
diff --git a/llvm/test/CodeGen/Hexagon/qfp-enabled.ll b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll
new file mode 100644
index 0000000000000..f17f0a8430786
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfp-enabled.ll
@@ -0,0 +1,19 @@
+; Tests whether the flag to disable the qfp optimizer pass works.
+
+; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \
+; RUN: < %s -o -| FileCheck %s --check-prefix=ENABLED
+; RUN: llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b \
+; RUN: -disable-qfp-opt < %s -o -| FileCheck %s --check-prefix=DISABLED
+
+define dso_local <32 x i32> @conv1_qf32(<32 x i32> noundef %input1, <32 x i32> noundef %input2) local_unnamed_addr {
+entry:
+; DISABLED: [[V2:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf)
+; DISABLED: [[V3:v[0-9]+]].sf = [[V2]].qf32
+; DISABLED: qf32 = vadd(v0.sf,[[V3]].sf)
+; ENABLED: [[V4:v[0-9]+]].qf32 = vadd(v0.sf,v1.sf)
+; ENABLED: qf32 = vadd([[V4]].qf32,v0.sf)
+ %0 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %input2)
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %0)
+ %2 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.128B(<32 x i32> %input1, <32 x i32> %1)
+ ret <32 x i32> %2
+}
diff --git a/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir
new file mode 100644
index 0000000000000..d8dde7d70885b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/qfp-remove-kill.mir
@@ -0,0 +1,95 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer -run-pass machineverifier %s -o - | FileCheck %s
+
+# Test that the killed RegState is removed from the DefMI operands and
+# that the killed RegState is set on the MI operands.
+# CHECK-LABEL: name: qfpAdd
+# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]]
+# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG2:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32 killed %[[REG1]], killed %[[REG2]]
+# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG3:([0-9]+)]]
+# CHECK-NEXT: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG4:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32 killed %[[REG3]], killed %[[REG4]]
+
+---
+name: qfpAdd
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %7:hvxvr = V6_vL32Ub_ai %3:intregs, 0
+ %8:hvxvr = V6_vconv_sf_qf32 killed %4:hvxvr
+ %9:hvxvr = V6_vconv_sf_qf32 killed %5:hvxvr
+ %10:hvxvr = V6_vadd_sf %8:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_sf_qf32 killed %6:hvxvr
+ %12:hvxvr = V6_vconv_sf_qf32 killed %7:hvxvr
+ %13:hvxvr = V6_vadd_sf killed %11:hvxvr, killed %12:hvxvr
+...
+
+
+# Test that the killed RegState is removed from the DefMI operands
+# CHECK-LABEL: name: qfpAddMix
+# CHECK: %{{[0-9]+}}:hvxvr = V6_vconv_sf_qf32 %[[REG1:([0-9]+)]]
+# CHECK-NEXT: V6_vadd_qf32_mix killed %[[REG1]], %{{[0-9...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/163843
More information about the llvm-commits
mailing list