[clang] [llvm] [Hexagon] Add Loop Alignment pass. (PR #83379)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 12:57:20 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Sumanth Gundapaneni (sgundapa)
<details>
<summary>Changes</summary>
Inspect a basic block and, if it is a single-basic-block loop with a small number of instructions, set the loop alignment to 32 bytes. This avoids a cache-line break in the first packet of the loop, which would otherwise cause a stall on each execution of the loop.
---
Patch is 34.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83379.diff
9 Files Affected:
- (modified) clang/test/CodeGen/builtins-hexagon.c (+1-1)
- (modified) llvm/lib/Target/Hexagon/CMakeLists.txt (+1)
- (added) llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp (+216)
- (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp (+8-1)
- (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.h (+1)
- (added) llvm/test/CodeGen/Hexagon/loop-balign.ll (+91)
- (added) llvm/test/CodeGen/Hexagon/loop_align_count.ll (+115)
- (added) llvm/test/CodeGen/Hexagon/loop_align_count.mir (+130)
- (added) llvm/test/CodeGen/Hexagon/v6-haar-balign32.ll (+117)
``````````diff
diff --git a/clang/test/CodeGen/builtins-hexagon.c b/clang/test/CodeGen/builtins-hexagon.c
index 9a1b733da5cdb8..52073f27ae70f5 100644
--- a/clang/test/CodeGen/builtins-hexagon.c
+++ b/clang/test/CodeGen/builtins-hexagon.c
@@ -1,5 +1,5 @@
// REQUIRES: hexagon-registered-target
-// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length128b -emit-llvm %s -o - | FileCheck %s
void test() {
int v64 __attribute__((__vector_size__(64)));
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index a22a5c11e6ab3a..cdc062eee72b1e 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -43,6 +43,7 @@ add_llvm_target(HexagonCodeGen
HexagonISelDAGToDAGHVX.cpp
HexagonISelLowering.cpp
HexagonISelLoweringHVX.cpp
+ HexagonLoopAlign.cpp
HexagonLoopIdiomRecognition.cpp
HexagonMachineFunctionInfo.cpp
HexagonMachineScheduler.cpp
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp b/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp
new file mode 100644
index 00000000000000..c79b528ff2f3f9
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp
@@ -0,0 +1,216 @@
+//===----- HexagonLoopAlign.cpp - Generate loop alignment directives -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Inspect a basic block and if its single basic block loop with a small
+// number of instructions, set the prefLoopAlignment to 32 bytes (5).
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-loop-align"
+
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ DisableLoopAlign("disable-hexagon-loop-align", cl::Hidden,
+ cl::desc("Disable Hexagon loop alignment pass"));
+
+static cl::opt<uint32_t> HVXLoopAlignLimitUB(
+ "hexagon-hvx-loop-align-limit-ub", cl::Hidden, cl::init(16),
+ cl::desc("Set hexagon hvx loop upper bound align limit"));
+
+static cl::opt<uint32_t> TinyLoopAlignLimitUB(
+ "hexagon-tiny-loop-align-limit-ub", cl::Hidden, cl::init(16),
+ cl::desc("Set hexagon tiny-core loop upper bound align limit"));
+
+static cl::opt<uint32_t>
+ LoopAlignLimitUB("hexagon-loop-align-limit-ub", cl::Hidden, cl::init(8),
+ cl::desc("Set hexagon loop upper bound align limit"));
+
+static cl::opt<uint32_t>
+ LoopAlignLimitLB("hexagon-loop-align-limit-lb", cl::Hidden, cl::init(4),
+ cl::desc("Set hexagon loop lower bound align limit"));
+
+static cl::opt<uint32_t>
+ LoopBndlAlignLimit("hexagon-loop-bundle-align-limit", cl::Hidden,
+ cl::init(4),
+ cl::desc("Set hexagon loop align bundle limit"));
+
+static cl::opt<uint32_t> TinyLoopBndlAlignLimit(
+ "hexagon-tiny-loop-bundle-align-limit", cl::Hidden, cl::init(8),
+ cl::desc("Set hexagon tiny-core loop align bundle limit"));
+
+static cl::opt<uint32_t>
+ LoopEdgeThreshold("hexagon-loop-edge-threshold", cl::Hidden, cl::init(7500),
+ cl::desc("Set hexagon loop align edge theshold"));
+
+namespace llvm {
+FunctionPass *createHexagonLoopAlign();
+void initializeHexagonLoopAlignPass(PassRegistry &);
+} // namespace llvm
+
+namespace {
+
+class HexagonLoopAlign : public MachineFunctionPass {
+ const HexagonSubtarget *HST = nullptr;
+ const TargetMachine *HTM = nullptr;
+ const HexagonInstrInfo *HII = nullptr;
+
+public:
+ static char ID;
+ HexagonLoopAlign() : MachineFunctionPass(ID) {
+ initializeHexagonLoopAlignPass(*PassRegistry::getPassRegistry());
+ }
+ bool shouldBalignLoop(MachineBasicBlock &BB, bool AboveThres);
+ bool isSingleLoop(MachineBasicBlock &MBB);
+ bool attemptToBalignSmallLoop(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Hexagon LoopAlign pass"; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+char HexagonLoopAlign::ID = 0;
+
+bool HexagonLoopAlign::shouldBalignLoop(MachineBasicBlock &BB,
+ bool AboveThres) {
+ bool isVec = false;
+ unsigned InstCnt = 0;
+ unsigned BndlCnt = 0;
+
+ for (MachineBasicBlock::instr_iterator II = BB.instr_begin(),
+ IE = BB.instr_end();
+ II != IE; ++II) {
+
+ // End if the instruction is endloop.
+ if (HII->isEndLoopN(II->getOpcode()))
+ break;
+ // Count the number of bundles.
+ if (II->isBundle()) {
+ BndlCnt++;
+ continue;
+ }
+ // Skip over debug instructions.
+ if (II->isDebugInstr())
+ continue;
+ // Check if there are any HVX instructions in loop.
+ isVec |= HII->isHVXVec(*II);
+ // Count the number of instructions.
+ InstCnt++;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Bundle Count : " << BndlCnt << "\n";
+ dbgs() << "Instruction Count : " << InstCnt << "\n";
+ });
+
+ unsigned LimitUB = 0;
+ unsigned LimitBndl = LoopBndlAlignLimit;
+ // The conditions in the order of priority.
+ if (HST->isTinyCore()) {
+ LimitUB = TinyLoopAlignLimitUB;
+ LimitBndl = TinyLoopBndlAlignLimit;
+ } else if (isVec)
+ LimitUB = HVXLoopAlignLimitUB;
+ else if (AboveThres)
+ LimitUB = LoopAlignLimitUB;
+
+ // if the upper bound is not set to a value, implies we didn't meet
+ // the criteria.
+ if (LimitUB == 0)
+ return false;
+
+ return InstCnt >= LoopAlignLimitLB && InstCnt <= LimitUB &&
+ BndlCnt <= LimitBndl;
+}
+
+bool HexagonLoopAlign::isSingleLoop(MachineBasicBlock &MBB) {
+ int Succs = MBB.succ_size();
+ return (MBB.isSuccessor(&MBB) && (Succs == 2));
+}
+
+bool HexagonLoopAlign::attemptToBalignSmallLoop(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (!isSingleLoop(MBB))
+ return false;
+
+ const MachineBranchProbabilityInfo *MBPI =
+ &getAnalysis<MachineBranchProbabilityInfo>();
+ const MachineBlockFrequencyInfo *MBFI =
+ &getAnalysis<MachineBlockFrequencyInfo>();
+
+ // Compute frequency of back edge,
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ BranchProbability BrProb = MBPI->getEdgeProbability(&MBB, &MBB);
+ BlockFrequency EdgeFreq = BlockFreq * BrProb;
+ LLVM_DEBUG({
+ dbgs() << "Loop Align Pass:\n";
+ dbgs() << "\tedge with freq(" << EdgeFreq.getFrequency() << ")\n";
+ });
+
+ bool AboveThres = EdgeFreq.getFrequency() > LoopEdgeThreshold;
+ if (shouldBalignLoop(MBB, AboveThres)) {
+ // We found a loop, change its alignment to be 32 (5).
+ MBB.setAlignment(llvm::Align(1 << 5));
+ return true;
+ }
+ return false;
+}
+
+// Inspect each basic block, and if its a single BB loop, see if it
+// meets the criteria for increasing alignment to 32.
+
+bool HexagonLoopAlign::runOnMachineFunction(MachineFunction &MF) {
+
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ HII = HST->getInstrInfo();
+ HTM = &MF.getTarget();
+
+ if (skipFunction(MF.getFunction()))
+ return false;
+ if (DisableLoopAlign)
+ return false;
+
+ // This optimization is performed at
+ // i) -O2 and above, and when the loop has a HVX instruction.
+ // ii) -O3
+ if (HST->useHVXOps()) {
+ if (HTM->getOptLevel() < CodeGenOptLevel::Default)
+ return false;
+ } else {
+ if (HTM->getOptLevel() < CodeGenOptLevel::Aggressive)
+ return false;
+ }
+
+ bool Changed = false;
+ for (MachineFunction::iterator MBBi = MF.begin(), MBBe = MF.end();
+ MBBi != MBBe; ++MBBi) {
+ MachineBasicBlock &MBB = *MBBi;
+ Changed |= attemptToBalignSmallLoop(MF, MBB);
+ }
+ return Changed;
+}
+
+} // namespace
+
+INITIALIZE_PASS(HexagonLoopAlign, "hexagon-loop-align",
+ "Hexagon LoopAlign pass", false, false)
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonLoopAlign() { return new HexagonLoopAlign(); }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 7d77286339399d..3c346c334d6d30 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -164,6 +164,7 @@ namespace llvm {
void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonHardwareLoopsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
+ void initializeHexagonLoopAlignPass(PassRegistry &);
void initializeHexagonNewValueJumpPass(PassRegistry&);
void initializeHexagonOptAddrModePass(PassRegistry&);
void initializeHexagonPacketizerPass(PassRegistry&);
@@ -194,6 +195,7 @@ namespace llvm {
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
CodeGenOptLevel OptLevel);
+ FunctionPass *createHexagonLoopAlign();
FunctionPass *createHexagonLoopRescheduling();
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptAddrMode();
@@ -256,8 +258,10 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small),
(HexagonNoOpt ? CodeGenOptLevel::None : OL)),
- TLOF(std::make_unique<HexagonTargetObjectFile>()) {
+ TLOF(std::make_unique<HexagonTargetObjectFile>()),
+ Subtarget(Triple(TT), CPU, FS, *this) {
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+ initializeHexagonLoopAlignPass(*PassRegistry::getPassRegistry());
initializeHexagonTfrCleanupPass(*PassRegistry::getPassRegistry());
initAsmInfo();
}
@@ -476,6 +480,9 @@ void HexagonPassConfig::addPreEmitPass() {
// Packetization is mandatory: it handles gather/scatter at all opt levels.
addPass(createHexagonPacketizer(NoOpt));
+ if (!NoOpt)
+ addPass(createHexagonLoopAlign());
+
if (EnableVectorPrint)
addPass(createHexagonVectorPrint());
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index c5fed0cd65a814..34ff45b6acf345 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -23,6 +23,7 @@ namespace llvm {
class HexagonTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ HexagonSubtarget Subtarget;
mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap;
public:
diff --git a/llvm/test/CodeGen/Hexagon/loop-balign.ll b/llvm/test/CodeGen/Hexagon/loop-balign.ll
new file mode 100644
index 00000000000000..9d1f42a4b14b18
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/loop-balign.ll
@@ -0,0 +1,91 @@
+; RUN: llc -march=hexagon -O3 < %s | FileCheck %s -check-prefix=BALIGN
+; BALIGN: .p2align{{.*}}5
+
+; The test for checking the alignment of 'for.body4.for.body4_crit_edge' basic block
+
+define dso_local void @foo(i32 %nCol, i32 %nRow, ptr nocapture %resMat) local_unnamed_addr {
+entry:
+ %shl = shl i32 %nRow, 2
+ %cmp36 = icmp sgt i32 %nRow, 0
+ %0 = add i32 %nCol, -1
+ %.inv = icmp slt i32 %0, 1
+ %1 = select i1 %.inv, i32 1, i32 %nCol
+ br label %Outerloop
+
+Outerloop: ; preds = %for.end7, %entry
+ %r12.0 = phi i32 [ 0, %entry ], [ %inc8, %for.end7 ]
+ %r7_6.0 = phi i64 [ undef, %entry ], [ %r7_6.1.lcssa, %for.end7 ]
+ %r0i.0 = phi i32 [ undef, %entry ], [ %r0i.1.lcssa, %for.end7 ]
+ %r5.0 = phi ptr [ %resMat, %entry ], [ %r5.1.lcssa, %for.end7 ]
+ %r8.0 = phi i32 [ %shl, %entry ], [ %r8.1.lcssa, %for.end7 ]
+ br i1 %cmp36, label %for.body.lr.ph, label %for.end7
+
+for.body.lr.ph: ; preds = %Outerloop
+ %cmp332 = icmp eq i32 %r12.0, 0
+ %exitcond.peel = icmp eq i32 %r12.0, 1
+ br label %for.body
+
+for.body: ; preds = %for.end, %for.body.lr.ph
+ %r8.141 = phi i32 [ %r8.0, %for.body.lr.ph ], [ %add, %for.end ]
+ %r5.140 = phi ptr [ %r5.0, %for.body.lr.ph ], [ %add.ptr, %for.end ]
+ %i.039 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.end ]
+ %r0i.138 = phi i32 [ %r0i.0, %for.body.lr.ph ], [ %4, %for.end ]
+ %r7_6.137 = phi i64 [ %r7_6.0, %for.body.lr.ph ], [ %r7_6.2.lcssa, %for.end ]
+ %add = add nsw i32 %r8.141, %shl
+ br i1 %cmp332, label %for.end, label %for.body4.peel
+
+for.body4.peel: ; preds = %for.body
+ %r1i.0.in.peel = inttoptr i32 %r8.141 to ptr
+ %r1i.0.peel = load i32, ptr %r1i.0.in.peel, align 4
+ %2 = tail call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %r7_6.137, i32 %r1i.0.peel, i32 %r0i.138)
+ br i1 %exitcond.peel, label %for.end, label %for.body4.preheader.peel.newph
+
+for.body4.preheader.peel.newph: ; preds = %for.body4.peel
+ %r1i.0.in = inttoptr i32 %add to ptr
+ %r1i.0 = load i32, ptr %r1i.0.in, align 4
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.for.body4_crit_edge, %for.body4.preheader.peel.newph
+ %inc.phi = phi i32 [ %inc.0, %for.body4.for.body4_crit_edge ], [ 2, %for.body4.preheader.peel.newph ]
+ %r7_6.233 = phi i64 [ %3, %for.body4.for.body4_crit_edge ], [ %2, %for.body4.preheader.peel.newph ]
+ %3 = tail call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %r7_6.233, i32 %r1i.0, i32 %r0i.138)
+ %exitcond = icmp eq i32 %inc.phi, %r12.0
+ br i1 %exitcond, label %for.end.loopexit, label %for.body4.for.body4_crit_edge
+
+for.body4.for.body4_crit_edge: ; preds = %for.body4
+ %inc.0 = add nuw nsw i32 %inc.phi, 1
+ br label %for.body4
+
+for.end.loopexit: ; preds = %for.body4
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %for.body4.peel, %for.body
+ %r7_6.2.lcssa = phi i64 [ %r7_6.137, %for.body ], [ %2, %for.body4.peel ], [ %3, %for.end.loopexit ]
+ %4 = tail call i32 @llvm.hexagon.S2.clbp(i64 %r7_6.2.lcssa)
+ store i32 %4, ptr %r5.140, align 4
+ %add.ptr = getelementptr inbounds i8, ptr %r5.140, i32 undef
+ %inc6 = add nuw nsw i32 %i.039, 1
+ %exitcond47 = icmp eq i32 %inc6, %nRow
+ br i1 %exitcond47, label %for.end7.loopexit, label %for.body
+
+for.end7.loopexit: ; preds = %for.end
+ br label %for.end7
+
+for.end7: ; preds = %for.end7.loopexit, %Outerloop
+ %r7_6.1.lcssa = phi i64 [ %r7_6.0, %Outerloop ], [ %r7_6.2.lcssa, %for.end7.loopexit ]
+ %r0i.1.lcssa = phi i32 [ %r0i.0, %Outerloop ], [ %4, %for.end7.loopexit ]
+ %r5.1.lcssa = phi ptr [ %r5.0, %Outerloop ], [ %add.ptr, %for.end7.loopexit ]
+ %r8.1.lcssa = phi i32 [ %r8.0, %Outerloop ], [ %add, %for.end7.loopexit ]
+ %inc8 = add nuw i32 %r12.0, 1
+ %exitcond48 = icmp eq i32 %inc8, %1
+ br i1 %exitcond48, label %if.end, label %Outerloop
+
+if.end: ; preds = %for.end7
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64, i32, i32)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.hexagon.S2.clbp(i64)
diff --git a/llvm/test/CodeGen/Hexagon/loop_align_count.ll b/llvm/test/CodeGen/Hexagon/loop_align_count.ll
new file mode 100644
index 00000000000000..07d7e4a8d61176
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/loop_align_count.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv73 -O2 -mattr=+hvxv73,hvx-length64b \
+; RUN: -debug-only=hexagon-loop-align 2>&1 < %s | FileCheck %s
+; Validate that there are 4 bundles in the loop.
+
+; CHECK: Loop Align Pass:
+; CHECK: Bundle Count : 4
+; CHECK: .p2align{{.*}}5
+
+; Function Attrs: nounwind
+define void @ham(ptr noalias nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, ptr noalias nocapture %arg4, i32 %arg5) #0 {
+bb:
+ %ashr = ashr i32 %arg3, 2
+ %ashr6 = ashr i32 %arg3, 1
+ %add = add nsw i32 %ashr6, %ashr
+ %icmp = icmp sgt i32 %arg2, 0
+ br i1 %icmp, label %bb7, label %bb61
+
+bb7: ; preds = %bb
+ %sdiv = sdiv i32 %arg1, 64
+ %icmp8 = icmp sgt i32 %arg1, 63
+ br label %bb9
+
+bb9: ; preds = %bb57, %bb7
+ %phi = phi i32 [ 0, %bb7 ], [ %add58, %bb57 ]
+ %ashr10 = ashr exact i32 %phi, 1
+ %mul = mul nsw i32 %ashr10, %arg3
+ br i1 %icmp8, label %bb11, label %bb57
+
+bb11: ; preds = %bb9
+ %add12 = add nsw i32 %phi, 1
+ %mul13 = mul nsw i32 %add12, %arg5
+ %mul14 = mul nsw i32 %phi, %arg5
+ %add15 = add i32 %add, %mul
+ %add16 = add i32 %mul, %ashr
+ %add17 = add i32 %mul, %ashr6
+ %getelementptr = getelementptr inbounds i8, ptr %arg4, i32 %mul13
+ %getelementptr18 = getelementptr inbounds i8, ptr %arg4, i32 %mul14
+ %getelementptr19 = getelementptr inbounds i16, ptr %arg, i32 %add15
+ %getelementptr20 = getelementptr inbounds i16, ptr %arg, i32 %add16
+ %getelementptr21 = getelementptr inbounds i16, ptr %arg, i32 %add17
+ %getelementptr22 = getelementptr inbounds i16, ptr %arg, i32 %mul
+ %bitcast = bitcast ptr %getelementptr to ptr
+ %bitcast23 = bitcast ptr %getelementptr18 to ptr
+ %bitcast24 = bitcast ptr %getelementptr19 to ptr
+ %bitcast25 = bitcast ptr %getelementptr20 to ptr
+ %bitcast26 = bitcast ptr %getelementptr21 to ptr
+ %bitcast27 = bitcast ptr %getelementptr22 to ptr
+ br label %bb28
+
+bb28: ; preds = %bb28, %bb11
+ %phi29 = phi i32 [ 0, %bb11 ], [ %add54, %bb28 ]
+ %phi30 = phi ptr [ %bitcast27, %bb11 ], [ %getelementptr36, %bb28 ]
+ %phi31 = phi ptr [ %bitcast26, %bb11 ], [ %getelementptr37, %bb28 ]
+ %phi32 = phi ptr [ %bitcast25, %bb11 ], [ %getelementptr39, %bb28 ]
+ %phi33 = phi ptr [ %bitcast24, %bb11 ], [ %getelementptr41, %bb28 ]
+ %phi34 = phi ptr [ %bitcast, %bb11 ], [ %getelementptr53, %bb28 ]
+ %phi35 = phi ptr [ %bitcast23, %bb11 ], [ %getelementptr52, %bb28 ]
+ %getelementptr36 = getelementptr inbounds <16 x i32>, ptr %phi30, i32 1
+ %load = load <16 x i32>, ptr %phi30, align 64
+ %getelementptr37 = getelementptr inbounds <16 x i32>, ptr %phi31, i32 1
+ %load38 = load <16 x i32>, ptr %phi31, align 64
+ %getelementptr39 = getelementptr inbounds <16 x i32>, ptr %phi32, i32 1
+ %load40 = load <16 x i32>, ptr %phi32, align 64
+ %getelementptr41 = getelementptr inbounds <16 x i32>, ptr %phi33, i32 1
+ %load42 = load <16 x i32>, ptr %phi33, align 64
+ %call = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load, <16 x i32> %load38)
+ %call43 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load, <16 x i32> %load38)
+ %call44 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load40, <16 x i32> %load42)
+ %call45 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load40, <16 x i32> %load42)
+ %call46 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call, <16 x i32> %call44)
+ %call47 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call, <16 x i32> %call44)
+ %call48 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call43, <16 x i32> %call45)
+ %call49 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call43, <16 x i32> %call45)
+ %call50 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call47, <16 x i32> %call46)
+ %call51 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call49, <16 x i32> %call48)
+ %getelementptr52 = getelementptr inbounds <16 x i32>, ptr %phi35, i32 ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/83379
More information about the llvm-commits
mailing list