[llvm] [AArch64][GlobalISel] Improve non-SVE popcount for 32-bit and 64-bit using udot (PR #96409)
Tim Gymnich via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 23 15:33:09 PDT 2024
https://github.com/tgymnich updated https://github.com/llvm/llvm-project/pull/96409
>From 72e8da900169ed81203c8cb1e16e5e7d2eb58514 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Sun, 23 Jun 2024 00:38:17 +0200
Subject: [PATCH] [AArch64][GlobalISel] Improve non-SVE popcount for 32-bit and
64-bit using udot
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 26 +++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index fef0b722efe45..d70c890674324 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -14,6 +14,7 @@
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -1904,6 +1905,31 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
// Sum across lanes.
+
+ if (ST->hasDotProd() && Ty.getNumElements() >= 2 &&
+ Ty.getScalarSizeInBits() != 16) {
+ LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
+ auto Zeros = MIRBuilder.buildConstant(Dt, 0);
+ auto Ones = MIRBuilder.buildConstant(VTy, 1);
+ MachineInstrBuilder SUM;
+
+ if (Ty == LLT::fixed_vector(2, 64)) {
+ auto UDOT =
+ MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
+ SUM = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
+ } else if (Ty == LLT::fixed_vector(4, 32)) {
+ SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
+ } else if (Ty == LLT::fixed_vector(2, 32)) {
+ SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
+ } else {
+ llvm_unreachable("unexpected vector shape");
+ }
+
+ SUM->getOperand(0).setReg(Dst);
+ MI.eraseFromParent();
+ return true;
+ }
+
Register HSum = CTPOP.getReg(0);
unsigned Opc;
SmallVector<LLT> HAddTys;
More information about the llvm-commits
mailing list