[llvm] [AArch64][GlobalISel] Improve non-SVE popcount for 32-bit and 64-bit using udot (PR #96409)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 22 15:41:45 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Tim Gymnich (tgymnich)
<details>
<summary>Changes</summary>
Follow-up to #<!-- -->95881
Use udot instead of a sequence of uaddlp instructions when summing across lanes for popcount.
### TODO:
- [ ] add/update test
---
Full diff: https://github.com/llvm/llvm-project/pull/96409.diff
1 File Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+26)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index fef0b722efe45..84cdc40bc5a3b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -14,6 +14,7 @@
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -1904,6 +1905,31 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
// Sum across lanes.
+
+ if (ST->hasDotProd() && Ty.getNumElements() >= 2 &&
+ Ty.getScalarSizeInBits() != 16) {
+ LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
+ auto Zeros = MIRBuilder.buildConstant(Ty, 0);
+ auto Ones = MIRBuilder.buildConstant(VTy, 1);
+ MachineInstrBuilder SUM;
+
+ if (Ty == LLT::fixed_vector(2, 64)) {
+ auto UDOT =
+ MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
+ SUM = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
+ } else if (Ty == LLT::fixed_vector(4, 32)) {
+ SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
+ } else if (Ty == LLT::fixed_vector(2, 32)) {
+ SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
+ } else {
+ llvm_unreachable("unexpected vector shape");
+ }
+
+ SUM->getOperand(0).setReg(Dst);
+ MI.eraseFromParent();
+ return true;
+ }
+
Register HSum = CTPOP.getReg(0);
unsigned Opc;
SmallVector<LLT> HAddTys;
``````````
</details>
https://github.com/llvm/llvm-project/pull/96409
More information about the llvm-commits
mailing list