[llvm] 75187aa - [AArch64][GlobalISel] Legalize ctpop for v2s64, v2s32, v4s32, v4s16, v8s16
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 20 15:39:27 PDT 2021
Author: Jon Roelofs
Date: 2021-07-20T15:37:56-07:00
New Revision: 75187aa352554255219125d614fbb1a1141c4c7d
URL: https://github.com/llvm/llvm-project/commit/75187aa352554255219125d614fbb1a1141c4c7d
DIFF: https://github.com/llvm/llvm-project/commit/75187aa352554255219125d614fbb1a1141c4c7d.diff
LOG: [AArch64][GlobalISel] Legalize ctpop for v2s64, v2s32, v4s32, v4s16, v8s16
https://llvm.godbolt.org/z/nTTK6M5qe
Differential revision: https://reviews.llvm.org/D106388
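For a sense of what now takes this path: the hypothetical C++ reduction below (illustrative only; the function name and shape are assumptions, not from the patch) is the kind of loop the vectorizer can turn into a vector @llvm.ctpop, i.e. a vector G_CTPOP, which GlobalISel previously could not legalize (see the deleted legalize-vector-ctpop.mir test at the bottom of this diff).

#include <cstdint>

// Hypothetical motivating example, not part of the patch: with the loop
// vectorizer active, this reduction can become a <4 x i32> @llvm.ctpop,
// which reaches GlobalISel as a v4s32 G_CTPOP.
uint32_t popcount_sum(const uint32_t *p, int n) {
  uint32_t sum = 0;
  for (int i = 0; i < n; ++i)
    sum += (uint32_t)__builtin_popcount(p[i]);
  return sum;
}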
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
Removed:
llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-ctpop.mir
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 04e2aa87db69..8f120bfe5005 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -763,7 +763,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.customFor({{s32, s32}, {s64, s64}});
// TODO: Custom legalization for s128
- // TODO: v2s64, v2s32, v4s32, v4s16, v8s16
// TODO: Use generic lowering when custom lowering is not possible.
auto always = [=](const LegalityQuery &Q) { return true; };
getActionDefinitionsBuilder(G_CTPOP)
@@ -772,7 +771,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.minScalarEltSameAsIf(always, 1, 0)
.maxScalarEltSameAsIf(always, 1, 0)
- .customFor({{s32, s32}, {s64, s64}});
+ .customFor({{s32, s32},
+ {s64, s64},
+ {v2s64, v2s64},
+ {v2s32, v2s32},
+ {v4s32, v4s32},
+ {v4s16, v4s16},
+ {v8s16, v8s16}});
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
@@ -1115,6 +1120,18 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
// CNT V0.8B, V0.8B // 8xbyte pop-counts
// ADDV B0, V0.8B // sum 8xbyte pop-counts
// UMOV X0, V0.B[0] // copy byte result back to integer reg
+ //
+ // For 128-bit vector popcounts, we lower to the following sequence:
+ // cnt.16b v0, v0 // v8s16, v4s32, v2s64
+ // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
+ // uaddlp.4s v0, v0 // v4s32, v2s64
+ // uaddlp.2d v0, v0 // v2s64
+ //
+ // For 64-bit vector popcounts, we lower to the following sequence:
+ // cnt.8b v0, v0 // v4s16, v2s32
+ // uaddlp.4h v0, v0 // v4s16, v2s32
+ // uaddlp.2s v0, v0 // v2s32
+
if (!ST->hasNEON() ||
MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -1123,27 +1140,66 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
Register Val = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(Val);
- // TODO: Handle vector types.
- assert(!Ty.isVector() && "Vector types not handled yet!");
assert(Ty == MRI.getType(Dst) &&
"Expected src and dst to have the same type!");
- // TODO: Handle s128.
unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!");
- if (Size == 32)
- Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
- const LLT V8S8 = LLT::fixed_vector(8, LLT::scalar(8));
- Val = MIRBuilder.buildBitcast(V8S8, Val).getReg(0);
- auto CTPOP = MIRBuilder.buildCTPOP(V8S8, Val);
- auto UADDLV =
- MIRBuilder
- .buildIntrinsic(Intrinsic::aarch64_neon_uaddlv, {LLT::scalar(32)},
- /*HasSideEffects = */ false)
- .addUse(CTPOP.getReg(0));
- if (Size == 64)
- MIRBuilder.buildZExt(Dst, UADDLV);
+
+ // Pre-conditioning: zero-extend scalars to 64 bits where needed, then
+ // bitcast everything to a vector of bytes:
+ //   s32, s64, v4s16, v2s32 -> v8s8
+ //   v8s16, v4s32, v2s64    -> v16s8
+ LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
+ if (Ty.isScalar()) {
+ // TODO: Handle s128.
+ assert((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!");
+ if (Size == 32) {
+ Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
+ }
+ }
+ Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
+
+ // Count bits in each byte-sized lane.
+ auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
+
+ // Sum across lanes.
+ Register HSum = CTPOP.getReg(0);
+ unsigned Opc;
+ SmallVector<LLT> HAddTys;
+ if (Ty.isScalar()) {
+ Opc = Intrinsic::aarch64_neon_uaddlv;
+ HAddTys.push_back(LLT::scalar(32));
+ } else if (Ty == LLT::fixed_vector(8, 16)) {
+ Opc = Intrinsic::aarch64_neon_uaddlp;
+ HAddTys.push_back(LLT::fixed_vector(8, 16));
+ } else if (Ty == LLT::fixed_vector(4, 32)) {
+ Opc = Intrinsic::aarch64_neon_uaddlp;
+ HAddTys.push_back(LLT::fixed_vector(8, 16));
+ HAddTys.push_back(LLT::fixed_vector(4, 32));
+ } else if (Ty == LLT::fixed_vector(2, 64)) {
+ Opc = Intrinsic::aarch64_neon_uaddlp;
+ HAddTys.push_back(LLT::fixed_vector(8, 16));
+ HAddTys.push_back(LLT::fixed_vector(4, 32));
+ HAddTys.push_back(LLT::fixed_vector(2, 64));
+ } else if (Ty == LLT::fixed_vector(4, 16)) {
+ Opc = Intrinsic::aarch64_neon_uaddlp;
+ HAddTys.push_back(LLT::fixed_vector(4, 16));
+ } else if (Ty == LLT::fixed_vector(2, 32)) {
+ Opc = Intrinsic::aarch64_neon_uaddlp;
+ HAddTys.push_back(LLT::fixed_vector(4, 16));
+ HAddTys.push_back(LLT::fixed_vector(2, 32));
+ } else
+ llvm_unreachable("unexpected vector shape");
+ MachineInstrBuilder UADD;
+ for (LLT HTy : HAddTys) {
+ UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
+ .addUse(HSum);
+ HSum = UADD.getReg(0);
+ }
+
+ // Post-conditioning.
+ if (Ty.isScalar() && Size == 64)
+ MIRBuilder.buildZExt(Dst, UADD);
else
- UADDLV->getOperand(0).setReg(Dst);
+ UADD->getOperand(0).setReg(Dst);
MI.eraseFromParent();
return true;
}
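As a sanity check on the lowering comments above, here is a small stand-alone model of the 128-bit cnt + uaddlp ladder, specialized to the v4s32 case (plain C++20, no LLVM APIs; the function name and test values are made up). Each uaddlp step pairwise-adds adjacent lanes into lanes of twice the width, so after cnt.16b the per-byte counts fold back up into one popcount per original 32-bit lane; the scalar path instead finishes with uaddlv, which reduces all lanes to a single sum.

#include <bit>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Reference model of the v4s32 lowering: cnt.16b, uaddlp.8h, uaddlp.4s.
static void ctpop_v4s32_model(const uint32_t in[4], uint32_t out[4]) {
  uint8_t bytes[16];
  std::memcpy(bytes, in, sizeof(bytes));

  // cnt.16b v0, v0 -- one popcount per byte lane.
  uint8_t cnt[16];
  for (int i = 0; i < 16; ++i)
    cnt[i] = (uint8_t)std::popcount((unsigned)bytes[i]);

  // uaddlp.8h v0, v0 -- pairwise add adjacent 8-bit lanes into 16-bit lanes.
  uint16_t h[8];
  for (int i = 0; i < 8; ++i)
    h[i] = (uint16_t)(cnt[2 * i] + cnt[2 * i + 1]);

  // uaddlp.4s v0, v0 -- pairwise add adjacent 16-bit lanes into 32-bit lanes.
  for (int i = 0; i < 4; ++i)
    out[i] = (uint32_t)h[2 * i] + h[2 * i + 1];
}

int main() {
  const uint32_t in[4] = {0xffffffffu, 0x0f0f0f0fu, 1u, 0u};
  uint32_t out[4];
  ctpop_v4s32_model(in, out);
  printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); // 32 16 1 0
}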
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
index d846f2d47eae..04406c15296c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
@@ -188,3 +188,112 @@ body: |
%ext:_(s32) = G_ANYEXT %ctpop(s16)
$w0 = COPY %ext(s32)
RET_ReallyLR implicit $w0
+
+...
+---
+name: custom_8x16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: custom_8x16
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+ ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+ ; CHECK: $q0 = COPY [[INT]](<8 x s16>)
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s16>) = G_CTPOP %0(<8 x s16>)
+ $q0 = COPY %1(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: custom_4x32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: custom_4x32
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+ ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+ ; CHECK: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
+ ; CHECK: $q0 = COPY [[INT1]](<4 x s32>)
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = G_CTPOP %0(<4 x s32>)
+ $q0 = COPY %1(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: custom_2x64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: custom_2x64
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+ ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+ ; CHECK: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
+ ; CHECK: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
+ ; CHECK: $q0 = COPY [[INT2]](<2 x s64>)
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s64>) = G_CTPOP %0(<2 x s64>)
+ $q0 = COPY %1(<2 x s64>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: custom_4x16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: custom_4x16
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+ ; CHECK: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
+ ; CHECK: $d0 = COPY [[INT]](<4 x s16>)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = G_CTPOP %0(<4 x s16>)
+ $d0 = COPY %1(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: custom_2x32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: custom_2x32
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
+ ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+ ; CHECK: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
+ ; CHECK: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
+ ; CHECK: $d0 = COPY [[INT1]](<2 x s32>)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = G_CTPOP %0(<2 x s32>)
+ $d0 = COPY %1(<2 x s32>)
+ RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-ctpop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-ctpop.mir
deleted file mode 100644
index 84d3096527c4..000000000000
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-ctpop.mir
+++ /dev/null
@@ -1,22 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - -global-isel-abort=2 | FileCheck %s
-...
-# This test doesn't currently legalize but should at least not crash.
----
-name: v2s64_dont_crash
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $q0
- ; CHECK-LABEL: name: v2s64_dont_crash
- ; CHECK: liveins: $q0
- ; CHECK: %copy:_(<2 x s64>) = COPY $q0
- ; CHECK: %ctpop:_(<2 x s64>) = G_CTPOP %copy(<2 x s64>)
- ; CHECK: $q0 = COPY %ctpop(<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
- %copy:_(<2 x s64>) = COPY $q0
- %ctpop:_(<2 x s64>) = G_CTPOP %copy(<2 x s64>)
- $q0 = COPY %ctpop(<2 x s64>)
- RET_ReallyLR implicit $q0
-
-...