[llvm] [AArch64][SelectionDAG] Implement vector splitting for histogram intrinsic (PR #103037)
Max Beck-Jones via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 10:39:40 PDT 2024
https://github.com/DevM-uk updated https://github.com/llvm/llvm-project/pull/103037
>From 79ce338d7af82fef343e4aa6f215b89d1b6cf305 Mon Sep 17 00:00:00 2001
From: Max Beck-Jones <max.beck-jones at arm.com>
Date: Mon, 12 Aug 2024 13:48:31 +0000
Subject: [PATCH 1/4] [AArch64] Implement vector splitting for histogram
intrinsic
Adds support for wider-than-legal vector types for the histogram intrinsic (llvm.experimental.vector.histogram.add) by splitting the vector.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 39 ++++++++
llvm/test/CodeGen/AArch64/sve2-histcnt.ll | 95 +++++++++++++++++++
2 files changed, 134 insertions(+)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7777aa4b50a370..7c9b34b272f17d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1128,6 +1128,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+ setTargetDAGCombine(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM);
+
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -25434,6 +25436,41 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return NVCAST;
}
+static SDValue performHistogramCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
+ SDLoc DL(HG);
+ SDValue Chain = HG->getChain();
+ SDValue Inc = HG->getInc();
+ SDValue Mask = HG->getMask();
+ SDValue Ptr = HG->getBasePtr();
+ SDValue Index = HG->getIndex();
+ SDValue Scale = HG->getScale();
+ SDValue IntID = HG->getIntID();
+ EVT MemVT = HG->getMemoryVT();
+ EVT IndexVT = Index.getValueType();
+ MachineMemOperand *MMO = HG->getMemOperand();
+ ISD::MemIndexType IndexType = HG->getIndexType();
+
+ if (IndexVT == MVT::nxv4i32 || IndexVT == MVT::nxv2i64)
+ return SDValue();
+
+ // Split vectors which are too wide
+ SDValue IndexLo, IndexHi, MaskLo, MaskHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ SDValue HistogramOpsLo[] = {Chain, Inc, MaskLo, Ptr, IndexLo, Scale, IntID};
+ SDValue HChain = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
+ HistogramOpsLo, MMO, IndexType);
+ SDValue HistogramOpsHi[] = {HChain, Inc, MaskHi, Ptr, IndexHi, Scale, IntID};
+ return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
+ HistogramOpsHi, MMO, IndexType);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -25778,6 +25815,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCTLZCombine(N, DAG, Subtarget);
case ISD::SCALAR_TO_VECTOR:
return performScalarToVectorCombine(N, DCI, DAG);
+ case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
+ return performHistogramCombine(N, DCI, DAG);
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
index 2874e47511e12f..56d5eb13ab12e3 100644
--- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
@@ -169,4 +169,99 @@ define void @histogram_i16_literal_3(ptr %base, <vscale x 4 x i32> %indices, <vs
ret void
}
+define void @histogram_i64_4_lane(<vscale x 4 x ptr> %buckets, i64 %inc, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: histogram_i64_4_lane:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p1.h, p0.b
+; CHECK-NEXT: mov z4.d, x0
+; CHECK-NEXT: ptrue p2.d
+; CHECK-NEXT: histcnt z2.d, p1/z, z0.d, z0.d
+; CHECK-NEXT: ld1d { z3.d }, p1/z, [z0.d]
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: mad z2.d, p2/m, z4.d, z3.d
+; CHECK-NEXT: st1d { z2.d }, p1, [z0.d]
+; CHECK-NEXT: histcnt z0.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [z1.d]
+; CHECK-NEXT: mad z0.d, p2/m, z4.d, z2.d
+; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 %inc, <vscale x 4 x i1> %mask)
+ ret void
+}
+
+define void @histogram_i64_8_lane(<vscale x 8 x ptr> %buckets, i64 %inc, <vscale x 8 x i1> %mask) #0 {
+; CHECK-LABEL: histogram_i64_8_lane:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p2.h, p0.b
+; CHECK-NEXT: mov z6.d, x0
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: punpklo p3.h, p2.b
+; CHECK-NEXT: punpkhi p2.h, p2.b
+; CHECK-NEXT: histcnt z4.d, p3/z, z0.d, z0.d
+; CHECK-NEXT: ld1d { z5.d }, p3/z, [z0.d]
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: mad z4.d, p1/m, z6.d, z5.d
+; CHECK-NEXT: st1d { z4.d }, p3, [z0.d]
+; CHECK-NEXT: histcnt z0.d, p2/z, z1.d, z1.d
+; CHECK-NEXT: ld1d { z4.d }, p2/z, [z1.d]
+; CHECK-NEXT: mad z0.d, p1/m, z6.d, z4.d
+; CHECK-NEXT: st1d { z0.d }, p2, [z1.d]
+; CHECK-NEXT: punpklo p2.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: histcnt z0.d, p2/z, z2.d, z2.d
+; CHECK-NEXT: ld1d { z1.d }, p2/z, [z2.d]
+; CHECK-NEXT: mad z0.d, p1/m, z6.d, z1.d
+; CHECK-NEXT: st1d { z0.d }, p2, [z2.d]
+; CHECK-NEXT: histcnt z0.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [z3.d]
+; CHECK-NEXT: mad z0.d, p1/m, z6.d, z1.d
+; CHECK-NEXT: st1d { z0.d }, p0, [z3.d]
+; CHECK-NEXT: ret
+ call void @llvm.experimental.vector.histogram.add.nxv8p0.i64(<vscale x 8 x ptr> %buckets, i64 %inc, <vscale x 8 x i1> %mask)
+ ret void
+}
+
+define void @histogram_i32_8_lane(ptr %base, <vscale x 8 x i32> %indices, i32 %inc, <vscale x 8 x i1> %mask) #0 {
+; CHECK-LABEL: histogram_i32_8_lane:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p1.h, p0.b
+; CHECK-NEXT: mov z4.s, w1
+; CHECK-NEXT: ptrue p2.s
+; CHECK-NEXT: histcnt z2.s, p1/z, z0.s, z0.s
+; CHECK-NEXT: ld1w { z3.s }, p1/z, [x0, z0.s, sxtw #2]
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: mad z2.s, p2/m, z4.s, z3.s
+; CHECK-NEXT: st1w { z2.s }, p1, [x0, z0.s, sxtw #2]
+; CHECK-NEXT: histcnt z0.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, z1.s, sxtw #2]
+; CHECK-NEXT: mad z0.s, p2/m, z4.s, z2.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
+; CHECK-NEXT: ret
+ %buckets = getelementptr i32, ptr %base, <vscale x 8 x i32> %indices
+ call void @llvm.experimental.vector.histogram.add.nxv8p0.i32(<vscale x 8 x ptr> %buckets, i32 %inc, <vscale x 8 x i1> %mask)
+ ret void
+}
+
+define void @histogram_i16_8_lane(ptr %base, <vscale x 8 x i32> %indices, i16 %inc, <vscale x 8 x i1> %mask) #0 {
+; CHECK-LABEL: histogram_i16_8_lane:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p1.h, p0.b
+; CHECK-NEXT: mov z4.s, w1
+; CHECK-NEXT: ptrue p2.s
+; CHECK-NEXT: histcnt z2.s, p1/z, z0.s, z0.s
+; CHECK-NEXT: ld1h { z3.s }, p1/z, [x0, z0.s, sxtw #1]
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: mad z2.s, p2/m, z4.s, z3.s
+; CHECK-NEXT: st1h { z2.s }, p1, [x0, z0.s, sxtw #1]
+; CHECK-NEXT: histcnt z0.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z1.s, sxtw #1]
+; CHECK-NEXT: mad z0.s, p2/m, z4.s, z2.s
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
+; CHECK-NEXT: ret
+ %buckets = getelementptr i16, ptr %base, <vscale x 8 x i32> %indices
+ call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 %inc, <vscale x 8 x i1> %mask)
+ ret void
+}
+
+
attributes #0 = { "target-features"="+sve2" vscale_range(1, 16) }
>From 577383ce209661bb4402bff524083513eb75d01f Mon Sep 17 00:00:00 2001
From: Max Beck-Jones <max.beck-jones at arm.com>
Date: Tue, 20 Aug 2024 11:37:31 +0000
Subject: [PATCH 2/4] Move legalization to legalizer
Moves the splitting from a target-specific DAG combine to LegalizeVectorTypes and adds promotion to LegalizeIntegerTypes. Also updates how the histogram intrinsic is custom-lowered during operation legalization (LegalizeDAG).
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 5 ++
.../SelectionDAG/LegalizeIntegerTypes.cpp | 11 +++++
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 28 +++++++++++
.../Target/AArch64/AArch64ISelLowering.cpp | 49 +++----------------
llvm/test/CodeGen/AArch64/sve2-histcnt.ll | 8 ++-
6 files changed, 58 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f4b3d1a41c681b..3c94f01da0be43 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1241,6 +1241,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<MaskedHistogramSDNode>(Node)->getIndex().getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TLI.getCustomOperationAction(*Node);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 3635bc7a965804..98d6d8b1fc94b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2037,6 +2037,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::EXPERIMENTAL_VP_SPLICE:
Res = PromoteIntOp_VP_SPLICE(N, OpNo);
break;
+ case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
+ Res = PromoteIntOp_VECTOR_HISTOGRAM(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2749,6 +2752,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+ SmallVector<SDValue, 7> NewOps(N->ops());
+ NewOps[1] = GetPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3a49a8ff10860a..af763b6159aee7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -424,6 +424,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, unsigned OpNo);
void SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -972,6 +973,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SplitVecOp_CMP(SDNode *N);
SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SplitVecOp_VP_CttzElements(SDNode *N);
+ SDValue SplitVecOp_VECTOR_HISTOGRAM(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5a21ad7ac7e2cd..c3388b895f2e8d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3264,6 +3264,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
Res = SplitVecOp_VP_CttzElements(N);
break;
+ case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
+ Res = SplitVecOp_VECTOR_HISTOGRAM(N);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -4274,6 +4277,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) {
DAG.getNode(ISD::ADD, DL, ResVT, VLo, ResHi));
}
+SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
+ MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
+ SDLoc DL(HG);
+ SDValue Chain = HG->getChain();
+ SDValue Inc = HG->getInc();
+ SDValue Mask = HG->getMask();
+ SDValue Ptr = HG->getBasePtr();
+ SDValue Index = HG->getIndex();
+ SDValue Scale = HG->getScale();
+ SDValue IntID = HG->getIntID();
+ EVT MemVT = HG->getMemoryVT();
+ MachineMemOperand *MMO = HG->getMemOperand();
+ ISD::MemIndexType IndexType = HG->getIndexType();
+
+ SDValue IndexLo, IndexHi, MaskLo, MaskHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ SDValue HistogramOpsLo[] = {Chain, Inc, MaskLo, Ptr, IndexLo, Scale, IntID};
+ SDValue HChain = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
+ HistogramOpsLo, MMO, IndexType);
+ SDValue HistogramOpsHi[] = {HChain, Inc, MaskHi, Ptr, IndexHi, Scale, IntID};
+ return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
+ HistogramOpsHi, MMO, IndexType);
+}
+
//===----------------------------------------------------------------------===//
// Result Vector Widening
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7c9b34b272f17d..5af07af38bdb7b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1128,8 +1128,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
- setTargetDAGCombine(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM);
-
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -1779,10 +1777,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Histcnt is SVE2 only
if (Subtarget->hasSVE2()) {
- setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::Other,
+ setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::i8, Promote);
+ setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::i16, Promote);
+ setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::nxv4i32,
+ Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::nxv2i64,
Custom);
- setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::i8, Custom);
- setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::i16, Custom);
}
}
@@ -25436,41 +25436,6 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return NVCAST;
}
-static SDValue performHistogramCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- if (!DCI.isBeforeLegalize())
- return SDValue();
-
- MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
- SDLoc DL(HG);
- SDValue Chain = HG->getChain();
- SDValue Inc = HG->getInc();
- SDValue Mask = HG->getMask();
- SDValue Ptr = HG->getBasePtr();
- SDValue Index = HG->getIndex();
- SDValue Scale = HG->getScale();
- SDValue IntID = HG->getIntID();
- EVT MemVT = HG->getMemoryVT();
- EVT IndexVT = Index.getValueType();
- MachineMemOperand *MMO = HG->getMemOperand();
- ISD::MemIndexType IndexType = HG->getIndexType();
-
- if (IndexVT == MVT::nxv4i32 || IndexVT == MVT::nxv2i64)
- return SDValue();
-
- // Split vectors which are too wide
- SDValue IndexLo, IndexHi, MaskLo, MaskHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
- SDValue HistogramOpsLo[] = {Chain, Inc, MaskLo, Ptr, IndexLo, Scale, IntID};
- SDValue HChain = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
- HistogramOpsLo, MMO, IndexType);
- SDValue HistogramOpsHi[] = {HChain, Inc, MaskHi, Ptr, IndexHi, Scale, IntID};
- return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
- HistogramOpsHi, MMO, IndexType);
-}
-
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -25815,8 +25780,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCTLZCombine(N, DAG, Subtarget);
case ISD::SCALAR_TO_VECTOR:
return performScalarToVectorCombine(N, DCI, DAG);
- case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
- return performHistogramCombine(N, DCI, DAG);
}
return SDValue();
}
@@ -28219,7 +28182,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_HISTOGRAM(SDValue Op,
EVT IndexVT = Index.getValueType();
LLVMContext &Ctx = *DAG.getContext();
ElementCount EC = IndexVT.getVectorElementCount();
- EVT MemVT = EVT::getVectorVT(Ctx, IncVT, EC);
+ EVT MemVT = EVT::getVectorVT(Ctx, HG->getMemoryVT(), EC);
EVT IncExtVT =
EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
EVT IncSplatVT = EVT::getVectorVT(Ctx, IncExtVT, EC);
diff --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
index 56d5eb13ab12e3..dd0b9639a8fc2f 100644
--- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
@@ -132,8 +132,10 @@ define void @histogram_i16_literal_1(ptr %base, <vscale x 4 x i32> %indices, <vs
; CHECK-LABEL: histogram_i16_literal_1:
; CHECK: // %bb.0:
; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z3.s, #1 // =0x1
; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: add z1.s, z2.s, z1.s
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
; CHECK-NEXT: ret
%buckets = getelementptr i16, ptr %base, <vscale x 4 x i32> %indices
@@ -145,8 +147,10 @@ define void @histogram_i16_literal_2(ptr %base, <vscale x 4 x i32> %indices, <vs
; CHECK-LABEL: histogram_i16_literal_2:
; CHECK: // %bb.0:
; CHECK-NEXT: histcnt z1.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z3.s, #2 // =0x2
; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: adr z1.s, [z2.s, z1.s, lsl #1]
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: mad z1.s, p1/m, z3.s, z2.s
; CHECK-NEXT: st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
; CHECK-NEXT: ret
%buckets = getelementptr i16, ptr %base, <vscale x 4 x i32> %indices
>From e703b0cec1a6b04aa90bfbf22da61b5cd6cfc2c8 Mon Sep 17 00:00:00 2001
From: Max Beck-Jones <max.beck-jones at arm.com>
Date: Wed, 28 Aug 2024 16:41:29 +0000
Subject: [PATCH 3/4] fixup: Remove unnecessary setOperationActions
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5af07af38bdb7b..2e36d7df061c47 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1777,8 +1777,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Histcnt is SVE2 only
if (Subtarget->hasSVE2()) {
- setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::i8, Promote);
- setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::i16, Promote);
setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::nxv4i32,
Custom);
setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::nxv2i64,
>From a914c3c7b3e508b861d30e72b3954688fbd53124 Mon Sep 17 00:00:00 2001
From: Max Beck-Jones <max.beck-jones at arm.com>
Date: Wed, 28 Aug 2024 17:27:22 +0000
Subject: [PATCH 4/4] fixup: Review nits
---
.../SelectionDAG/LegalizeVectorTypes.cpp | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c3388b895f2e8d..10c186b1ac2ee8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4280,11 +4280,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
SDLoc DL(HG);
- SDValue Chain = HG->getChain();
SDValue Inc = HG->getInc();
- SDValue Mask = HG->getMask();
SDValue Ptr = HG->getBasePtr();
- SDValue Index = HG->getIndex();
SDValue Scale = HG->getScale();
SDValue IntID = HG->getIntID();
EVT MemVT = HG->getMemoryVT();
@@ -4292,14 +4289,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
ISD::MemIndexType IndexType = HG->getIndexType();
SDValue IndexLo, IndexHi, MaskLo, MaskHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
- SDValue HistogramOpsLo[] = {Chain, Inc, MaskLo, Ptr, IndexLo, Scale, IntID};
- SDValue HChain = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
- HistogramOpsLo, MMO, IndexType);
- SDValue HistogramOpsHi[] = {HChain, Inc, MaskHi, Ptr, IndexHi, Scale, IntID};
- return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
- HistogramOpsHi, MMO, IndexType);
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(HG->getIndex(), DL);
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(HG->getMask(), DL);
+ SDValue OpsLo[] = {HG->getChain(), Inc, MaskLo, Ptr, IndexLo, Scale, IntID};
+ SDValue Lo = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
+ OpsLo, MMO, IndexType);
+ SDValue OpsHi[] = {Lo, Inc, MaskHi, Ptr, IndexHi, Scale, IntID};
+ return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, OpsHi,
+ MMO, IndexType);
}
//===----------------------------------------------------------------------===//
More information about the llvm-commits mailing list