[llvm] 2b15c4a - [AArch64] Postcommit fixes for histogram intrinsic (#92095)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 14 07:16:47 PDT 2024
Author: Graham Hunter
Date: 2024-05-14T15:16:42+01:00
New Revision: 2b15c4a62be6ceab124cb2505ae8dc6a98ba6e7d
URL: https://github.com/llvm/llvm-project/commit/2b15c4a62be6ceab124cb2505ae8dc6a98ba6e7d
DIFF: https://github.com/llvm/llvm-project/commit/2b15c4a62be6ceab124cb2505ae8dc6a98ba6e7d.diff
LOG: [AArch64] Postcommit fixes for histogram intrinsic (#92095)
A buildbot with expensive checks enabled flagged some problems with my patch. There was also a post-commit nit on the LangRef changes.
Added:
Modified:
llvm/docs/LangRef.rst
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
llvm/test/CodeGen/AArch64/sve2-histcnt.ll
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 06809f8bf445d..e2f4d8bfcaeed 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -19143,8 +19143,8 @@ will be on any later loop iteration.
This intrinsic will only return 0 if the input count is also 0. A non-zero input
count will produce a non-zero result.
-'``llvm.experimental.vector.histogram.*``' Intrinsics
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+'``llvm.experimental.vector.histogram.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
These intrinsics are overloaded.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 33cc8ffaf85d5..f6d80f78910cf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27395,9 +27395,11 @@ SDValue AArch64TargetLowering::LowerVECTOR_HISTOGRAM(SDValue Op,
SDValue IncSplat = DAG.getSplatVector(MemVT, DL, Inc);
SDValue Ops[] = {Chain, PassThru, Mask, Ptr, Index, Scale};
- // Set the MMO to load only, rather than load|store.
- MachineMemOperand *GMMO = HG->getMemOperand();
- GMMO->setFlags(MachineMemOperand::MOLoad);
+ MachineMemOperand *MMO = HG->getMemOperand();
+ // Create an MMO for the gather, without load|store flags.
+ MachineMemOperand *GMMO = DAG.getMachineFunction().getMachineMemOperand(
+ MMO->getPointerInfo(), MachineMemOperand::MOLoad, MMO->getSize(),
+ MMO->getAlign(), MMO->getAAInfo());
ISD::MemIndexType IndexType = HG->getIndexType();
SDValue Gather =
DAG.getMaskedGather(DAG.getVTList(MemVT, MVT::Other), MemVT, DL, Ops,
@@ -27412,10 +27414,10 @@ SDValue AArch64TargetLowering::LowerVECTOR_HISTOGRAM(SDValue Op,
SDValue Mul = DAG.getNode(ISD::MUL, DL, MemVT, HistCnt, IncSplat);
SDValue Add = DAG.getNode(ISD::ADD, DL, MemVT, Gather, Mul);
- // Create a new MMO for the scatter.
+ // Create an MMO for the scatter, without load|store flags.
MachineMemOperand *SMMO = DAG.getMachineFunction().getMachineMemOperand(
- GMMO->getPointerInfo(), MachineMemOperand::MOStore, GMMO->getSize(),
- GMMO->getAlign(), GMMO->getAAInfo());
+ MMO->getPointerInfo(), MachineMemOperand::MOStore, MMO->getSize(),
+ MMO->getAlign(), MMO->getAAInfo());
SDValue ScatterOps[] = {GChain, Add, Mask, Ptr, Index, Scale};
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MemVT, DL,
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index de80fa2c05023..8f820a3bba2b3 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -1006,7 +1006,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
CI->getArgOperand(1)->getType()))
return false;
scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
- break;
+ return true;
case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
if (TTI.isLegalMaskedLoad(
diff --git a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
index 45f1429a810a0..e59d9098a30d6 100644
--- a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s -o - | FileCheck %s
;; This test exercises the default lowering of the histogram to scalarized code.
diff --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
index 557a42116cdb0..db164e288abde 100644
--- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s -o - | FileCheck %s
define void @histogram_i64(<vscale x 2 x ptr> %buckets, i64 %inc, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: histogram_i64:
More information about the llvm-commits mailing list