[llvm] 2b15c4a - [AArch64] Postcommit fixes for histogram intrinsic (#92095)

via llvm-commits <llvm-commits at lists.llvm.org>
Tue May 14 07:16:47 PDT 2024


Author: Graham Hunter
Date: 2024-05-14T15:16:42+01:00
New Revision: 2b15c4a62be6ceab124cb2505ae8dc6a98ba6e7d

URL: https://github.com/llvm/llvm-project/commit/2b15c4a62be6ceab124cb2505ae8dc6a98ba6e7d
DIFF: https://github.com/llvm/llvm-project/commit/2b15c4a62be6ceab124cb2505ae8dc6a98ba6e7d.diff

LOG: [AArch64] Postcommit fixes for histogram intrinsic (#92095)

A buildbot with expensive checks enabled flagged some problems with my patch. There was also a post-commit nit on the LangRef changes.

Added: 
    

Modified: 
    llvm/docs/LangRef.rst
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
    llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
    llvm/test/CodeGen/AArch64/sve2-histcnt.ll

Removed: 
    


################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 06809f8bf445d..e2f4d8bfcaeed 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -19143,8 +19143,8 @@ will be on any later loop iteration.
 This intrinsic will only return 0 if the input count is also 0. A non-zero input
 count will produce a non-zero result.
 
-'``llvm.experimental.vector.histogram.*``' Intrinsics
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+'``llvm.experimental.vector.histogram.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 These intrinsics are overloaded.
 

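A note on the hunk above: reStructuredText requires a heading's underline to be at least as long as the heading text, so the row of '^' characters shrinks by one along with the rename from the plural "Intrinsics" to the singular "Intrinsic"; a mismatched underline would otherwise produce a "Title underline too short" warning when the docs are built.
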
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 33cc8ffaf85d5..f6d80f78910cf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27395,9 +27395,11 @@ SDValue AArch64TargetLowering::LowerVECTOR_HISTOGRAM(SDValue Op,
   SDValue IncSplat = DAG.getSplatVector(MemVT, DL, Inc);
   SDValue Ops[] = {Chain, PassThru, Mask, Ptr, Index, Scale};
 
-  // Set the MMO to load only, rather than load|store.
-  MachineMemOperand *GMMO = HG->getMemOperand();
-  GMMO->setFlags(MachineMemOperand::MOLoad);
+  MachineMemOperand *MMO = HG->getMemOperand();
+  // Create an MMO for the gather, without load|store flags.
+  MachineMemOperand *GMMO = DAG.getMachineFunction().getMachineMemOperand(
+      MMO->getPointerInfo(), MachineMemOperand::MOLoad, MMO->getSize(),
+      MMO->getAlign(), MMO->getAAInfo());
   ISD::MemIndexType IndexType = HG->getIndexType();
   SDValue Gather =
       DAG.getMaskedGather(DAG.getVTList(MemVT, MVT::Other), MemVT, DL, Ops,
@@ -27412,10 +27414,10 @@ SDValue AArch64TargetLowering::LowerVECTOR_HISTOGRAM(SDValue Op,
   SDValue Mul = DAG.getNode(ISD::MUL, DL, MemVT, HistCnt, IncSplat);
   SDValue Add = DAG.getNode(ISD::ADD, DL, MemVT, Gather, Mul);
 
-  // Create a new MMO for the scatter.
+  // Create an MMO for the scatter, without load|store flags.
   MachineMemOperand *SMMO = DAG.getMachineFunction().getMachineMemOperand(
-      GMMO->getPointerInfo(), MachineMemOperand::MOStore, GMMO->getSize(),
-      GMMO->getAlign(), GMMO->getAAInfo());
+      MMO->getPointerInfo(), MachineMemOperand::MOStore, MMO->getSize(),
+      MMO->getAlign(), MMO->getAAInfo());
 
   SDValue ScatterOps[] = {GChain, Add, Mask, Ptr, Index, Scale};
   SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MemVT, DL,

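For context on the change above: the old code mutated the histogram node's own MachineMemOperand, and MachineMemOperand::setFlags ORs new flags into the operand rather than replacing them, so the gather's operand kept the store flag despite the "load only" comment, and the scatter's operand was then derived from that same mutated object. The fix leaves the original operand untouched and builds a fresh, narrowed MMO for each of the two memory operations. A minimal sketch of the resulting pattern, condensed from LowerVECTOR_HISTOGRAM above (not standalone code; the MakeMMO helper is introduced here purely for illustration):

  MachineMemOperand *MMO = HG->getMemOperand(); // covers the full read-modify-write
  auto MakeMMO = [&](MachineMemOperand::Flags Flags) {
    // One fresh operand per memory op; the histogram node's MMO stays untouched.
    return DAG.getMachineFunction().getMachineMemOperand(
        MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getAlign(),
        MMO->getAAInfo());
  };
  MachineMemOperand *GMMO = MakeMMO(MachineMemOperand::MOLoad);  // gather: load only
  MachineMemOperand *SMMO = MakeMMO(MachineMemOperand::MOStore); // scatter: store only
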
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index de80fa2c05023..8f820a3bba2b3 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -1006,7 +1006,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
                                            CI->getArgOperand(1)->getType()))
         return false;
       scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
-      break;
+      return true;
     case Intrinsic::masked_load:
       // Scalarize unsupported vector masked load
       if (TTI.isLegalMaskedLoad(

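Why the one-line change above matters: optimizeCallInst returns whether it rewrote the IR. With `break`, control fell out of the switch and let the function go on to report no change even though the histogram call had just been scalarized; expensive-checks builds verify that a pass returning false really left the IR untouched, which is presumably what the buildbot caught. A condensed fragment of the switch (other cases elided, following the context shown in the hunk):

  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::experimental_vector_histogram_add:
    // If the target handles the histogram natively, leave the call alone.
    if (TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(),
                                         CI->getArgOperand(1)->getType()))
      return false;
    scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
    return true; // was `break`, which under-reported the modification
  }
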
diff --git a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
index 45f1429a810a0..e59d9098a30d6 100644
--- a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s -o - | FileCheck %s
 
 ;; This test exercises the default lowering of the histogram to scalarized code.
 

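Both histogram tests gain -verify-machineinstrs on their RUN lines (here and in sve2-histcnt.ll below). The flag makes llc run the machine verifier over the code it generates, which is the kind of check that rejects malformed memory operands like the one fixed above; expensive-checks builds turn the verifier on by default, so adding the flag keeps that coverage in ordinary test runs as well.
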
diff --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
index 557a42116cdb0..db164e288abde 100644
--- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s -o - | FileCheck %s
 
 define void @histogram_i64(<vscale x 2 x ptr> %buckets, i64 %inc, <vscale x 2 x i1> %mask) #0 {
 ; CHECK-LABEL: histogram_i64:

