[clang] [llvm] [Draft][LLVM] Refine MemoryEffect handling for target-specific intrinsics (PR #155590)

via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 29 01:48:48 PDT 2025


https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/155590

>From 0f25eb4c385e1f870910f05aca1d5cd5c631afcd Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Tue, 1 Jul 2025 11:48:55 +0000
Subject: [PATCH 1/5] [RFC] Extend MemoryEffects to Support Target-Specific
 Memory Locations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces preliminary support for additional memory locations,
such as FPMR and ZA, needed to model AArch64 architectural registers as
memory dependencies.

Currently, these locations are not yet target-specific. The goal is to enable
the compiler to express read/write effects on these resources.

What This Patch Does:
  Adds two new memory locations: FPMR and ZA, intended to represent
AArch64-specific inaccessible memory types.

Current Limitations:
  These new locations are not yet target-specific in the type-safe sense;
they are globally visible and hardcoded.
  There is no mechanism yet to associate a memory location with its
corresponding target (e.g., AArch64 vs RISCV).
  No changes are made yet to bitcode serialization, parser support, or alias
analysis behavior.

This patch is not functionally complete — it is a structural prototype to
solicit feedback on the direction, and I would like some suggestions on
how to proceed.
---
 llvm/include/llvm/AsmParser/LLToken.h         |  2 +
 llvm/include/llvm/IR/Intrinsics.td            | 11 ++++
 llvm/include/llvm/Support/ModRef.h            | 46 +++++++++++++++-
 llvm/include/llvm/TableGen/Record.h           |  3 ++
 llvm/lib/AsmParser/LLLexer.cpp                |  2 +
 llvm/lib/AsmParser/LLParser.cpp               | 32 ++++++-----
 llvm/lib/IR/Attributes.cpp                    | 13 +++++
 llvm/lib/Support/ModRef.cpp                   |  9 ++++
 llvm/lib/TableGen/Record.cpp                  | 15 ++++++
 llvm/lib/Transforms/IPO/FunctionAttrs.cpp     |  3 ++
 llvm/test/Assembler/memory-attribute.ll       | 25 +++++++++
 llvm/test/Bitcode/attributes.ll               |  1 -
 llvm/test/TableGen/intrinsic-attrs-fp8.td     | 54 +++++++++++++++++++
 llvm/unittests/Support/ModRefTest.cpp         |  3 +-
 .../TableGen/Basic/CodeGenIntrinsics.cpp      | 14 ++++-
 15 files changed, 216 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/TableGen/intrinsic-attrs-fp8.td

diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index c7e4bdf3ff811..c08eb99c1f5b2 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -202,6 +202,8 @@ enum Kind {
   kw_readwrite,
   kw_argmem,
   kw_inaccessiblemem,
+  kw_aarch64_fpmr,
+  kw_aarch64_za,
   kw_errnomem,
 
   // Legacy attributes:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index bd6f94ac1286c..ad1b0b462be37 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -49,6 +49,17 @@ def IntrArgMemOnly : IntrinsicProperty;
 // accessible by the module being compiled. This is a weaker form of IntrNoMem.
 def IntrInaccessibleMemOnly : IntrinsicProperty;
 
+
+
+class IntrinsicMemoryLocation;
+// These should be defined in the target file (IntrinsicsAArch64.td), but doing
+// so currently fails with: error: "Variable not defined: 'AArch64_FPMR'"
+def AArch64_FPMR : IntrinsicMemoryLocation;
+def AArch64_ZA:   IntrinsicMemoryLocation;
+// IntrInaccessible{Read|Write}MemOnly needs to set Location
+class IntrInaccessibleReadMemOnly<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}
+class IntrInaccessibleWriteMemOnly<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}
+
 // IntrInaccessibleMemOrArgMemOnly -- This intrinsic only accesses memory that
 // its pointer-typed arguments point to or memory that is not accessible
 // by the module being compiled. This is a weaker form of IntrArgMemOnly.
diff --git a/llvm/include/llvm/Support/ModRef.h b/llvm/include/llvm/Support/ModRef.h
index 71f3b5bcb9c2b..0de2b02e4e05a 100644
--- a/llvm/include/llvm/Support/ModRef.h
+++ b/llvm/include/llvm/Support/ModRef.h
@@ -56,6 +56,11 @@ enum class ModRefInfo : uint8_t {
 /// Debug print ModRefInfo.
 LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, ModRefInfo MR);
 
+enum class InaccessibleTargetMemLocation {
+  AARCH64_FPMR = 3,
+  AARCH64_ZA = 4,
+};
+
 /// The locations at which a function might access memory.
 enum class IRMemLocation {
   /// Access to memory via argument pointers.
@@ -65,7 +70,7 @@ enum class IRMemLocation {
   /// Errno memory.
   ErrnoMem = 2,
   /// Any other memory.
-  Other = 3,
+  Other = 5,
 
   /// Helpers to iterate all locations in the MemoryEffectsBase class.
   First = ArgMem,
@@ -152,6 +157,40 @@ template <typename LocationEnum> class MemoryEffectsBase {
     return MemoryEffectsBase(Location::Other, MR);
   }
 
+  /// Create MemoryEffectsBase that can only read inaccessible memory.
+  static MemoryEffectsBase
+  inaccessibleReadMemOnly(Location Loc = Location::InaccessibleMem) {
+    return MemoryEffectsBase(Loc, ModRefInfo::Ref);
+  }
+
+  /// Create MemoryEffectsBase that can only write inaccessible memory.
+  static MemoryEffectsBase
+  inaccessibleWriteMemOnly(Location Loc = Location::InaccessibleMem) {
+    return MemoryEffectsBase(Loc, ModRefInfo::Mod);
+  }
+
+  /// Checks if only target-specific memory locations are set.
+  /// Ignores standard locations like ArgMem or InaccessibleMem.
+  /// Needed because `Data` may be non-zero by default unless explicitly
+  /// cleared.
+  bool onlyAccessTargetMemoryLocation() {
+    MemoryEffectsBase ME = *this;
+    for (unsigned I = static_cast<int>(LocationEnum::ErrnoMem);
+         I < static_cast<int>(LocationEnum::Last); I++)
+      ME = ME.getWithoutLoc(static_cast<IRMemLocation>(I));
+    return ME.doesNotAccessMemory();
+  }
+
+  /// Create MemoryEffectsBase that can only access Target Memory Locations
+  static MemoryEffectsBase
+  setTargetMemLocationModRef(ModRefInfo MR = ModRefInfo::NoModRef) {
+    MemoryEffectsBase FRMB = none();
+    for (unsigned I = static_cast<int>(LocationEnum::ErrnoMem);
+         I < static_cast<int>(LocationEnum::Last); I++)
+      FRMB.setModRef(static_cast<Location>(I), MR);
+    return FRMB;
+  }
+
   /// Create MemoryEffectsBase that can only access inaccessible or argument
   /// memory.
   static MemoryEffectsBase
@@ -178,6 +217,11 @@ template <typename LocationEnum> class MemoryEffectsBase {
     return MemoryEffectsBase(Data);
   }
 
+  bool isTargetMemLoc(IRMemLocation Loc) {
+    return static_cast<unsigned>(Loc) >
+           static_cast<unsigned>(Location::ErrnoMem);
+  }
+
   /// Convert MemoryEffectsBase into an encoded integer value (used by memory
   /// attribute).
   uint32_t toIntValue() const {
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index a2b86eb8e7cad..5aeb331c49c9b 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -25,6 +25,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ModRef.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/TrailingObjects.h"
@@ -1961,6 +1962,8 @@ class Record {
   /// value is not the right type.
   int64_t getValueAsInt(StringRef FieldName) const;
 
+  llvm::IRMemLocation getLocationTypeAsInt(StringRef FieldName) const;
+
   /// This method looks up the specified field and returns its value as an Dag,
   /// throwing an exception if the field does not exist or if the value is not
   /// the right type.
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index ce813e1d7b1c4..c086f9f9585a2 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -701,6 +701,8 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(write);
   KEYWORD(readwrite);
   KEYWORD(argmem);
+  KEYWORD(aarch64_fpmr);
+  KEYWORD(aarch64_za);
   KEYWORD(inaccessiblemem);
   KEYWORD(errnomem);
   KEYWORD(argmemonly);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index b7f6950f679ef..abde2993bb048 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1666,6 +1666,25 @@ static bool upgradeMemoryAttr(MemoryEffects &ME, lltok::Kind Kind) {
   }
 }
 
+static std::optional<MemoryEffects::Location> keywordToLoc(lltok::Kind Tok) {
+  switch (Tok) {
+  case lltok::kw_argmem:
+    return IRMemLocation::ArgMem;
+  case lltok::kw_inaccessiblemem:
+    return IRMemLocation::InaccessibleMem;
+  case lltok::kw_errnomem:
+    return IRMemLocation::ErrnoMem;
+  case lltok::kw_aarch64_fpmr:
+    return static_cast<IRMemLocation>(
+        llvm::InaccessibleTargetMemLocation::AARCH64_FPMR);
+  case lltok::kw_aarch64_za:
+    return static_cast<IRMemLocation>(
+        llvm::InaccessibleTargetMemLocation::AARCH64_ZA);
+  default:
+    return std::nullopt;
+  }
+}
+
 /// parseFnAttributeValuePairs
 ///   ::= <attr> | <attr> '=' <value>
 bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B,
@@ -2510,19 +2529,6 @@ bool LLParser::parseAllocKind(AllocFnKind &Kind) {
   return false;
 }
 
-static std::optional<MemoryEffects::Location> keywordToLoc(lltok::Kind Tok) {
-  switch (Tok) {
-  case lltok::kw_argmem:
-    return IRMemLocation::ArgMem;
-  case lltok::kw_inaccessiblemem:
-    return IRMemLocation::InaccessibleMem;
-  case lltok::kw_errnomem:
-    return IRMemLocation::ErrnoMem;
-  default:
-    return std::nullopt;
-  }
-}
-
 static std::optional<ModRefInfo> keywordToModRef(lltok::Kind Tok) {
   switch (Tok) {
   case lltok::kw_none:
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index d1fbcb9e893a7..37e9d7c5c74db 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -640,6 +640,10 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
       if (MR == OtherMR)
         continue;
 
+      // Don't want to print Target Location if NoModRef
+      if (ME.isTargetMemLoc(Loc) && (MR == ModRefInfo::NoModRef))
+        continue;
+
       if (!First)
         OS << ", ";
       First = false;
@@ -656,6 +660,15 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
         break;
       case IRMemLocation::Other:
         llvm_unreachable("This is represented as the default access kind");
+      default: {
+        InaccessibleTargetMemLocation TargetLoc =
+            static_cast<InaccessibleTargetMemLocation>(Loc);
+        if (TargetLoc == InaccessibleTargetMemLocation::AARCH64_FPMR)
+          OS << "aarch64_fpmr: ";
+        if (TargetLoc == InaccessibleTargetMemLocation::AARCH64_ZA)
+          OS << "aarch64_za: ";
+        break;
+      }
       }
       OS << getModRefStr(MR);
     }
diff --git a/llvm/lib/Support/ModRef.cpp b/llvm/lib/Support/ModRef.cpp
index 2bb9bc945bd2e..dc0dafdbe7e49 100644
--- a/llvm/lib/Support/ModRef.cpp
+++ b/llvm/lib/Support/ModRef.cpp
@@ -49,6 +49,15 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, MemoryEffects ME) {
     case IRMemLocation::Other:
       OS << "Other: ";
       break;
+    default: {
+      InaccessibleTargetMemLocation TargetLoc =
+          static_cast<InaccessibleTargetMemLocation>(Loc);
+      if (TargetLoc == InaccessibleTargetMemLocation::AARCH64_FPMR)
+        OS << "AARCH64_FPMR: ";
+      if (TargetLoc == InaccessibleTargetMemLocation::AARCH64_ZA)
+        OS << "AARCH64_ZA: ";
+      break;
+    }
     }
     OS << ME.getModRef(Loc);
   });
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 1f3e5dc68f1d6..d114358266737 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -3102,6 +3102,21 @@ Record::getValueAsListOfDefs(StringRef FieldName) const {
   return Defs;
 }
 
+llvm::IRMemLocation Record::getLocationTypeAsInt(StringRef FieldName) const {
+  const Record *LocRec = getValueAsDef(FieldName);
+  StringRef Name = LocRec->getName();
+  if (Name == "AArch64_FPMR")
+    return static_cast<IRMemLocation>(
+        llvm::InaccessibleTargetMemLocation::AARCH64_FPMR);
+  else if (Name == "AArch64_ZA")
+    return static_cast<IRMemLocation>(
+        llvm::InaccessibleTargetMemLocation::AARCH64_ZA);
+  else if (Name == "InaccessibleMem")
+    return llvm::IRMemLocation::InaccessibleMem;
+  else
+    PrintFatalError(getLoc(), "unknown IRMemLocation: " + Name);
+}
+
 int64_t Record::getValueAsInt(StringRef FieldName) const {
   const RecordVal *R = getValue(FieldName);
   if (!R || !R->getValue())
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index f43202eea6306..49b822b3ef38e 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -143,6 +143,9 @@ static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
     ME |= MemoryEffects::argMemOnly(MR);
   ME |= MemoryEffects(IRMemLocation::ErrnoMem, MR);
   ME |= MemoryEffects(IRMemLocation::Other, MR);
+  // Should also set the other Target Memory Locations as MR.
+  // To compare with MemoryEffects::unknown() in addMemoryAttrs
+  ME |= MemoryEffects::setTargetMemLocationModRef(MR);
 }
 
 static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
diff --git a/llvm/test/Assembler/memory-attribute.ll b/llvm/test/Assembler/memory-attribute.ll
index effd4ce7c4548..42f9b9f87e8b0 100644
--- a/llvm/test/Assembler/memory-attribute.ll
+++ b/llvm/test/Assembler/memory-attribute.ll
@@ -78,3 +78,28 @@ declare void @fn_argmem_read_inaccessiblemem_write()
 ; CHECK: @fn_argmem_read_inaccessiblemem_write_reordered()
 declare void @fn_argmem_read_inaccessiblemem_write_reordered()
     memory(inaccessiblemem: write, argmem: read)
+
+; CHECK: Function Attrs: memory(aarch64_za: write)
+; CHECK: @fn_inaccessiblemem_write_aarch64_za()
+declare void @fn_inaccessiblemem_write_aarch64_za()
+    memory(aarch64_za: write)
+
+; CHECK: Function Attrs: memory(aarch64_za: read)
+; CHECK: @fn_inaccessiblemem_read_aarch64_za()
+declare void @fn_inaccessiblemem_read_aarch64_za()
+    memory(aarch64_za: read)
+
+; CHECK: Function Attrs: memory(aarch64_fpmr: write)
+; CHECK: @fn_inaccessiblemem_write_aarch64_fpmr()
+declare void @fn_inaccessiblemem_write_aarch64_fpmr()
+    memory(aarch64_fpmr: write)
+
+; CHECK: Function Attrs: memory(aarch64_fpmr: read)
+; CHECK: @fn_inaccessiblemem_read_aarch64_fpmr()
+declare void @fn_inaccessiblemem_read_aarch64_fpmr()
+    memory(aarch64_fpmr: read)
+
+; CHECK: Function Attrs: memory(aarch64_fpmr: read, aarch64_za: write)
+; CHECK: @fn_inaccessiblemem_read_aarch64_fpmr_write_aarch64_za()
+declare void @fn_inaccessiblemem_read_aarch64_fpmr_write_aarch64_za()
+    memory(aarch64_fpmr: read, aarch64_za: write)
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 8c1a76365e1b4..8e72e7ade54c1 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -572,7 +572,6 @@ define void @dead_on_return(ptr dead_on_return %p) {
   ret void
 }
 
-; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { memory(none) }
 ; CHECK: attributes #3 = { memory(read) }
diff --git a/llvm/test/TableGen/intrinsic-attrs-fp8.td b/llvm/test/TableGen/intrinsic-attrs-fp8.td
new file mode 100644
index 0000000000000..5aaba44edcc45
--- /dev/null
+++ b/llvm/test/TableGen/intrinsic-attrs-fp8.td
@@ -0,0 +1,54 @@
+// RUN: llvm-tblgen -gen-intrinsic-impl -I %p/../../include -DTEST_INTRINSICS_SUPPRESS_DEFS %s | FileCheck %s
+
+include "llvm/IR/Intrinsics.td"
+
+def int_aarch64_set_fpmr_2   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleWriteMemOnly<AArch64_FPMR>]>;
+
+def int_aarch64_get_za_2   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleReadMemOnly<AArch64_ZA>]>;
+
+def int_aarch64_get_fpmr_set_za   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleWriteMemOnly<AArch64_ZA>]>;
+
+// CHECK:    static constexpr unsigned IntrinsicNameOffsetTable[] = {
+// CHECK-NEXT:  1, // not_intrinsic
+// CHECK-NEXT:  15, // llvm.aarch64.get.fpmr.set.za
+// CHECK-NEXT:  44, // llvm.aarch64.get.za.2
+// CHECK-NEXT:  66, // llvm.aarch64.set.fpmr.2
+
+// CHECK:   static AttributeSet getIntrinsicFnAttributeSet(LLVMContext &C, unsigned ID) {
+// CHECK-NEXT:  switch (ID) {
+// CHECK-NEXT:    default: llvm_unreachable("Invalid attribute set number");
+// CHECK-NEXT:  case 0:
+// CHECK-NEXT:    return AttributeSet::get(C, {
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
+// CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
+// CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: Ref, AARCH64_ZA: Mod, Other: NoModRef
+// CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(576)),
+// CHECK-NEXT:    });
+// CHECK-NEXT:  case 1:
+// CHECK-NEXT:    return AttributeSet::get(C, {
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
+// CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
+// CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: NoModRef, AARCH64_ZA: Ref, Other: NoModRef
+// CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(256)),
+// CHECK-NEXT:    });
+// CHECK-NEXT:  case 2:
+// CHECK-NEXT:    return AttributeSet::get(C, {
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
+// CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
+// CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: Mod, AARCH64_ZA: NoModRef, Other: NoModRef
+// CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(128)),
+
+// CHECK: static constexpr uint16_t IntrinsicsToAttributesMap[] = {
+// CHECK-NEXT:    0 << 8 | 0, // llvm.aarch64.get.fpmr.set.za
+// CHECK-NEXT:    1 << 8 | 0, // llvm.aarch64.get.za.2
+// CHECK-NEXT:    2 << 8 | 0, // llvm.aarch64.set.fpmr.2
+// CHECK-NEXT:};
diff --git a/llvm/unittests/Support/ModRefTest.cpp b/llvm/unittests/Support/ModRefTest.cpp
index 9c13908da44bb..7aa473ad20336 100644
--- a/llvm/unittests/Support/ModRefTest.cpp
+++ b/llvm/unittests/Support/ModRefTest.cpp
@@ -21,7 +21,8 @@ TEST(ModRefTest, PrintMemoryEffects) {
   raw_string_ostream OS(S);
   OS << MemoryEffects::none();
   EXPECT_EQ(S, "ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: "
-               "NoModRef, Other: NoModRef");
+               "NoModRef, AARCH64_FPMR: NoModRef, AARCH64_ZA: NoModRef, Other: "
+               "NoModRef");
 }
 
 } // namespace
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index bc42efa3b2e9c..eb2d4de7e9320 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -374,7 +374,19 @@ void CodeGenIntrinsic::setProperty(const Record *R) {
     ME &= MemoryEffects::argMemOnly();
   else if (R->getName() == "IntrInaccessibleMemOnly")
     ME &= MemoryEffects::inaccessibleMemOnly();
-  else if (R->getName() == "IntrInaccessibleMemOrArgMemOnly")
+  else if (R->isSubClassOf("IntrInaccessibleReadMemOnly")) {
+    llvm::IRMemLocation Loc = R->getLocationTypeAsInt("Loc");
+    if (ME.onlyAccessTargetMemoryLocation())
+      ME = ME.getWithModRef(Loc, ModRefInfo::Ref);
+    else
+      ME &= MemoryEffects::inaccessibleReadMemOnly(Loc);
+  } else if (R->isSubClassOf("IntrInaccessibleWriteMemOnly")) {
+    llvm::IRMemLocation Loc = R->getLocationTypeAsInt("Loc");
+    if (ME.onlyAccessTargetMemoryLocation())
+      ME = ME.getWithModRef(Loc, ModRefInfo::Mod);
+    else
+      ME &= MemoryEffects::inaccessibleWriteMemOnly(Loc);
+  } else if (R->getName() == "IntrInaccessibleMemOrArgMemOnly")
     ME &= MemoryEffects::inaccessibleOrArgMemOnly();
   else if (R->getName() == "Commutative")
     isCommutative = true;

>From 9c7861b6fb256a769869ef1c0fe50f8efd019482 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Wed, 13 Aug 2025 13:13:53 +0000
Subject: [PATCH 2/5] [LLVM]Add read and write inaccessible memory metadata

This patch adds IntrInaccessibleReadWriteMem metadata to allow setting
both Mod and Ref at the same time for a Location.

This patch depends on how we implement PR#148650.
---
 llvm/include/llvm/IR/Intrinsics.td            |  1 +
 llvm/include/llvm/Support/ModRef.h            |  6 ++
 llvm/test/TableGen/intrinsic-attrs-fp8.td     | 72 ++++++++++++++++---
 .../TableGen/Basic/CodeGenIntrinsics.cpp      |  8 ++-
 4 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index ad1b0b462be37..33e89f88ef0d6 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -59,6 +59,7 @@ def AArch64_ZA:   IntrinsicMemoryLocation;
 // IntrInaccessible{Read|Write}MemOnly needs to set Location
 class IntrInaccessibleReadMemOnly<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}
 class IntrInaccessibleWriteMemOnly<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}
+class IntrInaccessibleReadWriteMem<IntrinsicMemoryLocation idx> : IntrinsicProperty{IntrinsicMemoryLocation Loc=idx;}
 
 // IntrInaccessibleMemOrArgMemOnly -- This intrinsic only accesses memory that
 // its pointer-typed arguments point to or memory that is not accessible
diff --git a/llvm/include/llvm/Support/ModRef.h b/llvm/include/llvm/Support/ModRef.h
index 0de2b02e4e05a..53d14717f486b 100644
--- a/llvm/include/llvm/Support/ModRef.h
+++ b/llvm/include/llvm/Support/ModRef.h
@@ -169,6 +169,12 @@ template <typename LocationEnum> class MemoryEffectsBase {
     return MemoryEffectsBase(Loc, ModRefInfo::Mod);
   }
 
+  /// Create MemoryEffectsBase that can read and write inaccessible memory.
+  static MemoryEffectsBase
+  inaccessibleReadWriteMem(Location Loc = Location::InaccessibleMem) {
+    return MemoryEffectsBase(Loc, ModRefInfo::ModRef);
+  }
+
   /// Checks if only target-specific memory locations are set.
   /// Ignores standard locations like ArgMem or InaccessibleMem.
   /// Needed because `Data` may be non-zero by default unless explicitly
diff --git a/llvm/test/TableGen/intrinsic-attrs-fp8.td b/llvm/test/TableGen/intrinsic-attrs-fp8.td
index 5aaba44edcc45..c01f8983b36c3 100644
--- a/llvm/test/TableGen/intrinsic-attrs-fp8.td
+++ b/llvm/test/TableGen/intrinsic-attrs-fp8.td
@@ -8,11 +8,23 @@ def int_aarch64_get_za_2   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInacc
 
 def int_aarch64_get_fpmr_set_za   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleWriteMemOnly<AArch64_ZA>]>;
 
+def int_aarch64_get_set_fpmr  : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleReadWriteMem<AArch64_FPMR>]>;
+
+def int_aarch64_get_set_za  : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
+
+def int_aarch64_get_set_fpmr_get_za  : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleReadWriteMem<AArch64_FPMR>, IntrInaccessibleReadMemOnly<AArch64_ZA>]>;
+
+def int_aarch64_get_fpmr_get_set_za  : DefaultAttrsIntrinsic<[], [llvm_i64_ty],  [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
+
 // CHECK:    static constexpr unsigned IntrinsicNameOffsetTable[] = {
 // CHECK-NEXT:  1, // not_intrinsic
-// CHECK-NEXT:  15, // llvm.aarch64.get.fpmr.set.za
-// CHECK-NEXT:  44, // llvm.aarch64.get.za.2
-// CHECK-NEXT:  66, // llvm.aarch64.set.fpmr.2
+// CHECK-NEXT:  15, // llvm.aarch64.get.fpmr.get.set.za
+// CHECK-NEXT:  48, // llvm.aarch64.get.fpmr.set.za
+// CHECK-NEXT:  77, // llvm.aarch64.get.set.fpmr
+// CHECK-NEXT:  103, // llvm.aarch64.get.set.fpmr.get.za
+// CHECK-NEXT:  136, // llvm.aarch64.get.set.za
+// CHECK-NEXT:  160, // llvm.aarch64.get.za.2
+// CHECK-NEXT:  182, // llvm.aarch64.set.fpmr.2
 
 // CHECK:   static AttributeSet getIntrinsicFnAttributeSet(LLVMContext &C, unsigned ID) {
 // CHECK-NEXT:  switch (ID) {
@@ -24,10 +36,50 @@ def int_aarch64_get_fpmr_set_za   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [In
 // CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
 // CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
 // CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
+// CHECK-NEXT:     // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: Ref, AARCH64_ZA: ModRef, Other: NoModRef
+// CHECK-NEXT:     Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(832)),
+// CHECK-NEXT:    });
+// CHECK-NEXT:  case 1:
+// CHECK-NEXT:    return AttributeSet::get(C, {
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
+// CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
 // CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: Ref, AARCH64_ZA: Mod, Other: NoModRef
 // CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(576)),
 // CHECK-NEXT:    });
-// CHECK-NEXT:  case 1:
+// CHECK-NEXT:  case 2:
+// CHECK-NEXT:    return AttributeSet::get(C, {
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
+// CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
+// CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: ModRef, AARCH64_ZA: NoModRef, Other: NoModRef
+// CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(192)),
+// CHECK-NEXT:    });
+// CHECK-NEXT:  case 3:
+// CHECK-NEXT:    return AttributeSet::get(C, {
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
+// CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
+// CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: ModRef, AARCH64_ZA: Ref, Other: NoModRef
+// CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(448)),
+// CHECK-NEXT:    });
+// CHECK-NEXT:  case 4:
+// CHECK-NEXT:    return AttributeSet::get(C, {
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoSync),
+// CHECK-NEXT:      Attribute::get(C, Attribute::NoFree),
+// CHECK-NEXT:      Attribute::get(C, Attribute::WillReturn),
+// CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: NoModRef, AARCH64_ZA: ModRef, Other: NoModRef
+// CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(768)),
+// CHECK-NEXT:    });
+// CHECK-NEXT:  case 5:
 // CHECK-NEXT:    return AttributeSet::get(C, {
 // CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
 // CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
@@ -37,7 +89,7 @@ def int_aarch64_get_fpmr_set_za   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [In
 // CHECK-NEXT:      // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, AARCH64_FPMR: NoModRef, AARCH64_ZA: Ref, Other: NoModRef
 // CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(256)),
 // CHECK-NEXT:    });
-// CHECK-NEXT:  case 2:
+// CHECK-NEXT:  case 6:
 // CHECK-NEXT:    return AttributeSet::get(C, {
 // CHECK-NEXT:      Attribute::get(C, Attribute::NoUnwind),
 // CHECK-NEXT:      Attribute::get(C, Attribute::NoCallback),
@@ -48,7 +100,11 @@ def int_aarch64_get_fpmr_set_za   : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [In
 // CHECK-NEXT:      Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(128)),
 
 // CHECK: static constexpr uint16_t IntrinsicsToAttributesMap[] = {
-// CHECK-NEXT:    0 << 8 | 0, // llvm.aarch64.get.fpmr.set.za
-// CHECK-NEXT:    1 << 8 | 0, // llvm.aarch64.get.za.2
-// CHECK-NEXT:    2 << 8 | 0, // llvm.aarch64.set.fpmr.2
+// CHECK-NEXT:    0 << 8 | 0, // llvm.aarch64.get.fpmr.get.set.za
+// CHECK-NEXT:    1 << 8 | 0, // llvm.aarch64.get.fpmr.set.za
+// CHECK-NEXT:    2 << 8 | 0, // llvm.aarch64.get.set.fpmr
+// CHECK-NEXT:    3 << 8 | 0, // llvm.aarch64.get.set.fpmr.get.za
+// CHECK-NEXT:    4 << 8 | 0, // llvm.aarch64.get.set.za
+// CHECK-NEXT:    5 << 8 | 0, // llvm.aarch64.get.za.2
+// CHECK-NEXT:    6 << 8 | 0, // llvm.aarch64.set.fpmr.2
 // CHECK-NEXT:};
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index eb2d4de7e9320..7064ee6f3c4e9 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -374,7 +374,13 @@ void CodeGenIntrinsic::setProperty(const Record *R) {
     ME &= MemoryEffects::argMemOnly();
   else if (R->getName() == "IntrInaccessibleMemOnly")
     ME &= MemoryEffects::inaccessibleMemOnly();
-  else if (R->isSubClassOf("IntrInaccessibleReadMemOnly")) {
+  else if (R->isSubClassOf("IntrInaccessibleReadWriteMem")) {
+    llvm::IRMemLocation Loc = R->getLocationTypeAsInt("Loc");
+    if (ME.onlyAccessTargetMemoryLocation())
+      ME = ME.getWithModRef(Loc, ModRefInfo::ModRef);
+    else
+      ME &= MemoryEffects::inaccessibleReadWriteMem(Loc);
+  } else if (R->isSubClassOf("IntrInaccessibleReadMemOnly")) {
     llvm::IRMemLocation Loc = R->getLocationTypeAsInt("Loc");
     if (ME.onlyAccessTargetMemoryLocation())
       ME = ME.getWithModRef(Loc, ModRefInfo::Ref);

>From b50963ee92b4ff1428ee64098c0ea5c26fe26d27 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Mon, 18 Aug 2025 15:11:25 +0000
Subject: [PATCH 3/5] [NFC] Update attributes for FP8 instructions using new
 target memory locations

---
 .../test/CodeGen/AArch64/attr-fp8-function.c  | 25 ++++--
 llvm/include/llvm/IR/IntrinsicsAArch64.td     | 79 ++++++++++++-------
 2 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/attr-fp8-function.c b/clang/test/CodeGen/AArch64/attr-fp8-function.c
index 54bfd177bd809..62b910eafa4a7 100644
--- a/clang/test/CodeGen/AArch64/attr-fp8-function.c
+++ b/clang/test/CodeGen/AArch64/attr-fp8-function.c
@@ -18,20 +18,29 @@ svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) __arm_streaming {
   return svcvtlt2_f16_mf8_fpm(zn, fpm);
 }
 
-// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR3:#.*]]
-// CHECK: declare <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8>) [[ATTR4:#.*]]
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR2:#.*]]
+// CHECK: declare <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8>) [[ATTR3:#.*]]
 
 
 // SME
+// With only fpmr as inaccessible memory
 svfloat32_t test_svmlalltt_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming {
   return svmlalltt_lane_f32_mf8_fpm(zda, zn, zm, 7, fpm);
 }
 
-// CHECK: declare <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmlalltt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR4]]
+// CHECK: declare <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmlalltt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR3]]
 
-// CHECK: declare <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half>, <8 x half>) [[ATTR4]]
+// With fpmr and za as inaccessible memory
+void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr)  __arm_streaming __arm_inout("za") {
+  svdot_lane_za32_mf8_vg1x2_fpm(slice, zn, zm, 3, fpmr);
+}
+
+// CHECK: declare void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR5:#.*]]
+// CHECK: declare <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half>, <8 x half>) [[ATTR3]]
 
-// CHECK: attributes [[ATTR1:#.*]] = {{{.*}}} 
-// CHECK: attributes [[ATTR2:#.*]] = {{{.*}}}
-// CHECK: attributes [[ATTR3]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
-// CHECK: attributes [[ATTR4]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: read) }
+// CHECK: attributes [[ATTR0:#.*]] = {{{.*}}}
+// CHECK: attributes [[ATTR1:#.*]] = {{{.*}}}
+// CHECK: attributes [[ATTR2]] = { nocallback nofree nosync nounwind willreturn memory(aarch64_fpmr: write) }
+// CHECK: attributes [[ATTR3]] = { nocallback nofree nosync nounwind willreturn memory(aarch64_fpmr: read) }
+// CHECK: attributes [[ATTR4:#.*]] = {{{.*}}}
+// CHECK: attributes [[ATTR5]] = { nocallback nofree nosync nounwind willreturn memory(aarch64_fpmr: read, aarch64_za: readwrite) }
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ca6e2128812f7..3aaf52b981eb0 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -761,7 +761,7 @@ let TargetPrefix = "aarch64" in {
   class RNDR_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
   class FPMR_Set_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleWriteMemOnly<AArch64_FPMR>]>;
 }
 
 // FP environment registers.
@@ -999,7 +999,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
 
   // Conversions
   class AdvSIMD_FP8_1VectorArg_Long_Intrinsic
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   def int_aarch64_neon_fp8_cvtl1   : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
   def int_aarch64_neon_fp8_cvtl2   : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
@@ -1008,13 +1008,13 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
   def int_aarch64_neon_fp8_fcvtn2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   // Dot-product
   class AdvSIMD_FP8_DOT_Intrinsic
@@ -1022,14 +1022,14 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                             [IntrReadMem, IntrInaccessibleMemOnly]>;
+                             [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
   class AdvSIMD_FP8_DOT_LANE_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              llvm_v16i8_ty,
                              llvm_i32_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                             [IntrInaccessibleReadMemOnly<AArch64_FPMR>, ImmArg<ArgIndex<3>>]>;
 
   def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic;
   def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic;
@@ -1044,7 +1044,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
                             [LLVMMatchType<0>,
                              llvm_v16i8_ty,
                              llvm_v16i8_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly]>;
+                             [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   class AdvSIMD_FP8_FMLA_LANE_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1052,7 +1052,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
                              llvm_v16i8_ty,
                              llvm_v16i8_ty,
                              llvm_i32_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                             [IntrInaccessibleReadMemOnly<AArch64_FPMR>, ImmArg<ArgIndex<3>>]>;
 
   def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic;
   def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic;
@@ -3070,6 +3070,12 @@ let TargetPrefix = "aarch64" in {
           llvm_anyvector_ty,
           LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
 
+ class SME_FP8_OuterProduct_QuarterTile_Single_Single
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+          llvm_anyvector_ty,
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;
+
   class SME_OuterProduct_QuarterTile_Single_Multi
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
@@ -3077,6 +3083,13 @@ let TargetPrefix = "aarch64" in {
           LLVMMatchType<0>,
           LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
 
+  class SME_FP8_OuterProduct_QuarterTile_Single_Multi
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+          llvm_anyvector_ty,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;
+
   class SME_OuterProduct_QuarterTile_Multi_Multi
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
@@ -3085,6 +3098,14 @@ let TargetPrefix = "aarch64" in {
           LLVMMatchType<0>,
           LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
 
+  class SME_FP8_OuterProduct_QuarterTile_Multi_Multi
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+          llvm_anyvector_ty,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, IntrHasSideEffects]>;
+
   // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S
   foreach mode = ["s", "a"] in {
     foreach za = ["", "_za64"] in {
@@ -3127,10 +3148,10 @@ let TargetPrefix = "aarch64" in {
 
   // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product
   foreach za = ["za16", "za32"] in {
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_QuarterTile_Single_Single;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_QuarterTile_Single_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_QuarterTile_Single_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_QuarterTile_Multi_Multi;
   }
 
   class SME_AddVectorToTile_Intrinsic
@@ -4027,7 +4048,7 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_Cvt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   def int_aarch64_sve_fp8_cvt1   : SVE2_FP8_Cvt;
   def int_aarch64_sve_fp8_cvt2   : SVE2_FP8_Cvt;
@@ -4038,7 +4059,7 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_Narrow_Cvt
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
   
   def int_aarch64_sve_fp8_cvtn  : SVE2_FP8_Narrow_Cvt;
   def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt;
@@ -4046,20 +4067,20 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_fp8_cvtnt
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   // Dot product
   class SVE2_FP8_FMLA_FDOT
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
   
   class SVE2_FP8_FMLA_FDOT_Lane
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>, ImmArg<ArgIndex<3>>]>;
   
   def int_aarch64_sve_fp8_fdot      : SVE2_FP8_FMLA_FDOT;
   def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane;
@@ -4086,69 +4107,69 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_CVT_X2_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                             [llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   class SVE2_FP8_CVT_Single_X4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   class SME_FP8_OuterProduct_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_nxv16i1_ty, llvm_nxv16i1_ty,
           llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-          [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly]>;
+          [ImmArg<ArgIndex<0>>, IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
 
   class SME_FP8_ZA_LANE_VGx1_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                llvm_nxv16i8_ty,
                                llvm_nxv16i8_ty,
                                llvm_i32_ty],
-                          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                          [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, ImmArg<ArgIndex<3>>]>;
 
   class SME_FP8_ZA_LANE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, ImmArg<ArgIndex<4>>]>;
 
   class SME_FP8_ZA_LANE_VGx4_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>, ImmArg<ArgIndex<6>>]>;
   class SME_FP8_ZA_SINGLE_VGx1_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
 
   class SME_FP8_ZA_SINGLE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
 
   class SME_FP8_ZA_SINGLE_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                              [IntrInaccessibleMemOnly]>;
+                              [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
 
   class SME_FP8_ZA_MULTI_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
 
   class SME_FP8_ZA_MULTI_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>, IntrInaccessibleReadWriteMem<AArch64_ZA>]>;
   //
   // CVT from FP8 to half-precision/BFloat16 multi-vector
   //
@@ -4167,7 +4188,7 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_fp8_cvt_x2
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrInaccessibleReadMemOnly<AArch64_FPMR>]>;
 
   def int_aarch64_sve_fp8_cvt_x4  : SVE2_FP8_CVT_Single_X4_Intrinsic;
   def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic;

>From fb5b7ca4a9a96badb509dbf350eb5e03a72717d4 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Mon, 14 Jul 2025 15:22:53 +0000
Subject: [PATCH 4/5] [Draft][LLVM] Refine MemoryEffect handling for
 target-specific intrinsics

Enable more precise alias and dependency analysis between calls when reasoning
about their operations on the same target memory location.

The key motivation is to break an unnecessary dependency between two calls when
the first call only reads a target memory location and the second call only
modifies it. If the second call does not access any other memory location, we
conclude that the two calls are independent.

For example:
```
  call void @llvm.aarch64.set.fpmr(i64)                 ; Call0
  call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(...) ; Call1
  call void @llvm.aarch64.set.fpmr(i64)                 ; Call2
```

Here, the dependency should exist only between Call0 (write) and Call1 (read).
Call1 and Call2 both touch the same target location, but since Call1 is a
read and Call2 is a write with no other side effects, they are
independent of each other.

The implementation modifies the MemoryEffects query by checking target-specific
memory locations (IRMemLocation) and relaxing Mod/Ref relations accordingly.
This allows the optimizer to avoid conservatively chaining dependencies across
otherwise independent target memory operations.

    This patch depends on how we implement PR#148650
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp      |  29 ++++
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |   4 +
 .../EarlyCSE/AArch64/fp8-target-memory.ll     | 156 ++++++++++++++++++
 3 files changed, 189 insertions(+)
 create mode 100644 llvm/test/Transforms/EarlyCSE/AArch64/fp8-target-memory.ll

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 86a2edbd8bd41..a021a8f7aa512 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1065,6 +1065,35 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
                ? ModRefInfo::Mod
                : ModRefInfo::NoModRef;
 
+  MemoryEffects MECall1Loc = Call1->getMemoryEffects();
+  MemoryEffects MECall2Loc = Call2->getMemoryEffects();
+  ModRefInfo NewMR = ModRefInfo::NoModRef;
+  bool Changed = false;
+  for (unsigned TargetLoc =
+           (static_cast<unsigned>(IRMemLocation::ErrnoMem) + 1);
+       TargetLoc < static_cast<unsigned>(IRMemLocation::Last); ++TargetLoc) {
+    ModRefInfo LocCall1MR =
+        MECall1Loc.getModRef(static_cast<IRMemLocation>(TargetLoc));
+    if (LocCall1MR == ModRefInfo::NoModRef)
+      continue;
+    ModRefInfo LocCall2MR =
+        MECall2Loc.getModRef(static_cast<IRMemLocation>(TargetLoc));
+    if (LocCall2MR == ModRefInfo::NoModRef)
+      continue;
+    // There should be no dependency between the two calls when Call1 only
+    // reads (Ref) and Call2 only writes (Mod) this target memory location,
+    // and Call2 does not access any other memory location.
+    // Example Call1 = sme.fdot.lane and Call2 = set.fpmr
+    if (LocCall1MR == ModRefInfo::Ref && LocCall2MR == ModRefInfo::Mod &&
+        MECall2Loc.getWithoutLoc(static_cast<IRMemLocation>(TargetLoc))
+            .doesNotAccessMemory())
+      Changed = true;
+    else
+      Changed = false;
+  }
+
+  if (Changed)
+    return NewMR;
   // Be conservative.
   return ModRefInfo::ModRef;
 }
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 66ecc69c9874d..a91b70e34ccd3 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2398,8 +2398,12 @@ Error BitcodeReader::parseAttributeGroupBlock() {
               ModRefInfo ArgMem = ModRefInfo((EncodedME >> 0) & 3);
               ModRefInfo InaccessibleMem = ModRefInfo((EncodedME >> 2) & 3);
               ModRefInfo OtherMem = ModRefInfo((EncodedME >> 4) & 3);
+              ModRefInfo Fpmr = ModRefInfo((EncodedME >> 6) & 3);
+              ModRefInfo Za = ModRefInfo((EncodedME >> 8) & 3);
               auto ME = MemoryEffects::inaccessibleMemOnly(InaccessibleMem) |
                         MemoryEffects::argMemOnly(ArgMem) |
+                        MemoryEffects(IRMemLocation::AArch64_FPMR, Fpmr) |
+                        MemoryEffects(IRMemLocation::AArch64_ZA, Za) |
                         MemoryEffects::errnoMemOnly(OtherMem) |
                         MemoryEffects::otherMemOnly(OtherMem);
               B.addMemoryAttr(ME);
diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/fp8-target-memory.ll b/llvm/test/Transforms/EarlyCSE/AArch64/fp8-target-memory.ll
new file mode 100644
index 0000000000000..4b1e5a417b4cb
--- /dev/null
+++ b/llvm/test/Transforms/EarlyCSE/AArch64/fp8-target-memory.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='early-cse<memssa>' < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+;; Only FPMR as inaccessible memory
+define <4 x half> @test_fpmr_inaccessible(<4 x half>%vd, <8 x i8> %vn, <8 x i8> %vm, <16 x i8> %vm2, i64 %fpm) {
+; CHECK-LABEL: define <4 x half> @test_fpmr_inaccessible(
+; CHECK-SAME: <4 x half> [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], <16 x i8> [[VM2:%.*]], i64 [[FPM:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+; CHECK-NEXT:    [[FDOT1:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <16 x i8> [[VM2]], i32 2)
+; CHECK-NEXT:    [[FDOT2:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT1]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    [[FDOT3:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT2]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    ret <4 x half> [[FDOT3]]
+;
+entry:
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot1 = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> %vd, <8 x i8> %vn, <16 x i8> %vm2, i32 2)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot2 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> %fdot1, <8 x i8> %vn, <8 x i8> %vm)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot3 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half>  %fdot2, <8 x i8> %vn, <8 x i8> %vm)
+  ret <4 x half> %fdot3
+}
+
+
+;; Write to ZA should not block the optimization for FPMR
+;; sme.fp8.fdot.lane.za16  ZA: write FPMR: read
+define void @test_fpmr_za_inaccessible(i32 %slice.0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, i64 %fpm) {
+; CHECK-LABEL: define void @test_fpmr_za_inaccessible(
+; CHECK-SAME: i32 [[SLICE_0:%.*]], <vscale x 16 x i8> [[ZN1:%.*]], <vscale x 16 x i8> [[ZN2:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 [[FPM:%.*]]) {
+; CHECK-NEXT:    [[SLICE:%.*]] = add i32 [[SLICE_0]], 7
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN1]], <vscale x 16 x i8> [[ZN2]], <vscale x 16 x i8> [[ZM]], i32 1)
+; CHECK-NEXT:    call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN2]], <vscale x 16 x i8> [[ZN1]], <vscale x 16 x i8> [[ZM]], i32 0)
+; CHECK-NEXT:    call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE_0]], <vscale x 16 x i8> [[ZN1]], <vscale x 16 x i8> [[ZN2]], <vscale x 16 x i8> [[ZM]], i32 0)
+; CHECK-NEXT:    ret void
+;
+  %slice = add i32 %slice.0, 7
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 %slice,
+  <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+  <vscale x 16 x i8> %zm, i32 1)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 %slice,
+  <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn1,
+  <vscale x 16 x i8> %zm, i32 0)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 %slice.0,
+  <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+  <vscale x 16 x i8> %zm, i32 0)
+  ret void
+}
+
+
+define dso_local <4 x half> @test_loop_fpmr_inaccessible(<4 x half>%vd, <8 x i8> %vn, <8 x i8> %vm, <16 x i8> %vm2, i64%fpm) {
+; CHECK-LABEL: define dso_local <4 x half> @test_loop_fpmr_inaccessible(
+; CHECK-SAME: <4 x half> [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], <16 x i8> [[VM2:%.*]], i64 [[FPM:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+; CHECK-NEXT:    [[FDOT0:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <16 x i8> [[VM2]], i32 2)
+; CHECK-NEXT:    [[FDOT00:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT0]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+; CHECK-NEXT:    [[FDOT1:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT00]], <8 x i8> [[VN]], <16 x i8> [[VM2]], i32 2)
+; CHECK-NEXT:    [[FDOT2:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT1]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    [[FDOT3:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT2]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    [[I_NEXT]] = add i8 [[I]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i8 [[I]], 20
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret <4 x half> [[FDOT3]]
+;
+  entry:
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot0 = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> %vd, <8 x i8> %vn, <16 x i8> %vm2, i32 2)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot00 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half>  %fdot0, <8 x i8> %vn, <8 x i8> %vm)
+  br label %loop
+  loop:
+  %i = phi i8 [ 0, %entry ], [ %i.next, %loop ]
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot1 = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> %fdot00, <8 x i8> %vn, <16 x i8> %vm2, i32 2)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot2 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> %fdot1, <8 x i8> %vn, <8 x i8> %vm)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm)
+  %fdot3 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half>  %fdot2, <8 x i8> %vn, <8 x i8> %vm)
+  %i.next = add i8 %i, 1
+  %ec = icmp eq i8 %i, 20
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret <4 x half> %fdot3
+}
+
+
+;; Negative tests
+
+define dso_local <4 x half> @neg_test_fpmr_inaccessible(<4 x half>%vd, <8 x i8> %vn, <8 x i8> %vm, <16 x i8> %vm2, i64 %fpm1, i64 %fpm2) {
+; CHECK-LABEL: define dso_local <4 x half> @neg_test_fpmr_inaccessible(
+; CHECK-SAME: <4 x half> [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], <16 x i8> [[VM2:%.*]], i64 [[FPM1:%.*]], i64 [[FPM2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM1]])
+; CHECK-NEXT:    [[FDOT1:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <16 x i8> [[VM2]], i32 2)
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM2]])
+; CHECK-NEXT:    [[FDOT2:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT1]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM1]])
+; CHECK-NEXT:    [[FDOT3:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT2]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM2]])
+; CHECK-NEXT:    [[FDOT4:%.*]] = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT3]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+; CHECK-NEXT:    ret <4 x half> [[FDOT4]]
+;
+entry:
+  call void @llvm.aarch64.set.fpmr(i64 %fpm1)
+  %fdot1 = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> %vd, <8 x i8> %vn, <16 x i8> %vm2, i32 2)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm2)
+  %fdot2 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> %fdot1, <8 x i8> %vn, <8 x i8> %vm)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm1)
+  %fdot3 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half>  %fdot2, <8 x i8> %vn, <8 x i8> %vm)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm2)
+  %fdot4 = tail call fast <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half>  %fdot3, <8 x i8> %vn, <8 x i8> %vm)
+  ret <4 x half> %fdot4
+}
+
+
+;; sme.fp8.fdot.lane.za16  ZA: write FPMR: read
+define void @neg_test_fpmr_za_inaccessible(i32 %slice.0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, i64 %fpm1, i64 %fpm2) {
+; CHECK-LABEL: define void @neg_test_fpmr_za_inaccessible(
+; CHECK-SAME: i32 [[SLICE_0:%.*]], <vscale x 16 x i8> [[ZN1:%.*]], <vscale x 16 x i8> [[ZN2:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 [[FPM1:%.*]], i64 [[FPM2:%.*]]) {
+; CHECK-NEXT:    [[SLICE:%.*]] = add i32 [[SLICE_0]], 7
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM1]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN1]], <vscale x 16 x i8> [[ZN2]], <vscale x 16 x i8> [[ZM]], i32 1)
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM2]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN2]], <vscale x 16 x i8> [[ZN1]], <vscale x 16 x i8> [[ZM]], i32 0)
+; CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[FPM1]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE_0]], <vscale x 16 x i8> [[ZN1]], <vscale x 16 x i8> [[ZN2]], <vscale x 16 x i8> [[ZM]], i32 0)
+; CHECK-NEXT:    ret void
+;
+  %slice = add i32 %slice.0, 7
+  call void @llvm.aarch64.set.fpmr(i64 %fpm1)
+  call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 %slice,
+  <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+  <vscale x 16 x i8> %zm, i32 1)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm2)
+  call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 %slice,
+  <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn1,
+  <vscale x 16 x i8> %zm, i32 0)
+  call void @llvm.aarch64.set.fpmr(i64 %fpm1)
+  call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 %slice.0,
+  <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+  <vscale x 16 x i8> %zm, i32 0)
+  ret void
+}

>From 4cf28cdf411d39106648a81772d67e96088b8ad2 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Fri, 29 Aug 2025 08:45:58 +0000
Subject: [PATCH 5/5] Modify logic to select the ModRef for other conditions
 when Target Mem location is used

---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a021a8f7aa512..b0ab43e13ed77 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1086,10 +1086,13 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
     // Example Call1 = sme.fdot.lane and Call2 = set.fpmr
     if (LocCall1MR == ModRefInfo::Ref && LocCall2MR == ModRefInfo::Mod &&
         MECall2Loc.getWithoutLoc(static_cast<IRMemLocation>(TargetLoc))
-            .doesNotAccessMemory())
+            .doesNotAccessMemory()) {
       Changed = true;
-    else
-      Changed = false;
+      NewMR |= ModRefInfo::NoModRef;
+    } else {
+      Changed = true;
+      NewMR |= ModRefInfo::ModRef;
+    }
   }
 
   if (Changed)



More information about the cfe-commits mailing list