[llvm] 6e535a9 - [LLVM][MC][AArch64] Assembler support for Armv9.6-A memory systems extensions (#112341)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 22 08:06:18 PDT 2024


Author: Nashe Mncube
Date: 2024-10-22T16:06:14+01:00
New Revision: 6e535a9ac70fc0e69778f0d6c4568cb64a0e25db

URL: https://github.com/llvm/llvm-project/commit/6e535a9ac70fc0e69778f0d6c4568cb64a0e25db
DIFF: https://github.com/llvm/llvm-project/commit/6e535a9ac70fc0e69778f0d6c4568cb64a0e25db.diff

LOG: [LLVM][MC][AArch64] Assembler support for Armv9.6-A memory systems extensions (#112341)

Add support for the following Armv9.6-A memory systems extensions:
  FEAT_LSUI      - Unprivileged Load Store
  FEAT_OCCMO     - Outer Cacheable Cache Maintenance Operation
  FEAT_PCDPHINT  - Producer-Consumer Data Placement Hints
  FEAT_SRMASK    - Bitwise System Register Write Masks

as documented here:

https://developer.arm.com/documentation/109697/2024_09/Feature-descriptions/The-Armv9-6-architecture-extension

Co-authored-by: Jonathan Thackray <jonathan.thackray at arm.com>

---------

Co-authored-by: Jonathan Thackray <jonathan.thackray at arm.com>

Added: 
    llvm/test/MC/AArch64/armv9.6a-lsui.s
    llvm/test/MC/AArch64/armv9.6a-occmo.s
    llvm/test/MC/AArch64/armv9.6a-pcdphint.s
    llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s
    llvm/test/MC/AArch64/armv9.6a-srmask.s
    llvm/test/MC/Disassembler/AArch64/armv9.6a-lsui.txt
    llvm/test/MC/Disassembler/AArch64/armv9.6a-occmo.txt
    llvm/test/MC/Disassembler/AArch64/armv9.6a-pcdphint.txt
    llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt
    llvm/test/MC/Disassembler/AArch64/armv9.6a-srmask.txt

Modified: 
    clang/test/Driver/aarch64-v96a.c
    clang/test/Driver/print-supported-extensions-aarch64.c
    llvm/lib/Target/AArch64/AArch64Features.td
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/AArch64SystemOperands.td
    llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
    llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
    llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
    llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
    llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
    llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
    llvm/unittests/TargetParser/TargetParserTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/Driver/aarch64-v96a.c b/clang/test/Driver/aarch64-v96a.c
index 80c99be934334e..343e347c928cab 100644
--- a/clang/test/Driver/aarch64-v96a.c
+++ b/clang/test/Driver/aarch64-v96a.c
@@ -53,3 +53,16 @@
 // RUN: %clang -target aarch64 -march=armv9.6a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s
 // RUN: %clang -target aarch64 -march=armv9.6-a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s
 // V96A-SVE-F16F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-f16f32mm"
+//
+// RUN: %clang -target aarch64 -march=armv9.6a+lsui -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSUI %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+lsui -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSUI %s
+// V96A-LSUI: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+lsui"
+//
+// RUN: %clang -target aarch64 -march=armv9.6a+occmo -### -c %s 2>&1 | FileCheck -check-prefix=V96A-OCCMO %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+occmo -### -c %s 2>&1 | FileCheck -check-prefix=V96A-OCCMO %s
+// V96A-OCCMO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+occmo"
+//
+// RUN: %clang -target aarch64 -march=armv9.6a+pcdphint -### -c %s 2>&1 | FileCheck -check-prefix=V96A-PCDPHINT %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+pcdphint -### -c %s 2>&1 | FileCheck -check-prefix=V96A-PCDPHINT %s
+// V96A-PCDPHINT: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+pcdphint"
+//

diff  --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index fbc0d70c4901c9..7ff4f17beff75e 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -40,12 +40,15 @@
 // CHECK-NEXT:     lse                 FEAT_LSE                                               Enable Armv8.1-A Large System Extension (LSE) atomic instructions
 // CHECK-NEXT:     lse128              FEAT_LSE128                                            Enable Armv9.4-A 128-bit Atomic instructions
 // CHECK-NEXT:     lsfe                FEAT_LSFE                                              Enable Armv9.6-A base Atomic floating-point in-memory instructions
+// CHECK-NEXT:     lsui                FEAT_LSUI                                              Enable Armv9.6-A unprivileged load/store instructions
 // CHECK-NEXT:     lut                 FEAT_LUT                                               Enable Lookup Table instructions
 // CHECK-NEXT:     mops                FEAT_MOPS                                              Enable Armv8.8-A memcpy and memset acceleration instructions
 // CHECK-NEXT:     memtag              FEAT_MTE, FEAT_MTE2                                    Enable Memory Tagging Extension
 // CHECK-NEXT:     simd                FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
+// CHECK-NEXT:     occmo               FEAT_OCCMO                                             Enable Armv9.6-A Outer cacheable cache maintenance operations
 // CHECK-NEXT:     pauth               FEAT_PAuth                                             Enable Armv8.3-A Pointer Authentication extension
 // CHECK-NEXT:     pauth-lr            FEAT_PAuth_LR                                          Enable Armv9.5-A PAC enhancements
+// CHECK-NEXT:     pcdphint            FEAT_PCDPHINT                                          Enable Armv9.6-A Producer Consumer Data Placement hints
 // CHECK-NEXT:     pmuv3               FEAT_PMUv3                                             Enable Armv8.0-A PMUv3 Performance Monitors extension
 // CHECK-NEXT:     predres             FEAT_SPECRES                                           Enable Armv8.5-A execution and data prediction invalidation instructions
 // CHECK-NEXT:     rng                 FEAT_RNG                                               Enable Random Number generation instructions

diff  --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 831f311b236441..476d5a4e093efa 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -439,7 +439,7 @@ def FeatureSVEB16B16: ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B1
   "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions">;
 
 def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16",
-  "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions", 
+  "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions",
   [FeatureSME2, FeatureSVEB16B16]>;
 
 def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16",
@@ -478,10 +478,10 @@ def FeatureFAMINMAX: ExtensionWithMArch<"faminmax", "FAMINMAX", "FEAT_FAMINMAX",
 
 def FeatureLUT: ExtensionWithMArch<"lut", "LUT", "FEAT_LUT",
  "Enable Lookup Table instructions">;
-   
+
 def FeatureFP8 : ExtensionWithMArch<"fp8", "FP8", "FEAT_FP8",
   "Enable FP8 instructions", [FeatureFAMINMAX, FeatureLUT, FeatureBF16]>;
-  
+
 def FeatureFP8FMA : ExtensionWithMArch<"fp8fma", "FP8FMA", "FEAT_FP8FMA",
   "Enable Armv9.5-A FP8 multiply-add instructions", [FeatureFP8]>;
 
@@ -490,7 +490,7 @@ def FeatureSSVE_FP8FMA : ExtensionWithMArch<"ssve-fp8fma", "SSVE_FP8FMA", "FEAT_
 
 def FeatureFP8DOT4: ExtensionWithMArch<"fp8dot4", "FP8DOT4", "FEAT_FP8DOT4",
   "Enable FP8 4-way dot instructions", [FeatureFP8FMA]>;
-  
+
 def FeatureFP8DOT2: ExtensionWithMArch<"fp8dot2", "FP8DOT2", "FEAT_FP8DOT2",
   "Enable FP8 2-way dot instructions", [FeatureFP8DOT4]>;
 
@@ -555,6 +555,14 @@ def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SV
 
 def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM",
   "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions">;
+def FeatureLSUI: ExtensionWithMArch<"lsui", "LSUI", "FEAT_LSUI",
+  "Enable Armv9.6-A unprivileged load/store instructions">;
+
+def FeatureOCCMO: ExtensionWithMArch<"occmo", "OCCMO", "FEAT_OCCMO",
+  "Enable Armv9.6-A Outer cacheable cache maintenance operations">;
+
+def FeaturePCDPHINT: ExtensionWithMArch<"pcdphint", "PCDPHINT", "FEAT_PCDPHINT",
+  "Enable Armv9.6-A Producer Consumer Data Placement hints">;
 
 //===----------------------------------------------------------------------===//
 //  Other Features
@@ -866,8 +874,9 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
   [HasV9_4aOps, FeatureCPA],
   !listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA,  FeatureLUT, FeatureFAMINMAX])>;
 def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a",
-  [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2],
-  !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2])>;
+  [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2, FeatureLSUI, FeatureOCCMO],
+  !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2,
+    FeatureLSUI, FeatureOCCMO])>;
 def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
   [ //v8.1
     FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2,

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4b24b166143dca..b690a41621f10d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1729,6 +1729,11 @@ class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
   let Inst{4-0}   = 0b00000;
 }
 
+class APASI : SimpleSystemI<0, (ins GPR64:$Xt), "apas", "\t$Xt">, Sched<[]> {
+  let Inst{20-5} = 0b0111001110000000;
+  let DecoderNamespace = "APAS";
+}
+
 // Hint instructions that take both a CRm and a 3-bit immediate.
 // NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
 // model patterns with sufficiently fine granularity
@@ -1742,6 +1747,25 @@ let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in
     let Inst{11-5} = imm;
   }
 
+def PHintInstOperand : AsmOperandClass {
+    let Name = "PHint";
+    let ParserMethod = "tryParsePHintInstOperand";
+}
+
+def phint_op : Operand<i32> {
+    let ParserMatchClass = PHintInstOperand;
+   let PrintMethod = "printPHintOp";
+}
+
+class STSHHI
+    : SimpleSystemI<0, (ins phint_op:$policy), "stshh", "\t$policy", []>,
+      Sched<[WriteHint]> {
+  bits<3> policy;
+  let Inst{20-12} = 0b000011001;
+  let Inst{11-8} = 0b0110;
+  let Inst{7-5} = policy;
+}
+
 // System instructions taking a single literal operand which encodes into
 // CRm. op2 differentiates the opcodes.
 def BarrierAsmOperand : AsmOperandClass {
@@ -4689,6 +4713,56 @@ multiclass StorePairNoAlloc<bits<2> opc, bit V, DAGOperand regtype,
                                                   GPR64sp:$Rn, 0)>;
 }
 
+//  armv9.6-a load/store no-allocate pair FEAT_LSUI (no-allocate)
+
+class BaseLoadStorePairNoAllocLSUI<bits<2> opc, bit V, bit L, dag oops, dag iops,
+                              string asm>
+    : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> {
+  bits<5> Rt;
+  bits<5> Rt2;
+  bits<5> Rn;
+  bits<7> offset;
+  let Inst{31-30} = opc;
+  let Inst{29-27} = 0b101;
+  let Inst{26}    = V;
+  let Inst{25-23} = 0b000;
+  let Inst{22}    = L;
+  let Inst{21-15} = offset;
+  let Inst{14-10} = Rt2;
+  let Inst{9-5}   = Rn;
+  let Inst{4-0}   = Rt;
+
+  let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+multiclass LoadPairNoAllocLSUI<bits<2> opc, bit V, DAGOperand regtype,
+                           Operand indextype, string asm> {
+  let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+  def i : BaseLoadStorePairNoAllocLSUI<opc, V, 1,
+                                   (outs regtype:$Rt, regtype:$Rt2),
+                                   (ins GPR64sp:$Rn, indextype:$offset), asm>,
+          Sched<[WriteLD, WriteLDHi]>;
+
+
+  def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+                  (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+                                                  GPR64sp:$Rn, 0)>;
+}
+
+multiclass StorePairNoAllocLSUI<bits<2> opc, bit V, DAGOperand regtype,
+                      Operand indextype, string asm> {
+  let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in
+  def i : BaseLoadStorePairNoAllocLSUI<opc, V, 0, (outs),
+                                   (ins regtype:$Rt, regtype:$Rt2,
+                                        GPR64sp:$Rn, indextype:$offset),
+                                   asm>,
+          Sched<[WriteSTP]>;
+
+  def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+                  (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+                                                  GPR64sp:$Rn, 0)>;
+}
+
 //---
 // Load/store exclusive
 //---
@@ -4769,6 +4843,109 @@ class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
   let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
 }
 
+// Armv9.6-a load-store exclusive instructions
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+class BaseLoadStoreExclusiveLSUI<bits<2> sz, bit L, bit o0,
+                             dag oops, dag iops, string asm, string operands>
+    : I<oops, iops, asm, operands, "", []> {
+  let Inst{31-30} = sz;
+  let Inst{29-23} = 0b0010010;
+  let Inst{22}    = L;
+  let Inst{15}    = o0;
+}
+
+
+// Neither Rs nor Rt2 operands.
+
+class LoadExclusiveLSUI<bits<2> sz, bit L, bit o0,
+                    RegisterClass regtype, string asm>
+    : BaseLoadStoreExclusiveLSUI<sz, L, o0, (outs regtype:$Rt),
+                               (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">,
+      Sched<[WriteLD]>
+{
+  bits<5> Rt;
+  bits<5> Rn;
+  let Inst{20-16} = 0b11111;
+  let Unpredictable{20-16} = 0b11111;
+  let Inst{14-10} = 0b11111;
+  let Unpredictable{14-10} = 0b11111;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rt;
+
+  let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+ class StoreExclusiveLSUI<bits<2> sz, bit L, bit o0,
+                      RegisterClass regtype, string asm>
+     : BaseLoadStoreExclusiveLSUI<sz, L, o0, (outs GPR32:$Ws),
+                              (ins regtype:$Rt, GPR64sp0:$Rn),
+                              asm, "\t$Ws, $Rt, [$Rn]">,
+       Sched<[WriteSTX]> {
+   bits<5> Ws;
+   bits<5> Rt;
+   bits<5> Rn;
+   let Inst{20-16} = Ws;
+   let Inst{15} = o0;
+   let Inst{14-10} = 0b11111;
+   let Unpredictable{14-10} = 0b11111;
+   let Inst{9-5} = Rn;
+   let Inst{4-0} = Rt;
+
+   let Constraints = "@earlyclobber $Ws";
+   let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+ }
+
+// Armv9.6-a load-store unprivileged instructions
+class BaseLoadUnprivilegedLSUI<bits<2> sz, dag oops, dag iops, string asm>
+    : I<oops, iops, asm, "\t$Rt, [$Rn]", "", []> {
+   bits<5> Rt;
+   bits<5> Rn;
+   let Inst{31-30} = sz;
+   let Inst{29-23} = 0b0010010;
+   let Inst{22}  = 0b1;
+   let Inst{21} = 0b0;
+   let Inst{20-16} = 0b11111;
+   let Unpredictable{20-16} = 0b11111;
+   let Inst{15} = 0b0;
+   let Inst{14-10} = 0b11111;
+   let Unpredictable{14-10} = 0b11111;
+   let Inst{9-5} = Rn;
+   let Inst{4-0} = Rt;
+   let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass LoadUnprivilegedLSUI<bits<2> sz, RegisterClass regtype, string asm> {
+  def r : BaseLoadUnprivilegedLSUI<sz, (outs regtype:$Rt),
+                                    (ins GPR64sp0:$Rn), asm>,
+          Sched<[WriteLD]>;
+
+}
+
+class BaseStoreUnprivilegedLSUI<bits<2> sz, dag oops, dag iops, string asm>
+    : I<oops, iops, asm, "\t$Ws, $Rt, [$Rn]", "", []> {
+   bits<5> Rt;
+   bits<5> Rn;
+   bits<5> Ws;
+   let Inst{31-30} = sz;
+   let Inst{29-23} = 0b0010010;
+   let Inst{22}  = 0b0;
+   let Inst{21} = 0b0;
+   let Inst{20-16} = Ws;
+   let Inst{15} = 0b0;
+   let Inst{14-10} = 0b11111;
+   let Unpredictable{14-10} = 0b11111;
+   let Inst{9-5} = Rn;
+   let Inst{4-0} = Rt;
+   let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+multiclass StoreUnprivilegedLSUI<bits<2> sz, RegisterClass regtype, string asm> {
+  def r : BaseStoreUnprivilegedLSUI<sz, (outs GPR32: $Ws),
+                                 (ins regtype:$Rt, GPR64sp0:$Rn),
+                                 asm>,
+          Sched<[WriteSTX]>;
+}
+
 // Simple store release operations do not check the exclusive monitor.
 let mayLoad = 0, mayStore = 1 in
 class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0,
@@ -11845,6 +12022,48 @@ multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> {
     def X : BaseCASP<order, "", XSeqPairClassOperand>;
 }
 
+// v9.6-a CAST unprivileged instructions
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+class BaseCASTEncoding<dag oops, dag iops, string asm,
+                      string cstr, list<dag> pattern>
+      : I<oops, iops, asm, "\t$Rs, $Rt, [$Rn]", cstr, pattern> {
+  bits<5> Rs;
+  bits<5> Rn;
+  bits<5> Rt;
+  bit L;
+  bit o0;
+  bits<2> Sz;
+  let Inst{31-30} = Sz;
+  let Inst{29-23} = 0b0010011;
+  let Inst{22} = L;
+  let Inst{21} = 0b0;
+  let Inst{20-16} = Rs;
+  let Inst{15} = o0;
+  let Inst{14-10} = 0b11111;
+  let Unpredictable{14-10} = 0b11111;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rt;
+}
+
+multiclass CompareAndSwapUnprivileged<bits<2> Sz, bit L, bit o0, string order> {
+  let Sz = Sz, L = L, o0 = o0 in
+    def X : BaseCASTEncoding <
+        (outs GPR64:$out),
+        (ins GPR64:$Rs, GPR64:$Rt, GPR64sp0:$Rn),
+        "cas" # order # "t",
+        "$out = $Rs",[]>, Sched<[WriteAtomic]>;
+
+}
+
+multiclass CompareAndSwapPairUnprivileged<bits<2> Sz, bit L, bit o0, string order> {
+  let Sz = Sz, L = L, o0 = o0 in
+    def X : BaseCASTEncoding<(outs XSeqPairClassOperand:$out),
+            (ins XSeqPairClassOperand:$Rs, XSeqPairClassOperand:$Rt, GPR64sp0:$Rn),
+                            "casp" # order # "t",
+                            "$out = $Rs",[]>,
+            Sched<[WriteAtomic]>;
+}
+
 let Predicates = [HasLSE] in
 class BaseSWP<string order, string size, RegisterClass RC>
       : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size,
@@ -11878,6 +12097,35 @@ multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
   let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP<order, "", GPR64>;
 }
 
+// v9.6a swap operations
+class BaseSWPLSUI<string order, RegisterClass RC>
+       : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swpt" # order,
+           "\t$Rs, $Rt, [$Rn]","",[]>,
+         Sched<[WriteAtomic]> {
+   bits<2> Sz;
+   bit Acq;
+   bit Rel;
+   bits<5> Rs;
+   bits<5> Rn;
+   bits<5> Rt;
+   let Inst{31-30} = Sz;
+   let Inst{29-24} = 0b011001;
+   let Inst{23} = Acq;
+   let Inst{22} = Rel;
+   let Inst{21} = 0b1;
+   let Inst{20-16} = Rs;
+   let Inst{15} = 0b1;
+   let Inst{14-12} = 0b000;
+   let Inst{11-10} = 0b01;
+   let Inst{9-5} = Rn;
+   let Inst{4-0} = Rt;
+}
+
+multiclass SwapLSUI<bits<1> Acq, bits<1> Rel, string order> {
+  let Sz = 0b00, Acq = Acq, Rel = Rel in def W : BaseSWPLSUI<order, GPR32>;
+  let Sz = 0b01, Acq = Acq, Rel = Rel in def X : BaseSWPLSUI<order, GPR64>;
+}
+
 let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
 class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
       : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size,
@@ -11916,6 +12164,39 @@ multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
     def X : BaseLDOPregister<op, order, "", GPR64>;
 }
 
+class BaseLDOPregisterLSUI<string op, string order, RegisterClass RC>
+      : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ldt" # op # order,
+          "\t$Rs, $Rt, [$Rn]","",[]>,
+        Sched<[WriteAtomic]> {
+  bits<2> Sz;
+  bit Acq;
+  bit Rel;
+  bits<5> Rs;
+  bits<3> opc;
+  bits<5> Rn;
+  bits<5> Rt;
+  let Inst{31-30} = Sz;
+  let Inst{29-24} = 0b011001;
+  let Inst{23} = Acq;
+  let Inst{22} = Rel;
+  let Inst{21} = 0b1;
+  let Inst{20-16} = Rs;
+  let Inst{15} = 0b0;
+  let Inst{14-12} = opc;
+  let Inst{11-10} = 0b01;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rt;
+}
+
+
+multiclass LDOPregisterLSUI<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
+                        string order> {
+  let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in
+    def W : BaseLDOPregisterLSUI<op, order, GPR32>;
+  let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in
+    def X : BaseLDOPregisterLSUI<op, order, GPR64>;
+}
+
 // Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more
 // complex DAG for DstRHS.
 let Predicates = [HasLSE] in
@@ -12020,6 +12301,29 @@ multiclass STOPregister<string asm, string instr> {
                     !cast<Instruction>(instr # "X")>;
 }
 
+class BaseSTOPregisterLSUI<string asm, RegisterClass OP, Register Reg,
+                        Instruction inst> :
+      InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>;
+
+multiclass STOPregisterLSUI<string asm, string instr> {
+  def : BaseSTOPregisterLSUI<asm # "a",        GPR32, WZR,
+                    !cast<Instruction>(instr # "W")>;
+  def : BaseSTOPregisterLSUI<asm # "a",        GPR64, XZR,
+                    !cast<Instruction>(instr # "X")>;
+  def : BaseSTOPregisterLSUI<asm # "l",        GPR32, WZR,
+                    !cast<Instruction>(instr # "W")>;
+  def : BaseSTOPregisterLSUI<asm # "l",        GPR64, XZR,
+                    !cast<Instruction>(instr # "X")>;
+  def : BaseSTOPregisterLSUI<asm # "al",        GPR32, WZR,
+                    !cast<Instruction>(instr # "W")>;
+  def : BaseSTOPregisterLSUI<asm # "al",        GPR64, XZR,
+                    !cast<Instruction>(instr # "X")>;
+  def : BaseSTOPregisterLSUI<asm,        GPR32, WZR,
+                    !cast<Instruction>(instr # "W")>;
+  def : BaseSTOPregisterLSUI<asm,        GPR64, XZR,
+                    !cast<Instruction>(instr # "X")>;
+}
+
 class LoadStore64B_base<bits<3> opc, string asm_inst, string asm_ops,
                         dag iops, dag oops, list<dag> pat>
     : I<oops, iops, asm_inst, asm_ops, "", pat>,

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4bd36e9eacbc68..63f239020179b2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -231,6 +231,13 @@ def HasSVEBFSCALE   : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Su
                                  AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">;
 def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">;
+def HasPCDPHINT      : Predicate<"Subtarget->hasPCDPHINT()">,
+                       AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">;
+def HasLSUI          : Predicate<"Subtarget->hasLSUI()">,
+                       AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">;
+def HasOCCMO         : Predicate<"Subtarget->hasOCCMO()">,
+                       AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">;
+
 // A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
 // they should be enabled if either has been specified.
 def HasSVEorSME
@@ -1111,7 +1118,7 @@ def PROBED_STACKALLOC_DYN : Pseudo<(outs),
                                    [(AArch64probedalloca GPR64common:$target)]>,
                                    Sched<[]>;
 
-} // Defs = [SP, NZCV], Uses = [SP] in 
+} // Defs = [SP, NZCV], Uses = [SP] in
 } // hasSideEffects = 1, isCodeGenOnly = 1
 
 let isReMaterializable = 1, isCodeGenOnly = 1 in {
@@ -1264,6 +1271,11 @@ def : InstAlias<"sevl", (HINT 0b101)>;
 def : InstAlias<"dgh",  (HINT 0b110)>;
 def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
 def : InstAlias<"csdb", (HINT 20)>;
+
+let Predicates = [HasPCDPHINT] in {
+    def STSHH: STSHHI;
+}
+
 // In order to be able to write readable assembly, LLVM should accept assembly
 // inputs that use Branch Target Indentification mnemonics, even with BTI disabled.
 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
@@ -2601,12 +2613,74 @@ defm CASPA  : CompareAndSwapPair<1, 0, "a">;
 defm CASPL  : CompareAndSwapPair<0, 1, "l">;
 defm CASPAL : CompareAndSwapPair<1, 1, "al">;
 
+// v9.6-a atomic CAST
+let Predicates = [HasLSUI] in {
+defm CAST   : CompareAndSwapUnprivileged<0b11, 0, 0, "">;
+defm CASLT  : CompareAndSwapUnprivileged<0b11, 0, 1, "l">;
+defm CASAT  : CompareAndSwapUnprivileged<0b11, 1, 0, "a">;
+defm CASALT : CompareAndSwapUnprivileged<0b11, 1, 1, "al">;
+
+def : MnemonicAlias<"cas", "cast">;
+def : MnemonicAlias<"casl", "caslt">;
+def : MnemonicAlias<"casa", "casat">;
+def : MnemonicAlias<"casal", "casalt">;
+
+// v9.6-a atomic CASPT
+defm CASPT   : CompareAndSwapPairUnprivileged<0b01, 0, 0, "">;
+defm CASPLT  : CompareAndSwapPairUnprivileged<0b01, 0, 1, "l">;
+defm CASPAT  : CompareAndSwapPairUnprivileged<0b01, 1, 0, "a">;
+defm CASPALT : CompareAndSwapPairUnprivileged<0b01, 1, 1, "al">;
+
+def : MnemonicAlias<"casp", "caspt">;
+def : MnemonicAlias<"caspl", "casplt">;
+def : MnemonicAlias<"caspa", "caspat">;
+def : MnemonicAlias<"caspal", "caspalt">;
+}
+
 // v8.1 atomic SWP
 defm SWP   : Swap<0, 0, "">;
 defm SWPA  : Swap<1, 0, "a">;
 defm SWPL  : Swap<0, 1, "l">;
 defm SWPAL : Swap<1, 1, "al">;
 
+// v9.6a atomic swap (FEAT_LSUI)
+let Predicates = [HasLSUI] in {
+  defm SWPT   : SwapLSUI<0, 0, "">;
+  defm SWPTA  : SwapLSUI<1, 0, "a">;
+  defm SWPTL  : SwapLSUI<0, 1, "l">;
+  defm SWPTAL : SwapLSUI<1, 1, "al">;
+
+  def : MnemonicAlias<"swp", "swpt">;
+  def : MnemonicAlias<"swpa", "swpta">;
+  def : MnemonicAlias<"swpl", "swptl">;
+  def : MnemonicAlias<"swpal", "swptal">;
+}
+
+// v9.6-a unprivileged atomic LD<OP> (FEAT_LSUI)
+let Predicates = [HasLSUI] in {
+  defm LDTADD   : LDOPregisterLSUI<0b000, "add", 0, 0, "">;
+  defm LDTADDA  : LDOPregisterLSUI<0b000, "add", 1, 0, "a">;
+  defm LDTADDL  : LDOPregisterLSUI<0b000, "add", 0, 1, "l">;
+  defm LDTADDAL : LDOPregisterLSUI<0b000, "add", 1, 1, "al">;
+
+  defm LDTCLR   : LDOPregisterLSUI<0b001, "clr", 0, 0, "">;
+  defm LDTCLRA  : LDOPregisterLSUI<0b001, "clr", 1, 0, "a">;
+  defm LDTCLRL  : LDOPregisterLSUI<0b001, "clr", 0, 1, "l">;
+  defm LDTCLRAL : LDOPregisterLSUI<0b001, "clr", 1, 1, "al">;
+
+  defm LDTSET   : LDOPregisterLSUI<0b011, "set", 0, 0, "">;
+  defm LDTSETA  : LDOPregisterLSUI<0b011, "set", 1, 0, "a">;
+  defm LDTSETL  : LDOPregisterLSUI<0b011, "set", 0, 1, "l">;
+  defm LDTSETAL : LDOPregisterLSUI<0b011, "set", 1, 1, "al">;
+
+  defm : STOPregisterLSUI<"sttadd","LDTADD">; // STTADDx
+  defm : STOPregisterLSUI<"sttclr","LDTCLR">; // STTCLRx
+  defm : STOPregisterLSUI<"sttset","LDTSET">; // STTSETx
+}
+
+// v9.6-a FEAT_RME_GPC3
+def APAS : APASI;
+
 // v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
 defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
 defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
@@ -4053,6 +4127,33 @@ defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
 defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
 }
 
+// Armv9.6-a Load/store pair (FEAT_LSUI)
+let Predicates = [HasLSUI] in {
+  defm LDTP    : LoadPairOffset<0b11, 0, GPR64z, simm7s8, "ldtp">;
+  def LDTPpre  : LoadPairPreIdx<0b11, 0, GPR64z, simm7s8, "ldtp">;
+  def LDTPpost : LoadPairPostIdx<0b11, 0, GPR64z, simm7s8, "ldtp">;
+
+  defm STTNPX : StorePairNoAllocLSUI<0b11, 0, GPR64z, simm7s8, "sttnp">;
+  defm LDTNPX : LoadPairNoAllocLSUI<0b11, 0, GPR64z, simm7s8, "ldtnp">;
+
+  defm STTP    : StorePairOffset<0b11, 0, GPR64z, simm7s8, "sttp">;
+  def STTPpre  : StorePairPreIdx<0b11, 0, GPR64z, simm7s8, "sttp">;
+  def STTPpost : StorePairPostIdx<0b11, 0, GPR64z, simm7s8, "sttp">;
+}
+
+let Predicates = [HasLSUI, HasNEON] in {
+  defm LDTPQ    : LoadPairOffset<0b11, 1, FPR128Op, simm7s16, "ldtp">;
+  def LDTPQpre  : LoadPairPreIdx<0b11, 1, FPR128Op, simm7s16, "ldtp">;
+  def LDTPQpost : LoadPairPostIdx<0b11, 1, FPR128Op, simm7s16, "ldtp">;
+
+  defm STTNPQ : StorePairNoAllocLSUI<0b11, 1, FPR128Op, simm7s16, "sttnp">;
+  defm LDTNPQ : LoadPairNoAllocLSUI<0b11, 1, FPR128Op, simm7s16, "ldtnp">;
+
+  defm STTPQ    : StorePairOffset<0b11, 1, FPR128Op, simm7s16, "sttp">;
+  def STTPQpre  : StorePairPreIdx<0b11, 1, FPR128Op, simm7s16, "sttp">;
+  def STTPQpost : StorePairPostIdx<0b11, 1, FPR128Op, simm7s16, "sttp">;
+}
+
 def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
           (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
 
@@ -4693,6 +4794,29 @@ let Predicates = [HasLOR] in {
   def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]",  (STLLRH   GPR32: $Rt, GPR64sp:$Rn)>;
 }
 
+// v9.6-a Unprivileged load store operations
+let Predicates = [HasLSUI] in {
+defm LDTXRW : LoadUnprivilegedLSUI<0b10, GPR32, "ldtxr">;
+defm LDTXRX : LoadUnprivilegedLSUI<0b11, GPR64, "ldtxr">;
+
+def : MnemonicAlias<"ldxr", "ldtxr">;
+
+def LDATXRW : LoadExclusiveLSUI <0b10, 1, 1, GPR32, "ldatxr">;
+def LDATXRX : LoadExclusiveLSUI <0b11, 1, 1, GPR64, "ldatxr">;
+
+def : MnemonicAlias<"ldaxr", "ldatxr">;
+
+defm STTXRW : StoreUnprivilegedLSUI<0b10, GPR32, "sttxr">;
+defm STTXRX : StoreUnprivilegedLSUI<0b11, GPR64, "sttxr">;
+
+def : MnemonicAlias<"stxr", "sttxr">;
+
+def STLTXRW : StoreExclusiveLSUI<0b10, 0, 1, GPR32, "stltxr">;
+def STLTXRX : StoreExclusiveLSUI<0b11, 0, 1, GPR64, "stltxr">;
+
+def : MnemonicAlias<"stlxr", "stltxr">;
+}
+
 //===----------------------------------------------------------------------===//
 // Scaled floating point to integer conversion instructions.
 //===----------------------------------------------------------------------===//
@@ -5477,15 +5601,15 @@ defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
 defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
 
 // Select BSWAP vector instructions into REV instructions
-def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))), 
+def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),
           (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
-def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))), 
+def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))),
           (v8i16 (REV16v16i8 (v8i16 V128:$Rn)))>;
-def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))), 
+def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))),
           (v2i32 (REV32v8i8 (v2i32 V64:$Rn)))>;
-def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))), 
+def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))),
           (v4i32 (REV32v16i8 (v4i32 V128:$Rn)))>;
-def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), 
+def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))),
           (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
 
 //===----------------------------------------------------------------------===//
@@ -6210,7 +6334,7 @@ def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
           (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
 }
 
-// int -> float conversion of value in lane 0 of simd vector should use 
+// int -> float conversion of value in lane 0 of simd vector should use
 // correct cvtf variant to avoid costly fpr <-> gpr register transfers.
 def : Pat<(f32 (sint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
           (SCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>;
@@ -6233,7 +6357,7 @@ def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
           (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
 
 // unsigned 32-bit extracted element is truncated to 16-bits using AND
-def : Pat<(f16 (uint_to_fp (i32 (and (i32 (vector_extract 
+def : Pat<(f16 (uint_to_fp (i32 (and (i32 (vector_extract
                 (v8i16 FPR128:$Rn), (i64 0))), (i32 65535))))),
           (UCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
 }
@@ -6626,19 +6750,19 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
 let Predicates = [HasLUT] in {
   defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
   defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
-  
+
   multiclass Luti2_patterns<Instruction Instr, ValueType VT64, ValueType VT128>{
-    def : Pat<(VT128 (int_aarch64_neon_vluti2_lane VT64:$Rn, 
+    def : Pat<(VT128 (int_aarch64_neon_vluti2_lane VT64:$Rn,
                    v8i8:$Rm, i32:$idx)),
-              (Instr (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 
+              (Instr (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub),
               (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),  VectorIndexS32b_timm:$idx)>;
     def : Pat<(VT128 (int_aarch64_neon_vluti2_laneq VT64:$Rn,
                    v16i8:$Rm, i32:$idx)),
-              (Instr (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 
+              (Instr (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub),
               V128:$Rm,  VectorIndexS32b_timm:$idx)>;
-    def : Pat<(VT128 (int_aarch64_neon_vluti2_lane VT128:$Rn, 
+    def : Pat<(VT128 (int_aarch64_neon_vluti2_lane VT128:$Rn,
                    v8i8:$Rm, i32:$idx)),
-              (Instr V128:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),  
+              (Instr V128:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
               VectorIndexS32b_timm:$idx)>;
     def : Pat<(VT128 (int_aarch64_neon_vluti2_laneq VT128:$Rn,
                    v16i8:$Rm, i32:$idx)),
@@ -6649,8 +6773,8 @@ let Predicates = [HasLUT] in {
   defm : Luti2_patterns<LUT2_H, v4i16, v8i16>;
   defm : Luti2_patterns<LUT2_H, v4f16, v8f16>;
   defm : Luti2_patterns<LUT2_H, v4bf16, v8bf16>;
- 
-  def : Pat<(v16i8 (int_aarch64_neon_vluti4q_laneq v16i8:$Rn, 
+
+  def : Pat<(v16i8 (int_aarch64_neon_vluti4q_laneq v16i8:$Rn,
                     v16i8:$Rm, i32:$idx)),
             (LUT4_B VecListOne16b:$Rn, V128:$Rm,  VectorIndexD32b_timm:$idx)>;
   def : Pat<(v16i8 (int_aarch64_neon_vluti4q_lane v16i8:$Rn,
@@ -6658,7 +6782,7 @@ let Predicates = [HasLUT] in {
             (LUT4_B VecListOne16b:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),  VectorIndexD32b_timm:$idx)>;
 
   foreach VT = [v8i16, v8f16, v8bf16] in {
-    def : Pat<(VT (int_aarch64_neon_vluti4q_laneq_x2 VT:$Rn1, 
+    def : Pat<(VT (int_aarch64_neon_vluti4q_laneq_x2 VT:$Rn1,
                    VT:$Rn2, v16i8:$Rm, i32:$idx)),
               (LUT4_H (REG_SEQUENCE QQ, VecListOne8h:$Rn1, qsub0, VecListOne8h:$Rn2, qsub1), V128:$Rm,  VectorIndexS32b_timm:$idx)>;
     def : Pat<(VT (int_aarch64_neon_vluti4q_lane_x2 VT:$Rn1,
@@ -7391,19 +7515,19 @@ def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
 }
 
 // For vecreduce_add, used by GlobalISel not SDAG
-def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))), 
+def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
           (i8 (ADDVv8i8v V64:$Rn))>;
-def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))), 
+def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
           (i8 (ADDVv16i8v V128:$Rn))>;
-def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))), 
+def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
           (i16 (ADDVv4i16v V64:$Rn))>;
-def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))), 
+def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
           (i16 (ADDVv8i16v V128:$Rn))>;
-def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))), 
+def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
           (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
-def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))), 
+def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
           (i32 (ADDVv4i32v V128:$Rn))>;
-def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))), 
+def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
           (i64 (ADDPv2i64p V128:$Rn))>;
 
 defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
@@ -7448,25 +7572,25 @@ def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
 def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;
 
-def : Pat<(i32 (opNode (v4i32 V128:$Rn))), 
+def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
 }
 
 // For v2i32 source type, the pairwise instruction can be used instead
 defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
-def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))), 
+def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
           (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
 
 defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
-def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))), 
+def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
           (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
 
 defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
-def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))), 
+def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
           (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
 
 defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
-def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))), 
+def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
           (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
 
 // The SADDLV v2i32 gets mapped to SADDLP.

diff  --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 474a543a857494..1d2bb1d732346c 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -177,6 +177,17 @@ def : DC<"CIPAE",   0b100, 0b0111, 0b1110, 0b000>;
 def : DC<"CIGDPAE", 0b100, 0b0111, 0b1110, 0b111>;
 }
 
+let Requires = [{ {AArch64::FeatureOCCMO} }] in {
+// Outer cacheable CMO (FEAT_OCCMO)
+def : DC<"CIVAOC", 0b011, 0b0111, 0b1111, 0b000>;
+def : DC<"CVAOC",  0b011, 0b0111, 0b1011, 0b000>;
+}
+
+let Requires = [{ {AArch64::FeatureOCCMO, AArch64::FeatureMTE} }] in {
+def : DC<"CIGDVAOC", 0b011, 0b0111, 0b1111, 0b111>;
+def : DC<"CGDVAOC",  0b011, 0b0111, 0b1011, 0b111>;
+}
+
 //===----------------------------------------------------------------------===//
 // IC (instruction cache maintenance) instruction options.
 //===----------------------------------------------------------------------===//
@@ -1981,3 +1992,59 @@ def : RWSysReg<"MPAMBW1_EL12",            0b11, 0b101, 0b1010, 0b0101, 0b100>;
 def : RWSysReg<"MPAMBW0_EL1",             0b11, 0b000, 0b1010, 0b0101, 0b101>;
 def : RWSysReg<"MPAMBWCAP_EL2",           0b11, 0b100, 0b1010, 0b0101, 0b110>;
 def : RWSysReg<"MPAMBWSM_EL1",            0b11, 0b000, 0b1010, 0b0101, 0b111>;
+
+//===----------------------------------------------------------------------===//
+// FEAT_SRMASK v9.6a registers
+//===----------------------------------------------------------------------===//
+def : RWSysReg<"SCTLRMASK_EL1",   0b11, 0b000, 0b0001, 0b0100, 0b000>;
+def : RWSysReg<"SCTLRMASK_EL2",   0b11, 0b100, 0b0001, 0b0100, 0b000>;
+def : RWSysReg<"SCTLRMASK_EL12",  0b11, 0b101, 0b0001, 0b0100, 0b000>;
+def : RWSysReg<"CPACRMASK_EL1",   0b11, 0b000, 0b0001, 0b0100, 0b010>;
+def : RWSysReg<"CPTRMASK_EL2",    0b11, 0b100, 0b0001, 0b0100, 0b010>;
+def : RWSysReg<"CPACRMASK_EL12",  0b11, 0b101, 0b0001, 0b0100, 0b010>;
+def : RWSysReg<"SCTLR2MASK_EL1",  0b11, 0b000, 0b0001, 0b0100, 0b011>;
+def : RWSysReg<"SCTLR2MASK_EL2",  0b11, 0b100, 0b0001, 0b0100, 0b011>;
+def : RWSysReg<"SCTLR2MASK_EL12", 0b11, 0b101, 0b0001, 0b0100, 0b011>;
+def : RWSysReg<"CPACRALIAS_EL1",  0b11, 0b000, 0b0001, 0b0100, 0b100>;
+def : RWSysReg<"SCTLRALIAS_EL1",  0b11, 0b000, 0b0001, 0b0100, 0b110>;
+def : RWSysReg<"SCTLR2ALIAS_EL1", 0b11, 0b000, 0b0001, 0b0100, 0b111>;
+def : RWSysReg<"TCRMASK_EL1",     0b11, 0b000, 0b0010, 0b0111, 0b010>;
+def : RWSysReg<"TCRMASK_EL2",     0b11, 0b100, 0b0010, 0b0111, 0b010>;
+def : RWSysReg<"TCRMASK_EL12",    0b11, 0b101, 0b0010, 0b0111, 0b010>;
+def : RWSysReg<"TCR2MASK_EL1",    0b11, 0b000, 0b0010, 0b0111, 0b011>;
+def : RWSysReg<"TCR2MASK_EL2",    0b11, 0b100, 0b0010, 0b0111, 0b011>;
+def : RWSysReg<"TCR2MASK_EL12",   0b11, 0b101, 0b0010, 0b0111, 0b011>;
+def : RWSysReg<"TCRALIAS_EL1",    0b11, 0b000, 0b0010, 0b0111, 0b110>;
+def : RWSysReg<"TCR2ALIAS_EL1",   0b11, 0b000, 0b0010, 0b0111, 0b111>;
+def : RWSysReg<"ACTLRMASK_EL1",   0b11, 0b000, 0b0001, 0b0100, 0b001>;
+def : RWSysReg<"ACTLRMASK_EL2",   0b11, 0b100, 0b0001, 0b0100, 0b001>;
+def : RWSysReg<"ACTLRMASK_EL12",  0b11, 0b101, 0b0001, 0b0100, 0b001>;
+def : RWSysReg<"ACTLRALIAS_EL1",  0b11, 0b000, 0b0001, 0b0100, 0b101>;
+
+//===----------------------------------------------------------------------===//
+// v9.6a PCDPHINT instruction options.
+//===----------------------------------------------------------------------===//
+
+class PHint<bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm, // One named PHint operand, emitted as a searchable-table row.
+              bits<3> op2, string name> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"]; // Rows can be searched by either field.
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  string AltName = name; // Same string as Name.
+  bits<16> Encoding; // Packed from the five fields below, op0 in the top bits and op2 in the bottom bits.
+  let Encoding{15-14} = op0;
+  let Encoding{13-11} = op1;
+  let Encoding{10-7} = crn;
+  let Encoding{6-3} = crm;
+  let Encoding{2-0} = op2;
+  code Requires = [{ {} }]; // Empty by default; an enclosing `let Requires = ... in` overrides it.
+}
+
+let Requires = [{ {AArch64::FeaturePCDPHINT} }] in {
+  def KEEP : PHint<0b00, 0b000, 0b0000, 0b0000, 0b000, "keep">;
+  def STRM : PHint<0b00, 0b000, 0b0000, 0b0000, 0b001, "strm">;
+}
+
+// v9.6a Realm management extension enhancements
+def : RWSysReg<"GPCBW_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b101>;

diff  --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 72b9f252a71878..9fb3501286e531 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -286,6 +286,8 @@ class AArch64AsmParser : public MCTargetAsmParser {
   ParseStatus tryParseSVEVecLenSpecifier(OperandVector &Operands);
   ParseStatus tryParseGPR64x8(OperandVector &Operands);
   ParseStatus tryParseImmRange(OperandVector &Operands);
+  template <int> ParseStatus tryParseAdjImm0_63(OperandVector &Operands);
+  ParseStatus tryParsePHintInstOperand(OperandVector &Operands);
 
 public:
   enum AArch64MatchResultTy {
@@ -361,6 +363,7 @@ class AArch64Operand : public MCParsedAsmOperand {
     k_FPImm,
     k_Barrier,
     k_PSBHint,
+    k_PHint,
     k_BTIHint,
   } Kind;
 
@@ -481,7 +484,11 @@ class AArch64Operand : public MCParsedAsmOperand {
     unsigned Length;
     unsigned Val;
   };
-
+  struct PHintOp { // Payload held by an operand whose Kind is k_PHint.
+    const char *Data; // Start of the hint's spelling; read together with Length.
+    unsigned Length; // Number of characters at Data.
+    unsigned Val; // Value handed back by getPHint().
+  };
   struct BTIHintOp {
     const char *Data;
     unsigned Length;
@@ -511,6 +518,7 @@ class AArch64Operand : public MCParsedAsmOperand {
     struct SysCRImmOp SysCRImm;
     struct PrefetchOp Prefetch;
     struct PSBHintOp PSBHint;
+    struct PHintOp PHint;
     struct BTIHintOp BTIHint;
     struct ShiftExtendOp ShiftExtend;
     struct SVCROp SVCR;
@@ -576,6 +584,9 @@ class AArch64Operand : public MCParsedAsmOperand {
     case k_PSBHint:
       PSBHint = o.PSBHint;
       break;
+    case k_PHint:
+      PHint = o.PHint;
+      break;
     case k_BTIHint:
       BTIHint = o.BTIHint;
       break;
@@ -728,11 +739,21 @@ class AArch64Operand : public MCParsedAsmOperand {
     return PSBHint.Val;
   }
 
+  unsigned getPHint() const { // Returns the stored PHint value.
+    assert(Kind == k_PHint && "Invalid access!"); // Only valid on k_PHint operands.
+    return PHint.Val;
+  }
+
   StringRef getPSBHintName() const {
     assert(Kind == k_PSBHint && "Invalid access!");
     return StringRef(PSBHint.Data, PSBHint.Length);
   }
 
+  StringRef getPHintName() const { // Returns the hint's spelling as written in the source.
+    assert(Kind == k_PHint && "Invalid access!"); // Only valid on k_PHint operands.
+    return StringRef(PHint.Data, PHint.Length); // View over the stored pointer/length pair; no copy is made.
+  }
+
   unsigned getBTIHint() const {
     assert(Kind == k_BTIHint && "Invalid access!");
     return BTIHint.Val;
@@ -1486,6 +1507,7 @@ class AArch64Operand : public MCParsedAsmOperand {
   bool isSysCR() const { return Kind == k_SysCR; }
   bool isPrefetch() const { return Kind == k_Prefetch; }
   bool isPSBHint() const { return Kind == k_PSBHint; }
+  bool isPHint() const { return Kind == k_PHint; }
   bool isBTIHint() const { return Kind == k_BTIHint; }
   bool isShiftExtend() const { return Kind == k_ShiftExtend; }
   bool isShifter() const {
@@ -2145,6 +2167,11 @@ class AArch64Operand : public MCParsedAsmOperand {
     Inst.addOperand(MCOperand::createImm(getPSBHint()));
   }
 
+  void addPHintOperands(MCInst &Inst, unsigned N) const { // Appends this operand's PHint value to Inst.
+    assert(N == 1 && "Invalid number of operands!"); // A PHint contributes exactly one MCOperand.
+    Inst.addOperand(MCOperand::createImm(getPHint())); // Added as an immediate operand.
+  }
+
   void addBTIHintOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createImm(getBTIHint()));
@@ -2442,6 +2469,17 @@ class AArch64Operand : public MCParsedAsmOperand {
     return Op;
   }
 
+  static std::unique_ptr<AArch64Operand> // Factory for a k_PHint operand carrying value Val and spelling Str.
+  CreatePHintInst(unsigned Val, StringRef Str, SMLoc S, MCContext &Ctx) {
+    auto Op = std::make_unique<AArch64Operand>(k_PHint, Ctx);
+    Op->PHint.Val = Val;
+    Op->PHint.Data = Str.data(); // Stores the pointer only; the text itself is not copied.
+    Op->PHint.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S; // Start and end locations are both set to S.
+    return Op;
+  }
+
   static std::unique_ptr<AArch64Operand> CreateSysCR(unsigned Val, SMLoc S,
                                                      SMLoc E, MCContext &Ctx) {
     auto Op = std::make_unique<AArch64Operand>(k_SysCR, Ctx);
@@ -2594,6 +2632,9 @@ void AArch64Operand::print(raw_ostream &OS) const {
   case k_PSBHint:
     OS << getPSBHintName();
     break;
+  case k_PHint:
+    OS << getPHintName();
+    break;
   case k_BTIHint:
     OS << getBTIHintName();
     break;
@@ -3749,6 +3790,9 @@ static const struct Extension {
     {"sve-aes2", {AArch64::FeatureSVEAES2}},
     {"sve-bfscale", {AArch64::FeatureSVEBFSCALE}},
     {"sve-f16f32mm", {AArch64::FeatureSVE_F16F32MM}},
+    {"lsui", {AArch64::FeatureLSUI}},
+    {"occmo", {AArch64::FeatureOCCMO}},
+    {"pcdphint", {AArch64::FeaturePCDPHINT}},
 };
 
 static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -4125,6 +4169,23 @@ ParseStatus AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
   return ParseStatus::Success;
 }
 
+ParseStatus // Parses one identifier as a PHint name and pushes the matching k_PHint operand.
+AArch64AsmParser::tryParsePHintInstOperand(OperandVector &Operands) {
+  SMLoc S = getLoc(); // Captured before Lex() so the operand records where the token started.
+  const AsmToken &Tok = getTok();
+  if (Tok.isNot(AsmToken::Identifier)) // Anything other than an identifier is rejected.
+    return TokError("invalid operand for instruction");
+
+  auto PH = AArch64PHint::lookupPHintByName(Tok.getString()); // NOTE(review): PH->haveFeatures() is not consulted here; presumably the instruction's predicate gates availability - confirm.
+  if (!PH) // No table entry was returned for this spelling.
+    return TokError("invalid operand for instruction");
+
+  Operands.push_back(AArch64Operand::CreatePHintInst(
+      PH->Encoding, Tok.getString(), S, getContext())); // The operand keeps both the encoding and the original spelling.
+  Lex(); // Eat identifier token.
+  return ParseStatus::Success;
+}
+
 /// tryParseNeonVectorRegister - Parse a vector register operand.
 bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) {
   if (getTok().isNot(AsmToken::Identifier))

diff  --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 87c4245b55357c..8e2dc3d217cb9f 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1135,6 +1135,14 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
   case AArch64::STPSpre:
   case AArch64::STGPpre:
   case AArch64::STGPpost:
+  case AArch64::LDTPpre:
+  case AArch64::LDTPpost:
+  case AArch64::LDTPQpost:
+  case AArch64::LDTPQpre:
+  case AArch64::STTPpost:
+  case AArch64::STTPpre:
+  case AArch64::STTPQpost:
+  case AArch64::STTPQpre:
     DecodeSimpleRegisterClass<AArch64::GPR64spRegClassID, 0, 32>(Inst, Rn, Addr,
                                                                  Decoder);
     break;
@@ -1151,6 +1159,10 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
   case AArch64::LDPSWpre:
   case AArch64::STGPpre:
   case AArch64::STGPpost:
+  case AArch64::LDTPpost:
+  case AArch64::LDTPpre:
+  case AArch64::STTPpost:
+  case AArch64::STTPpre:
     NeedsDisjointWritebackTransfer = true;
     [[fallthrough]];
   case AArch64::LDNPXi:
@@ -1159,6 +1171,10 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
   case AArch64::STPXi:
   case AArch64::LDPSWi:
   case AArch64::STGPi:
+  case AArch64::LDTPi:
+  case AArch64::STTPi:
+  case AArch64::STTNPXi:
+  case AArch64::LDTNPXi:
     DecodeSimpleRegisterClass<AArch64::GPR64RegClassID, 0, 32>(Inst, Rt, Addr,
                                                                Decoder);
     DecodeSimpleRegisterClass<AArch64::GPR64RegClassID, 0, 32>(Inst, Rt2, Addr,
@@ -1187,6 +1203,14 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
   case AArch64::STPQi:
   case AArch64::LDPQpre:
   case AArch64::STPQpre:
+  case AArch64::LDTPQi:
+  case AArch64::LDTPQpost:
+  case AArch64::LDTPQpre:
+  case AArch64::LDTNPQi:
+  case AArch64::STTPQi:
+  case AArch64::STTPQpost:
+  case AArch64::STTPQpre:
+  case AArch64::STTNPQi:
     DecodeSimpleRegisterClass<AArch64::FPR128RegClassID, 0, 32>(Inst, Rt, Addr,
                                                                 Decoder);
     DecodeSimpleRegisterClass<AArch64::FPR128RegClassID, 0, 32>(Inst, Rt2, Addr,

diff  --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 7c9113f6bc2380..762a7af8c3ddb3 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -988,6 +988,22 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
       Name = std::string(AT->Name);
     }
     break;
+    // Overlaps with AT and DC
+    case 15: {
+      const AArch64AT::AT *AT = AArch64AT::lookupATByEncoding(Encoding);
+      const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
+      if (AT && AT->haveFeatures(STI.getFeatureBits())) {
+        NeedsReg = true;
+        Ins = "at\t";
+        Name = std::string(AT->Name);
+      } else if (DC && DC->haveFeatures(STI.getFeatureBits())) {
+        NeedsReg = true;
+        Ins = "dc\t";
+        Name = std::string(DC->Name);
+      } else {
+        return false;
+      }
+    } break;
     }
   } else if (CnVal == 8 || CnVal == 9) {
     // TLBI aliases
@@ -2125,3 +2141,14 @@ void AArch64InstPrinter::printSyspXzrPair(const MCInst *MI, unsigned OpNum,
          "MC representation of SyspXzrPair should be XZR");
   O << getRegisterName(Reg) << ", " << getRegisterName(Reg);
 }
+
+void AArch64InstPrinter::printPHintOp(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) { // Prints operand OpNum as a PHint name, or as '#imm' when no name is found.
+  unsigned Op = MI->getOperand(OpNum).getImm(); // The operand is read as an immediate encoding.
+  auto PH = AArch64PHint::lookupPHintByEncoding(Op);
+  if (PH)
+    O << PH->Name;
+  else
+    markup(O, Markup::Immediate) << '#' << formatImm(Op); // Fallback: no table entry for this encoding.
+}

diff  --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 9dccdf42361b21..e7b62b3203681b 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -236,6 +236,8 @@ class AArch64InstPrinter : public MCInstPrinter {
   template <unsigned ImmIs0, unsigned ImmIs1>
   void printExactFPImm(const MCInst *MI, unsigned OpNum,
                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPHintOp(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
 };
 
 class AArch64AppleInstPrinter : public AArch64InstPrinter {

diff  --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index a742c406537bee..59937a7d2a1f68 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -132,6 +132,13 @@ namespace llvm {
   }
 }
 
+namespace llvm {
+namespace AArch64PHint {
+#define GET_PHINT_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64PHint
+} // namespace llvm
+
 namespace llvm {
   namespace AArch64BTIHint {
 #define GET_BTI_IMPL

diff  --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9faecccb1bd104..8f34cf054fe286 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -582,6 +582,26 @@ namespace AArch64PSBHint {
   #include "AArch64GenSystemOperands.inc"
 }
 
+namespace AArch64PHint {
+struct PHint { // Record type returned by lookupPHintByName() and lookupPHintByEncoding().
+  const char *Name;
+  const char *AltName;
+  unsigned Encoding;
+  FeatureBitset FeaturesRequired; // Feature set compared against the active features in haveFeatures().
+
+  bool haveFeatures(FeatureBitset ActiveFeatures) const { // True if FeatureAll is active or every required feature is active.
+    return ActiveFeatures[llvm::AArch64::FeatureAll] ||
+           (FeaturesRequired & ActiveFeatures) == FeaturesRequired; // Subset test: all required bits are present.
+  }
+};
+
+#define GET_PHINT_DECL
+#include "AArch64GenSystemOperands.inc"
+
+const PHint *lookupPHintByName(StringRef);
+const PHint *lookupPHintByEncoding(uint16_t);
+} // namespace AArch64PHint
+
 namespace AArch64BTIHint {
   struct BTI : SysAlias {
     using SysAlias::SysAlias;

diff  --git a/llvm/test/MC/AArch64/armv9.6a-lsui.s b/llvm/test/MC/AArch64/armv9.6a-lsui.s
new file mode 100644
index 00000000000000..b48db1f9b5570c
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9.6a-lsui.s
@@ -0,0 +1,486 @@
+// RUN: llvm-mc -triple aarch64 -mattr=+lsui -show-encoding %s  | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding %s 2>&1  | FileCheck %s --check-prefix=ERROR
+
+_func:
+// CHECK: _func:
+//------------------------------------------------------------------------------
+// Unprivileged load/store operations
+//------------------------------------------------------------------------------
+  ldtxr       x9, [sp]
+// CHECK: ldtxr	x9, [sp]                        // encoding: [0xe9,0x7f,0x5f,0xc9]
+// ERROR: error: instruction requires: lsui
+  ldtxr       x9, [sp, #0]
+// CHECK: ldtxr	x9, [sp]                        // encoding: [0xe9,0x7f,0x5f,0xc9]
+// ERROR: error: instruction requires: lsui
+  ldtxr       x10, [x11]
+// CHECK: ldtxr	x10, [x11]                      // encoding: [0x6a,0x7d,0x5f,0xc9]
+// ERROR: error: instruction requires: lsui
+  ldtxr       x10, [x11, #0]
+// CHECK: ldtxr	x10, [x11]                      // encoding: [0x6a,0x7d,0x5f,0xc9]
+// ERROR: error: instruction requires: lsui
+
+  ldatxr      x9, [sp]
+// CHECK: ldatxr	x9, [sp]                        // encoding: [0xe9,0xff,0x5f,0xc9]
+// ERROR: error: instruction requires: lsui
+  ldatxr      x10, [x11]
+// CHECK: ldatxr	x10, [x11]                      // encoding: [0x6a,0xfd,0x5f,0xc9]
+// ERROR: error: instruction requires: lsui
+
+  sttxr       wzr, w4, [sp]
+// CHECK: sttxr	wzr, w4, [sp]                   // encoding: [0xe4,0x7f,0x1f,0x89]
+// ERROR: error: instruction requires: lsui
+  sttxr       wzr, w4, [sp, #0]
+// CHECK: sttxr	wzr, w4, [sp]                   // encoding: [0xe4,0x7f,0x1f,0x89]
+// ERROR: error: instruction requires: lsui
+  sttxr       w5, x6, [x7]
+// CHECK: sttxr	w5, x6, [x7]                    // encoding: [0xe6,0x7c,0x05,0xc9]
+// ERROR: error: instruction requires: lsui
+  sttxr       w5, x6, [x7, #0]
+// CHECK: sttxr	w5, x6, [x7]                    // encoding: [0xe6,0x7c,0x05,0xc9]
+// ERROR: error: instruction requires: lsui
+
+  stltxr      w2, w4, [sp]
+// CHECK: stltxr	w2, w4, [sp]                    // encoding: [0xe4,0xff,0x02,0x89]
+// ERROR: error: instruction requires: lsui
+  stltxr      w5, x6, [x7]
+// CHECK: stltxr	w5, x6, [x7]                    // encoding: [0xe6,0xfc,0x05,0xc9]
+// ERROR: error: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// Unprivileged load/store register pair (offset)
+//------------------------------------------------------------------------------
+
+  ldtp       x21, x29, [x2, #504]
+// CHECK: ldtp	x21, x29, [x2, #504]            // encoding: [0x55,0xf4,0x5f,0xe9]
+// ERROR: instruction requires: lsui
+  ldtp       x22, x23, [x3, #-512]
+// CHECK: ldtp	x22, x23, [x3, #-512]           // encoding: [0x76,0x5c,0x60,0xe9]
+// ERROR: instruction requires: lsui
+  ldtp       x24, x25, [x4, #8]
+// CHECK: ldtp	x24, x25, [x4, #8]              // encoding: [0x98,0xe4,0x40,0xe9]
+// ERROR: instruction requires: lsui
+
+  sttp       x3, x5, [sp], #16
+// CHECK: sttp	x3, x5, [sp], #16               // encoding: [0xe3,0x17,0x81,0xe8]
+// ERROR: instruction requires: lsui
+  sttp       x3, x5, [sp, #8]!
+// CHECK: sttp	x3, x5, [sp, #8]!               // encoding: [0xe3,0x97,0x80,0xe9]
+// ERROR: instruction requires: lsui
+
+  sttp       q3, q5, [sp]
+// CHECK: sttp	q3, q5, [sp]                    // encoding: [0xe3,0x17,0x00,0xed]
+// ERROR: instruction requires: lsui
+  sttp       q17, q19, [sp, #1008]
+// CHECK: sttp	q17, q19, [sp, #1008]           // encoding: [0xf1,0xcf,0x1f,0xed]
+// ERROR: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// Load/store register pair (post-indexed)
+//------------------------------------------------------------------------------
+
+  ldtp       x21, x29, [x2], #504
+// CHECK: ldtp	x21, x29, [x2], #504            // encoding: [0x55,0xf4,0xdf,0xe8]
+// ERROR: instruction requires: lsui
+  ldtp       x22, x23, [x3], #-512
+// CHECK: ldtp	x22, x23, [x3], #-512           // encoding: [0x76,0x5c,0xe0,0xe8]
+// ERROR: instruction requires: lsui
+  ldtp       x24, x25, [x4], #8
+// CHECK: ldtp	x24, x25, [x4], #8              // encoding: [0x98,0xe4,0xc0,0xe8]
+// ERROR: instruction requires: lsui
+
+  sttp       q3, q5, [sp], #0
+// CHECK: sttp	q3, q5, [sp], #0                // encoding: [0xe3,0x17,0x80,0xec]
+// ERROR: instruction requires: lsui
+  sttp       q17, q19, [sp], #1008
+// CHECK: sttp	q17, q19, [sp], #1008           // encoding: [0xf1,0xcf,0x9f,0xec]
+// ERROR: instruction requires: lsui
+  ldtp       q23, q29, [x1], #-1024
+// CHECK: ldtp	q23, q29, [x1], #-1024          // encoding: [0x37,0x74,0xe0,0xec]
+// ERROR: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// Load/store register pair (pre-indexed)
+//------------------------------------------------------------------------------
+  ldtp       x21, x29, [x2, #504]!
+// CHECK: ldtp	x21, x29, [x2, #504]!           // encoding: [0x55,0xf4,0xdf,0xe9]
+// ERROR: instruction requires: lsui
+  ldtp       x22, x23, [x3, #-512]!
+// CHECK: ldtp	x22, x23, [x3, #-512]!          // encoding: [0x76,0x5c,0xe0,0xe9]
+// ERROR: instruction requires: lsui
+  ldtp       x24, x25, [x4, #8]!
+// CHECK: ldtp	x24, x25, [x4, #8]!             // encoding: [0x98,0xe4,0xc0,0xe9]
+// ERROR: instruction requires: lsui
+
+  sttp       q3, q5, [sp, #0]!
+// CHECK: sttp	q3, q5, [sp, #0]!               // encoding: [0xe3,0x17,0x80,0xed]
+// ERROR: instruction requires: lsui
+  sttp       q17, q19, [sp, #1008]!
+// CHECK: sttp	q17, q19, [sp, #1008]!          // encoding: [0xf1,0xcf,0x9f,0xed]
+// ERROR: instruction requires: lsui
+  ldtp       q23, q29, [x1, #-1024]!
+// CHECK: ldtp	q23, q29, [x1, #-1024]!         // encoding: [0x37,0x74,0xe0,0xed]
+// ERROR: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// CAS(P)T instructions
+//------------------------------------------------------------------------------
+  //64 bits
+  cast       x0, x1, [x2]
+// CHECK: cast	x0, x1, [x2]                    // encoding: [0x41,0x7c,0x80,0xc9]
+// ERROR: instruction requires: lsui
+  cast       x0, x1, [sp, #0]
+// CHECK: cast	x0, x1, [sp]                    // encoding: [0xe1,0x7f,0x80,0xc9]
+// ERROR: instruction requires: lsui
+  casat      x0, x1, [x2]
+// CHECK: casat	x0, x1, [x2]                    // encoding: [0x41,0x7c,0xc0,0xc9]
+// ERROR: instruction requires: lsui
+  casat      x0, x1, [sp, #0]
+// CHECK: casat	x0, x1, [sp]                    // encoding: [0xe1,0x7f,0xc0,0xc9]
+// ERROR: instruction requires: lsui
+  casalt     x0, x1, [x2]
+// CHECK: casalt	x0, x1, [x2]                    // encoding: [0x41,0xfc,0xc0,0xc9]
+// ERROR: instruction requires: lsui
+  casalt     x0, x1, [sp, #0]
+// CHECK: casalt	x0, x1, [sp]                    // encoding: [0xe1,0xff,0xc0,0xc9]
+// ERROR: instruction requires: lsui
+  caslt      x0, x1, [x2]
+// CHECK: caslt	x0, x1, [x2]                    // encoding: [0x41,0xfc,0x80,0xc9]
+// ERROR: instruction requires: lsui
+  caslt      x0, x1, [sp, #0]
+// CHECK: caslt	x0, x1, [sp]                    // encoding: [0xe1,0xff,0x80,0xc9]
+// ERROR: instruction requires: lsui
+
+  //CASP instruction
+  caspt      x0, x1, x2, x3, [x4]
+// CHECK: caspt	x0, x1, x2, x3, [x4]            // encoding: [0x82,0x7c,0x80,0x49]
+// ERROR: instruction requires: lsui
+  caspt      x0, x1, x2, x3, [sp, #0]
+// CHECK: caspt	x0, x1, x2, x3, [sp]            // encoding: [0xe2,0x7f,0x80,0x49]
+// ERROR: instruction requires: lsui
+  caspat     x0, x1, x2, x3, [x4]
+// CHECK: caspat	x0, x1, x2, x3, [x4]            // encoding: [0x82,0x7c,0xc0,0x49]
+// ERROR: instruction requires: lsui
+  caspat     x0, x1, x2, x3, [sp, #0]
+// CHECK: caspat	x0, x1, x2, x3, [sp]            // encoding: [0xe2,0x7f,0xc0,0x49]
+// ERROR: instruction requires: lsui
+  casplt     x0, x1, x2, x3, [x4]
+// CHECK: casplt	x0, x1, x2, x3, [x4]            // encoding: [0x82,0xfc,0x80,0x49]
+// ERROR: instruction requires: lsui
+  casplt     x0, x1, x2, x3, [sp, #0]
+// CHECK: casplt	x0, x1, x2, x3, [sp]            // encoding: [0xe2,0xff,0x80,0x49]
+// ERROR: instruction requires: lsui
+  caspalt    x0, x1, x2, x3, [x4]
+// CHECK: caspalt	x0, x1, x2, x3, [x4]            // encoding: [0x82,0xfc,0xc0,0x49]
+// ERROR: instruction requires: lsui
+  caspalt    x0, x1, x2, x3, [sp, #0]
+// CHECK: caspalt	x0, x1, x2, x3, [sp]            // encoding: [0xe2,0xff,0xc0,0x49]
+// ERROR: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// SWPT(A|L|AL) instructions
+//------------------------------------------------------------------------------
+  swpt       w7, wzr, [x5]
+// CHECK: swpt	w7, wzr, [x5]                   // encoding: [0xbf,0x84,0x27,0x19]
+// ERROR: instruction requires: lsui
+  swpt       x9, xzr, [sp]
+// CHECK: swpt	x9, xzr, [sp]                   // encoding: [0xff,0x87,0x29,0x59]
+// ERROR: instruction requires: lsui
+
+  swpta      w7, wzr, [x5]
+// CHECK: swpta	w7, wzr, [x5]                   // encoding: [0xbf,0x84,0xa7,0x19]
+// ERROR: instruction requires: lsui
+  swpta      x9, xzr, [sp]
+// CHECK: swpta	x9, xzr, [sp]                   // encoding: [0xff,0x87,0xa9,0x59]
+// ERROR: instruction requires: lsui
+
+  swptl      w7, wzr, [x5]
+// CHECK: swptl	w7, wzr, [x5]                   // encoding: [0xbf,0x84,0x67,0x19]
+// ERROR: instruction requires: lsui
+  swptl      x9, xzr, [sp]
+// CHECK: swptl	x9, xzr, [sp]                   // encoding: [0xff,0x87,0x69,0x59]
+// ERROR: instruction requires: lsui
+
+  swptal     w7, wzr, [x5]
+// CHECK: swptal	w7, wzr, [x5]                   // encoding: [0xbf,0x84,0xe7,0x19]
+// ERROR: instruction requires: lsui
+  swptal     x9, xzr, [sp]
+// CHECK: swptal	x9, xzr, [sp]                   // encoding: [0xff,0x87,0xe9,0x59]
+// ERROR: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// LDT(ADD|CLR|SET)(A|L|AL) instructions
+//------------------------------------------------------------------------------
+
+  ldtadd     w7, wzr, [x5]
+// CHECK: ldtadd	w7, wzr, [x5]                   // encoding: [0xbf,0x04,0x27,0x19]
+// ERROR: instruction requires: lsui
+  ldtadd     x9, xzr, [sp]
+// CHECK: ldtadd	x9, xzr, [sp]                   // encoding: [0xff,0x07,0x29,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtadda    w7, wzr, [x5]
+// CHECK: ldtadda	w7, wzr, [x5]                   // encoding: [0xbf,0x04,0xa7,0x19]
+// ERROR: instruction requires: lsui
+  ldtadda    x9, xzr, [sp]
+// CHECK: ldtadda	x9, xzr, [sp]                   // encoding: [0xff,0x07,0xa9,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtaddl    w7, wzr, [x5]
+// CHECK: ldtaddl	w7, wzr, [x5]                   // encoding: [0xbf,0x04,0x67,0x19]
+// ERROR: instruction requires: lsui
+  ldtaddl    x9, xzr, [sp]
+// CHECK: ldtaddl	x9, xzr, [sp]                   // encoding: [0xff,0x07,0x69,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtaddal   w7, wzr, [x5]
+// CHECK: ldtaddal	w7, wzr, [x5]                   // encoding: [0xbf,0x04,0xe7,0x19]
+// ERROR: instruction requires: lsui
+  ldtaddal   x9, xzr, [sp]
+// CHECK: ldtaddal	x9, xzr, [sp]                   // encoding: [0xff,0x07,0xe9,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtclr     w7, wzr, [x5]
+// CHECK: ldtclr	w7, wzr, [x5]                   // encoding: [0xbf,0x14,0x27,0x19]
+// ERROR: instruction requires: lsui
+  ldtclr     x9, xzr, [sp]
+// CHECK: ldtclr	x9, xzr, [sp]                   // encoding: [0xff,0x17,0x29,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtclrl    w7, wzr, [x5]
+// CHECK: ldtclrl	w7, wzr, [x5]                   // encoding: [0xbf,0x14,0x67,0x19]
+// ERROR: instruction requires: lsui
+  ldtclrl    x9, xzr, [sp]
+// CHECK: ldtclrl	x9, xzr, [sp]                   // encoding: [0xff,0x17,0x69,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtclra    w7, wzr, [x5]
+// CHECK: ldtclra	w7, wzr, [x5]                   // encoding: [0xbf,0x14,0xa7,0x19]
+// ERROR: instruction requires: lsui
+  ldtclra    x9, xzr, [sp]
+// CHECK: ldtclra	x9, xzr, [sp]                   // encoding: [0xff,0x17,0xa9,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtclral   w7, wzr, [x5]
+// CHECK: ldtclral	w7, wzr, [x5]                   // encoding: [0xbf,0x14,0xe7,0x19]
+// ERROR: instruction requires: lsui
+  ldtclral   x9, xzr, [sp]
+// CHECK: ldtclral	x9, xzr, [sp]                   // encoding: [0xff,0x17,0xe9,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtset     w7, wzr, [x5]
+// CHECK: ldtset	w7, wzr, [x5]                   // encoding: [0xbf,0x34,0x27,0x19]
+// ERROR: instruction requires: lsui
+  ldtset     x9, xzr, [sp]
+// CHECK: ldtset	x9, xzr, [sp]                   // encoding: [0xff,0x37,0x29,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtsetl    w7, wzr, [x5]
+// CHECK: ldtsetl	w7, wzr, [x5]                   // encoding: [0xbf,0x34,0x67,0x19]
+// ERROR: instruction requires: lsui
+  ldtsetl    x9, xzr, [sp]
+// CHECK: ldtsetl	x9, xzr, [sp]                   // encoding: [0xff,0x37,0x69,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtseta    w7, wzr, [x5]
+// CHECK: ldtseta	w7, wzr, [x5]                   // encoding: [0xbf,0x34,0xa7,0x19]
+// ERROR: instruction requires: lsui
+  ldtseta    x9, xzr, [sp]
+// CHECK: ldtseta	x9, xzr, [sp]                   // encoding: [0xff,0x37,0xa9,0x59]
+// ERROR: instruction requires: lsui
+
+  ldtsetal   w7, wzr, [x5]
+// CHECK: ldtsetal	w7, wzr, [x5]                   // encoding: [0xbf,0x34,0xe7,0x19]
+// ERROR: instruction requires: lsui
+  ldtsetal   x9, xzr, [sp]
+// CHECK: ldtsetal	x9, xzr, [sp]                   // encoding: [0xff,0x37,0xe9,0x59]
+// ERROR: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// STT(ADD|CLR|SET)(A|L|AL) instructions
+//------------------------------------------------------------------------------
+
+  sttadd     w0, [x2]
+// CHECK: ldtadd	w0, wzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttadd     w2, [sp]
+// CHECK: ldtadd	w2, wzr, [sp]                   // encoding: [0xff,0x07,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttadd     x0, [x2]
+// CHECK: ldtadd	x0, xzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttadd     x2, [sp]
+// CHECK: ldtadd	x2, xzr, [sp]                   // encoding: [0xff,0x07,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttaddl    w0, [x2]
+// CHECK: ldtadd	w0, wzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttaddl    w2, [sp]
+// CHECK: ldtadd	w2, wzr, [sp]                   // encoding: [0xff,0x07,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttaddl    x0, [x2]
+// CHECK: ldtadd	x0, xzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttaddl    x2, [sp]
+// CHECK: ldtadd	x2, xzr, [sp]                   // encoding: [0xff,0x07,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttadda    w0, [x2]
+// CHECK: ldtadd	w0, wzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttadda    w2, [sp]
+// CHECK: ldtadd	w2, wzr, [sp]                   // encoding: [0xff,0x07,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttadda    x0, [x2]
+// CHECK: ldtadd	x0, xzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttadda    x2, [sp]
+// CHECK: ldtadd	x2, xzr, [sp]                   // encoding: [0xff,0x07,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttaddal   w0, [x2]
+// CHECK: ldtadd	w0, wzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttaddal   w2, [sp]
+// CHECK: ldtadd	w2, wzr, [sp]                   // encoding: [0xff,0x07,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttaddal   x0, [x2]
+// CHECK: ldtadd	x0, xzr, [x2]                   // encoding: [0x5f,0x04,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttaddal   x2, [sp]
+// CHECK: ldtadd	x2, xzr, [sp]                   // encoding: [0xff,0x07,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttclr     w0, [x2]
+// CHECK: ldtclr	w0, wzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttclr     w2, [sp]
+// CHECK: ldtclr	w2, wzr, [sp]                   // encoding: [0xff,0x17,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttclr     x0, [x2]
+// CHECK: ldtclr	x0, xzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttclr     x2, [sp]
+// CHECK: ldtclr	x2, xzr, [sp]                   // encoding: [0xff,0x17,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttclra    w0, [x2]
+// CHECK: ldtclr	w0, wzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttclra    w2, [sp]
+// CHECK: ldtclr	w2, wzr, [sp]                   // encoding: [0xff,0x17,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttclra    x0, [x2]
+// CHECK: ldtclr	x0, xzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttclra    x2, [sp]
+// CHECK: ldtclr	x2, xzr, [sp]                   // encoding: [0xff,0x17,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttclrl    w0, [x2]
+// CHECK: ldtclr	w0, wzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttclrl    w2, [sp]
+// CHECK: ldtclr	w2, wzr, [sp]                   // encoding: [0xff,0x17,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttclrl    x0, [x2]
+// CHECK: ldtclr	x0, xzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttclrl    x2, [sp]
+// CHECK: ldtclr	x2, xzr, [sp]                   // encoding: [0xff,0x17,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttclral   w0, [x2]
+// CHECK: ldtclr	w0, wzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttclral   x2, [sp]
+// CHECK: ldtclr	x2, xzr, [sp]                   // encoding: [0xff,0x17,0x22,0x59]
+// ERROR: instruction requires: lsui
+  sttclral   x0, [x2]
+// CHECK: ldtclr	x0, xzr, [x2]                   // encoding: [0x5f,0x14,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttclral   x2, [sp]
+// CHECK: ldtclr	x2, xzr, [sp]                   // encoding: [0xff,0x17,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttset     w0, [x2]
+// CHECK: ldtset	w0, wzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttset     w2, [sp]
+// CHECK: ldtset	w2, wzr, [sp]                   // encoding: [0xff,0x37,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttset     x0, [x2]
+// CHECK: ldtset	x0, xzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttset     x2, [sp]
+// CHECK: ldtset	x2, xzr, [sp]                   // encoding: [0xff,0x37,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttseta    w0, [x2]
+// CHECK: ldtset	w0, wzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttseta    w2, [sp]
+// CHECK: ldtset	w2, wzr, [sp]                   // encoding: [0xff,0x37,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttseta    x0, [x2]
+// CHECK: ldtset	x0, xzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttseta    x2, [sp]
+// CHECK: ldtset	x2, xzr, [sp]                   // encoding: [0xff,0x37,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttsetl    w0, [x2]
+// CHECK: ldtset	w0, wzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttsetl    w2, [sp]
+// CHECK: ldtset	w2, wzr, [sp]                   // encoding: [0xff,0x37,0x22,0x19]
+// ERROR: instruction requires: lsui
+  sttsetl    x0, [x2]
+// CHECK: ldtset	x0, xzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttsetl    x2, [sp]
+// CHECK: ldtset	x2, xzr, [sp]                   // encoding: [0xff,0x37,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+  sttsetal   w0, [x2]
+// CHECK: ldtset	w0, wzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x19]
+// ERROR: instruction requires: lsui
+  sttsetal   x2, [sp]
+// CHECK: ldtset	x2, xzr, [sp]                   // encoding: [0xff,0x37,0x22,0x59]
+// ERROR: instruction requires: lsui
+  sttsetal   x0, [x2]
+// CHECK: ldtset	x0, xzr, [x2]                   // encoding: [0x5f,0x34,0x20,0x59]
+// ERROR: instruction requires: lsui
+  sttsetal   x2, [sp]
+// CHECK: ldtset	x2, xzr, [sp]                   // encoding: [0xff,0x37,0x22,0x59]
+// ERROR: instruction requires: lsui
+
+//------------------------------------------------------------------------------
+// Load/store non-temporal register pair (offset)
+//------------------------------------------------------------------------------
+  ldtnp      x21, x29, [x2, #504]
+// CHECK: ldtnp	x21, x29, [x2, #504]            // encoding: [0x55,0xf4,0x5f,0xe8]
+// ERROR: instruction requires: lsui
+  ldtnp      x22, x23, [x3, #-512]
+// CHECK: ldtnp	x22, x23, [x3, #-512]           // encoding: [0x76,0x5c,0x60,0xe8]
+// ERROR: instruction requires: lsui
+  ldtnp      x24, x25, [x4, #8]
+// CHECK: ldtnp	x24, x25, [x4, #8]              // encoding: [0x98,0xe4,0x40,0xe8]
+// ERROR: instruction requires: lsui
+  ldtnp      q23, q29, [x1, #-1024]
+// CHECK: ldtnp	q23, q29, [x1, #-1024]          // encoding: [0x37,0x74,0x60,0xec]
+// ERROR: instruction requires: lsui
+
+  sttnp      x3, x5, [sp]
+// CHECK: sttnp	x3, x5, [sp]                    // encoding: [0xe3,0x17,0x00,0xe8]
+// ERROR: instruction requires: lsui
+  sttnp      x17, x19, [sp, #64]
+// CHECK: sttnp	x17, x19, [sp, #64]             // encoding: [0xf1,0x4f,0x04,0xe8]
+// ERROR: instruction requires: lsui
+  sttnp      q3, q5, [sp]
+// CHECK: sttnp	q3, q5, [sp]                    // encoding: [0xe3,0x17,0x00,0xec]
+// ERROR: instruction requires: lsui
+  sttnp      q17, q19, [sp, #1008]
+// CHECK: sttnp	q17, q19, [sp, #1008]           // encoding: [0xf1,0xcf,0x1f,0xec]
+// ERROR: instruction requires: lsui
+

diff  --git a/llvm/test/MC/AArch64/armv9.6a-occmo.s b/llvm/test/MC/AArch64/armv9.6a-occmo.s
new file mode 100644
index 00000000000000..d6548f98645a5f
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9.6a-occmo.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+occmo -mattr=+mte %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding %s -mattr=+mte 2>&1 | FileCheck --check-prefix=ERROR %s
+.func:
+// CHECK: .func:
+  dc civaoc, x12
+// CHECK: dc	civaoc, x12                     // encoding: [0x0c,0x7f,0x0b,0xd5]
+// ERROR: error: DC CIVAOC requires: occmo
+  dc cigdvaoc, x0
+// CHECK: dc	cigdvaoc, x0                    // encoding: [0xe0,0x7f,0x0b,0xd5]
+// ERROR: error: DC CIGDVAOC requires: mte, memtag, occmo
+  dc cvaoc, x13
+// CHECK: dc	cvaoc, x13                      // encoding: [0x0d,0x7b,0x0b,0xd5]
+// ERROR: error: DC CVAOC requires: occmo
+  dc cgdvaoc, x1
+// CHECK: dc	cgdvaoc, x1                     // encoding: [0xe1,0x7b,0x0b,0xd5]
+// ERROR: error: DC CGDVAOC requires: mte, memtag, occmo
+

diff  --git a/llvm/test/MC/AArch64/armv9.6a-pcdphint.s b/llvm/test/MC/AArch64/armv9.6a-pcdphint.s
new file mode 100644
index 00000000000000..6314e534318c46
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9.6a-pcdphint.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+pcdphint %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding %s 2>&1 | FileCheck --check-prefix=ERROR %s
+
+.func:
+// CHECK: .func:
+  stshh keep
+// CHECK: stshh	keep                            // encoding: [0x1f,0x96,0x01,0xd5]
+// ERROR: error: instruction requires: pcdphint
+  stshh strm
+// CHECK: stshh	strm                            // encoding: [0x3f,0x96,0x01,0xd5]
+// ERROR: error: instruction requires: pcdphint
+
+

diff  --git a/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s b/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s
new file mode 100644
index 00000000000000..c6387ea2ef2abd
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s
@@ -0,0 +1,12 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+// RUN: llvm-mc -triple aarch64 -show-encoding %s  | FileCheck %s
+.func:
+  apas x0
+  mrs x3, GPCBW_EL3
+  msr GPCBW_EL3, x4
+
+# CHECK:      	.text
+# CHECK-NEXT: .func:
+# CHECK-NEXT: 	apas	x0                              // encoding: [0x1f,0x70,0x0e,0xd5]
+# CHECK-NEXT: 	mrs	x3, GPCBW_EL3                   // encoding: [0xa3,0x21,0x3e,0xd5]
+# CHECK-NEXT: 	msr	GPCBW_EL3, x4                   // encoding: [0xa4,0x21,0x1e,0xd5]

diff  --git a/llvm/test/MC/AArch64/armv9.6a-srmask.s b/llvm/test/MC/AArch64/armv9.6a-srmask.s
new file mode 100644
index 00000000000000..40f0e98494d4c1
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9.6a-srmask.s
@@ -0,0 +1,102 @@
+// RUN: llvm-mc -triple aarch64 -show-encoding %s   | FileCheck %s
+
+mrs x3, SCTLRMASK_EL1
+// CHECK: mrs	x3, SCTLRMASK_EL1               // encoding: [0x03,0x14,0x38,0xd5]
+mrs x3, SCTLRMASK_EL2
+// CHECK: mrs	x3, SCTLRMASK_EL2               // encoding: [0x03,0x14,0x3c,0xd5]
+mrs x3, SCTLRMASK_EL12
+// CHECK: mrs	x3, SCTLRMASK_EL12              // encoding: [0x03,0x14,0x3d,0xd5]
+mrs x3, CPACRMASK_EL1
+// CHECK: mrs	x3, CPACRMASK_EL1               // encoding: [0x43,0x14,0x38,0xd5]
+mrs x3, CPTRMASK_EL2
+// CHECK: mrs	x3, CPTRMASK_EL2                // encoding: [0x43,0x14,0x3c,0xd5]
+mrs x3, CPACRMASK_EL12
+// CHECK: mrs	x3, CPACRMASK_EL12              // encoding: [0x43,0x14,0x3d,0xd5]
+mrs x3, SCTLR2MASK_EL1
+// CHECK: mrs	x3, SCTLR2MASK_EL1              // encoding: [0x63,0x14,0x38,0xd5]
+mrs x3, SCTLR2MASK_EL2
+// CHECK: mrs	x3, SCTLR2MASK_EL2              // encoding: [0x63,0x14,0x3c,0xd5]
+mrs x3, SCTLR2MASK_EL12
+// CHECK: mrs	x3, SCTLR2MASK_EL12             // encoding: [0x63,0x14,0x3d,0xd5]
+mrs x3, CPACRALIAS_EL1
+// CHECK: mrs	x3, CPACRALIAS_EL1              // encoding: [0x83,0x14,0x38,0xd5]
+mrs x3, SCTLRALIAS_EL1
+// CHECK: mrs	x3, SCTLRALIAS_EL1              // encoding: [0xc3,0x14,0x38,0xd5]
+mrs x3, SCTLR2ALIAS_EL1
+// CHECK: mrs	x3, SCTLR2ALIAS_EL1             // encoding: [0xe3,0x14,0x38,0xd5]
+mrs x3, TCRMASK_EL1
+// CHECK: mrs	x3, TCRMASK_EL1                 // encoding: [0x43,0x27,0x38,0xd5]
+mrs x3, TCRMASK_EL2
+// CHECK: mrs	x3, TCRMASK_EL2                 // encoding: [0x43,0x27,0x3c,0xd5]
+mrs x3, TCRMASK_EL12
+// CHECK: mrs	x3, TCRMASK_EL12                // encoding: [0x43,0x27,0x3d,0xd5]
+mrs x3, TCR2MASK_EL1
+// CHECK: mrs	x3, TCR2MASK_EL1                // encoding: [0x63,0x27,0x38,0xd5]
+mrs x3, TCR2MASK_EL2
+// CHECK: mrs	x3, TCR2MASK_EL2                // encoding: [0x63,0x27,0x3c,0xd5]
+mrs x3, TCR2MASK_EL12
+// CHECK: mrs	x3, TCR2MASK_EL12               // encoding: [0x63,0x27,0x3d,0xd5]
+mrs x3, TCRALIAS_EL1
+// CHECK: mrs	x3, TCRALIAS_EL1                // encoding: [0xc3,0x27,0x38,0xd5]
+mrs x3, TCR2ALIAS_EL1
+// CHECK: mrs	x3, TCR2ALIAS_EL1               // encoding: [0xe3,0x27,0x38,0xd5]
+mrs x3, ACTLRMASK_EL1
+// CHECK: mrs	x3, ACTLRMASK_EL1               // encoding: [0x23,0x14,0x38,0xd5]
+mrs x3, ACTLRMASK_EL2
+// CHECK: mrs	x3, ACTLRMASK_EL2               // encoding: [0x23,0x14,0x3c,0xd5]
+mrs x3, ACTLRMASK_EL12
+// CHECK: mrs	x3, ACTLRMASK_EL12              // encoding: [0x23,0x14,0x3d,0xd5]
+mrs x3, ACTLRALIAS_EL1
+// CHECK: mrs	x3, ACTLRALIAS_EL1              // encoding: [0xa3,0x14,0x38,0xd5]
+
+msr SCTLRMASK_EL1, x3
+// CHECK: msr	SCTLRMASK_EL1, x3               // encoding: [0x03,0x14,0x18,0xd5]
+msr SCTLRMASK_EL2, x3
+// CHECK: msr	SCTLRMASK_EL2, x3               // encoding: [0x03,0x14,0x1c,0xd5]
+msr SCTLRMASK_EL12, x3
+// CHECK: msr	SCTLRMASK_EL12, x3              // encoding: [0x03,0x14,0x1d,0xd5]
+msr CPACRMASK_EL1, x3
+// CHECK: msr	CPACRMASK_EL1, x3               // encoding: [0x43,0x14,0x18,0xd5]
+msr CPTRMASK_EL2, x3
+// CHECK: msr	CPTRMASK_EL2, x3                // encoding: [0x43,0x14,0x1c,0xd5]
+msr CPACRMASK_EL12, x3
+// CHECK: msr	CPACRMASK_EL12, x3              // encoding: [0x43,0x14,0x1d,0xd5]
+msr SCTLR2MASK_EL1, x3
+// CHECK: msr	SCTLR2MASK_EL1, x3              // encoding: [0x63,0x14,0x18,0xd5]
+msr SCTLR2MASK_EL2, x3
+// CHECK: msr	SCTLR2MASK_EL2, x3              // encoding: [0x63,0x14,0x1c,0xd5]
+msr SCTLR2MASK_EL12, x3
+// CHECK: msr	SCTLR2MASK_EL12, x3             // encoding: [0x63,0x14,0x1d,0xd5]
+msr CPACRALIAS_EL1, x3
+// CHECK: msr	CPACRALIAS_EL1, x3              // encoding: [0x83,0x14,0x18,0xd5]
+msr SCTLRALIAS_EL1, x3
+// CHECK: msr	SCTLRALIAS_EL1, x3              // encoding: [0xc3,0x14,0x18,0xd5]
+msr SCTLR2ALIAS_EL1, x3
+// CHECK: msr	SCTLR2ALIAS_EL1, x3             // encoding: [0xe3,0x14,0x18,0xd5]
+msr TCRMASK_EL1, x3
+// CHECK: msr	TCRMASK_EL1, x3                 // encoding: [0x43,0x27,0x18,0xd5]
+msr TCRMASK_EL2, x3
+// CHECK: msr	TCRMASK_EL2, x3                 // encoding: [0x43,0x27,0x1c,0xd5]
+msr TCRMASK_EL12, x3
+// CHECK: msr	TCRMASK_EL12, x3                // encoding: [0x43,0x27,0x1d,0xd5]
+msr TCR2MASK_EL1, x3
+// CHECK: msr	TCR2MASK_EL1, x3                // encoding: [0x63,0x27,0x18,0xd5]
+msr TCR2MASK_EL2, x3
+// CHECK: msr	TCR2MASK_EL2, x3                // encoding: [0x63,0x27,0x1c,0xd5]
+msr TCR2MASK_EL12, x3
+// CHECK: msr	TCR2MASK_EL12, x3               // encoding: [0x63,0x27,0x1d,0xd5]
+msr TCRALIAS_EL1, x3
+// CHECK: msr	TCRALIAS_EL1, x3                // encoding: [0xc3,0x27,0x18,0xd5]
+msr TCR2ALIAS_EL1, x3
+// CHECK: msr	TCR2ALIAS_EL1, x3               // encoding: [0xe3,0x27,0x18,0xd5]
+msr ACTLRMASK_EL1, x3
+// CHECK: msr	ACTLRMASK_EL1, x3               // encoding: [0x23,0x14,0x18,0xd5]
+msr ACTLRMASK_EL2, x3
+// CHECK: msr	ACTLRMASK_EL2, x3               // encoding: [0x23,0x14,0x1c,0xd5]
+msr ACTLRMASK_EL12, x3
+// CHECK: msr	ACTLRMASK_EL12, x3              // encoding: [0x23,0x14,0x1d,0xd5]
+msr ACTLRALIAS_EL1, x3
+// CHECK: msr	ACTLRALIAS_EL1, x3              // encoding: [0xa3,0x14,0x18,0xd5]
+
+
+

diff  --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-lsui.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-lsui.txt
new file mode 100644
index 00000000000000..7073ade6a309b6
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-lsui.txt
@@ -0,0 +1,324 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mc -triple aarch64 -mattr=+lsui -disassemble %s  | FileCheck %s
+
+# LDTXR and STTXR
+[0xe9,0x7f,0x5f,0xc9]
+[0xe9,0x7f,0x5f,0xc9]
+[0x6a,0x7d,0x5f,0xc9]
+[0x6a,0x7d,0x5f,0xc9]
+
+[0xe4,0x7f,0x1f,0x89]
+[0xe4,0x7f,0x1f,0x89]
+[0xe6,0x7c,0x05,0xc9]
+[0xe6,0x7c,0x05,0xc9]
+
+# LDATXR and STLTXR
+[0xe9,0xff,0x5f,0xc9]
+[0x6a,0xfd,0x5f,0xc9]
+
+[0xe4,0xff,0x02,0x89]
+[0xe6,0xfc,0x05,0xc9]
+
+# STTP and LDTP
+[0x55,0xf4,0x5f,0xe9]
+[0x76,0x5c,0x60,0xe9]
+[0x98,0xe4,0x40,0xe9]
+
+[0xe3,0x17,0x81,0xe8]
+[0xe3,0x97,0x80,0xe9]
+[0xe3,0x17,0x00,0xed]
+[0xf1,0xcf,0x1f,0xed]
+
+[0x55,0xf4,0xdf,0xe8]
+[0x76,0x5c,0xe0,0xe8]
+[0x98,0xe4,0xc0,0xe8]
+
+[0xe3,0x17,0x80,0xec]
+[0xf1,0xcf,0x9f,0xec]
+[0x37,0x74,0xe0,0xec]
+
+[0x55,0xf4,0xdf,0xe9]
+[0x76,0x5c,0xe0,0xe9]
+[0x98,0xe4,0xc0,0xe9]
+
+[0xe3,0x17,0x80,0xed]
+[0xf1,0xcf,0x9f,0xed]
+[0x37,0x74,0xe0,0xed]
+
+[0x55,0xf4,0x5f,0xe8]
+[0x76,0x5c,0x60,0xe8]
+[0x98,0xe4,0x40,0xe8]
+[0x37,0x74,0x60,0xec]
+
+[0xe3,0x17,0x00,0xe8]
+[0xf1,0x4f,0x04,0xe8]
+[0xe3,0x17,0x00,0xec]
+[0xf1,0xcf,0x1f,0xec]
+
+# SWPT{A|L}
+[0xbf,0x84,0x27,0x19]
+[0xff,0x87,0x29,0x59]
+
+[0xbf,0x84,0xa7,0x19]
+[0xff,0x87,0xa9,0x59]
+
+[0xbf,0x84,0x67,0x19]
+[0xff,0x87,0x69,0x59]
+
+[0xbf,0x84,0xe7,0x19]
+[0xff,0x87,0xe9,0x59]
+
+# CAS{A|L}T
+[0x41,0x7c,0x80,0xc9]
+[0xe1,0x7f,0x80,0xc9]
+[0x41,0x7c,0xc0,0xc9]
+[0xe1,0x7f,0xc0,0xc9]
+[0x41,0xfc,0xc0,0xc9]
+[0xe1,0xff,0xc0,0xc9]
+[0x41,0xfc,0x80,0xc9]
+[0xe1,0xff,0x80,0xc9]
+
+# CASP{A|L}T
+[0x82,0x7c,0x80,0x49]
+[0xe2,0x7f,0x80,0x49]
+[0x82,0x7c,0xc0,0x49]
+[0xe2,0x7f,0xc0,0x49]
+[0x82,0xfc,0x80,0x49]
+[0xe2,0xff,0x80,0x49]
+[0x82,0xfc,0xc0,0x49]
+[0xe2,0xff,0xc0,0x49]
+
+# LDT{SET|ADD|CLR}{A|L|AL} and STT{ADD|SET|CLR}{A|L|AL}
+
+[0xbf,0x04,0x27,0x19]
+[0xff,0x07,0x29,0x59]
+
+[0xbf,0x04,0xa7,0x19]
+[0xff,0x07,0xa9,0x59]
+
+[0xbf,0x04,0x67,0x19]
+[0xff,0x07,0x69,0x59]
+
+[0xbf,0x04,0xe7,0x19]
+[0xff,0x07,0xe9,0x59]
+
+[0xbf,0x14,0x27,0x19]
+[0xff,0x17,0x29,0x59]
+
+[0xbf,0x14,0x67,0x19]
+[0xff,0x17,0x69,0x59]
+
+[0xbf,0x14,0xa7,0x19]
+[0xff,0x17,0xa9,0x59]
+
+[0xbf,0x14,0xe7,0x19]
+[0xff,0x17,0xe9,0x59]
+
+[0xbf,0x34,0x27,0x19]
+[0xff,0x37,0x29,0x59]
+
+[0xbf,0x34,0x67,0x19]
+[0xff,0x37,0x69,0x59]
+
+[0xbf,0x34,0xa7,0x19]
+[0xff,0x37,0xa9,0x59]
+
+[0xbf,0x34,0xe7,0x19]
+[0xff,0x37,0xe9,0x59]
+
+[0x5f,0x04,0x20,0x19]
+[0xff,0x07,0x22,0x19]
+[0x5f,0x04,0x20,0x59]
+[0xff,0x07,0x22,0x59]
+
+[0x5f,0x04,0x20,0x19]
+[0xff,0x07,0x22,0x19]
+[0x5f,0x04,0x20,0x59]
+[0xff,0x07,0x22,0x59]
+
+[0x5f,0x04,0x20,0x19]
+[0xff,0x07,0x22,0x19]
+[0x5f,0x04,0x20,0x59]
+[0xff,0x07,0x22,0x59]
+
+[0x5f,0x04,0x20,0x19]
+[0xff,0x07,0x22,0x19]
+[0x5f,0x04,0x20,0x59]
+[0xff,0x07,0x22,0x59]
+
+[0x5f,0x14,0x20,0x19]
+[0xff,0x17,0x22,0x19]
+[0x5f,0x14,0x20,0x59]
+[0xff,0x17,0x22,0x59]
+
+[0x5f,0x14,0x20,0x19]
+[0xff,0x17,0x22,0x19]
+[0x5f,0x14,0x20,0x59]
+[0xff,0x17,0x22,0x59]
+
+[0x5f,0x14,0x20,0x19]
+[0xff,0x17,0x22,0x19]
+[0x5f,0x14,0x20,0x59]
+[0xff,0x17,0x22,0x59]
+
+[0x5f,0x14,0x20,0x19]
+[0xff,0x17,0x22,0x59]
+[0x5f,0x14,0x20,0x59]
+[0xff,0x17,0x22,0x59]
+
+[0x5f,0x34,0x20,0x19]
+[0xff,0x37,0x22,0x19]
+[0x5f,0x34,0x20,0x59]
+[0xff,0x37,0x22,0x59]
+
+[0x5f,0x34,0x20,0x19]
+[0xff,0x37,0x22,0x19]
+[0x5f,0x34,0x20,0x59]
+[0xff,0x37,0x22,0x59]
+
+[0x5f,0x34,0x20,0x19]
+[0xff,0x37,0x22,0x19]
+[0x5f,0x34,0x20,0x59]
+[0xff,0x37,0x22,0x59]
+
+[0x5f,0x34,0x20,0x19]
+[0xff,0x37,0x22,0x59]
+[0x5f,0x34,0x20,0x59]
+[0xff,0x37,0x22,0x59]
+
+# CHECK:      	.text
+# CHECK-NEXT: 	ldtxr	x9, [sp]
+# CHECK-NEXT: 	ldtxr	x9, [sp]
+# CHECK-NEXT: 	ldtxr	x10, [x11]
+# CHECK-NEXT: 	ldtxr	x10, [x11]
+# CHECK-NEXT: 	sttxr	wzr, w4, [sp]
+# CHECK-NEXT: 	sttxr	wzr, w4, [sp]
+# CHECK-NEXT: 	sttxr	w5, x6, [x7]
+# CHECK-NEXT: 	sttxr	w5, x6, [x7]
+# CHECK-NEXT: 	ldatxr	x9, [sp]
+# CHECK-NEXT: 	ldatxr	x10, [x11]
+# CHECK-NEXT: 	stltxr	w2, w4, [sp]
+# CHECK-NEXT: 	stltxr	w5, x6, [x7]
+# CHECK-NEXT: 	ldtp	x21, x29, [x2, #504]
+# CHECK-NEXT: 	ldtp	x22, x23, [x3, #-512]
+# CHECK-NEXT: 	ldtp	x24, x25, [x4, #8]
+# CHECK-NEXT:	sttp	x3, x5, [sp], #16
+# CHECK-NEXT:	sttp	x3, x5, [sp, #8]!
+# CHECK-NEXT: 	sttp	q3, q5, [sp]
+# CHECK-NEXT: 	sttp	q17, q19, [sp, #1008]
+# CHECK-NEXT: 	ldtp	x21, x29, [x2], #504
+# CHECK-NEXT: 	ldtp	x22, x23, [x3], #-512
+# CHECK-NEXT: 	ldtp	x24, x25, [x4], #8
+# CHECK-NEXT: 	sttp	q3, q5, [sp], #0
+# CHECK-NEXT: 	sttp	q17, q19, [sp], #1008
+# CHECK-NEXT: 	ldtp	q23, q29, [x1], #-1024
+# CHECK-NEXT: 	ldtp	x21, x29, [x2, #504]!
+# CHECK-NEXT: 	ldtp	x22, x23, [x3, #-512]!
+# CHECK-NEXT: 	ldtp	x24, x25, [x4, #8]!
+# CHECK-NEXT: 	sttp	q3, q5, [sp, #0]!
+# CHECK-NEXT: 	sttp	q17, q19, [sp, #1008]!
+# CHECK-NEXT: 	ldtp	q23, q29, [x1, #-1024]!
+# CHECK-NEXT: 	ldtnp	x21, x29, [x2, #504]
+# CHECK-NEXT: 	ldtnp	x22, x23, [x3, #-512]
+# CHECK-NEXT: 	ldtnp	x24, x25, [x4, #8]
+# CHECK-NEXT: 	ldtnp	q23, q29, [x1, #-1024]
+# CHECK-NEXT: 	sttnp	x3, x5, [sp]
+# CHECK-NEXT: 	sttnp	x17, x19, [sp, #64]
+# CHECK-NEXT: 	sttnp	q3, q5, [sp]
+# CHECK-NEXT: 	sttnp	q17, q19, [sp, #1008]
+# CHECK-NEXT: 	swpt	w7, wzr, [x5]
+# CHECK-NEXT: 	swpt	x9, xzr, [sp]
+# CHECK-NEXT: 	swpta	w7, wzr, [x5]
+# CHECK-NEXT: 	swpta	x9, xzr, [sp]
+# CHECK-NEXT: 	swptl	w7, wzr, [x5]
+# CHECK-NEXT: 	swptl	x9, xzr, [sp]
+# CHECK-NEXT: 	swptal	w7, wzr, [x5]
+# CHECK-NEXT: 	swptal	x9, xzr, [sp]
+# CHECK-NEXT: 	cast	x0, x1, [x2]
+# CHECK-NEXT: 	cast	x0, x1, [sp]
+# CHECK-NEXT: 	casat	x0, x1, [x2]
+# CHECK-NEXT: 	casat	x0, x1, [sp]
+# CHECK-NEXT: 	casalt	x0, x1, [x2]
+# CHECK-NEXT: 	casalt	x0, x1, [sp]
+# CHECK-NEXT: 	caslt	x0, x1, [x2]
+# CHECK-NEXT: 	caslt	x0, x1, [sp]
+# CHECK-NEXT: 	caspt	x0, x1, x2, x3, [x4]
+# CHECK-NEXT: 	caspt	x0, x1, x2, x3, [sp]
+# CHECK-NEXT: 	caspat	x0, x1, x2, x3, [x4]
+# CHECK-NEXT: 	caspat	x0, x1, x2, x3, [sp]
+# CHECK-NEXT: 	casplt	x0, x1, x2, x3, [x4]
+# CHECK-NEXT: 	casplt	x0, x1, x2, x3, [sp]
+# CHECK-NEXT: 	caspalt	x0, x1, x2, x3, [x4]
+# CHECK-NEXT: 	caspalt	x0, x1, x2, x3, [sp]
+# CHECK-NEXT: 	ldtadd	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtadd	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtadda	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtadda	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtaddl	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtaddl	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtaddal	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtaddal	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtclr	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtclr	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtclrl	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtclrl	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtclra	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtclra	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtclral	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtclral	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtset	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtset	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtsetl	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtsetl	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtseta	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtseta	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtsetal	w7, wzr, [x5]
+# CHECK-NEXT: 	ldtsetal	x9, xzr, [sp]
+# CHECK-NEXT: 	ldtadd	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtadd	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtadd	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtadd	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtadd	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtadd	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtadd	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtadd	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtadd	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtadd	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtadd	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtadd	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtadd	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtadd	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtadd	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtadd	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtclr	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtclr	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtclr	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtclr	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtclr	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtclr	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtclr	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtclr	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtclr	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtclr	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtclr	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtclr	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtclr	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtclr	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtclr	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtclr	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtset	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtset	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtset	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtset	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtset	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtset	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtset	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtset	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtset	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtset	w2, wzr, [sp]
+# CHECK-NEXT: 	ldtset	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtset	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtset	w0, wzr, [x2]
+# CHECK-NEXT: 	ldtset	x2, xzr, [sp]
+# CHECK-NEXT: 	ldtset	x0, xzr, [x2]
+# CHECK-NEXT: 	ldtset	x2, xzr, [sp]

diff  --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-occmo.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-occmo.txt
new file mode 100644
index 00000000000000..ccc65e747bc0b4
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-occmo.txt
@@ -0,0 +1,12 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+occmo -mattr=+mte -disassemble < %s | FileCheck %s
+[0x0c,0x7f,0x0b,0xd5]
+[0xe0,0x7f,0x0b,0xd5]
+[0x0d,0x7b,0x0b,0xd5]
+[0xe1,0x7b,0x0b,0xd5]
+
+# CHECK:      	.text
+# CHECK-NEXT: 	dc	civaoc, x12
+# CHECK-NEXT: 	dc	cigdvaoc, x0
+# CHECK-NEXT: 	dc	cvaoc, x13
+# CHECK-NEXT: 	dc	cgdvaoc, x1

diff  --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-pcdphint.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-pcdphint.txt
new file mode 100644
index 00000000000000..0c73b2248849d5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-pcdphint.txt
@@ -0,0 +1,9 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mc -triple aarch64 -disassemble -mattr=+pcdphint %s | FileCheck %s
+
+[0x1f,0x96,0x01,0xd5]
+[0x3f,0x96,0x01,0xd5]
+
+# CHECK:      	.text
+# CHECK-NEXT: 	stshh	keep
+# CHECK-NEXT: 	stshh	strm

diff  --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt
new file mode 100644
index 00000000000000..a641731b2f1b2a
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt
@@ -0,0 +1,11 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mc -triple aarch64 -disassemble %s  | FileCheck %s
+
+[0x1f,0x70,0x0e,0xd5]
+[0xa3,0x21,0x3e,0xd5]
+[0xa4,0x21,0x1e,0xd5]
+
+# CHECK:      	.text
+# CHECK-NEXT: 	sys	#6, c7, c0, #0
+# CHECK-NEXT: 	mrs	x3, GPCBW_EL3
+# CHECK-NEXT: 	msr	GPCBW_EL3, x4

diff  --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-srmask.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-srmask.txt
new file mode 100644
index 00000000000000..1401af18aad5d3
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-srmask.txt
@@ -0,0 +1,102 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mc -triple aarch64 -disassemble < %s 2> %t | FileCheck %s
+
+[0x03,0x14,0x38,0xd5]
+[0x03,0x14,0x3c,0xd5]
+[0x03,0x14,0x3d,0xd5]
+[0x43,0x14,0x38,0xd5]
+[0x43,0x14,0x3c,0xd5]
+[0x43,0x14,0x3d,0xd5]
+[0x63,0x14,0x38,0xd5]
+[0x63,0x14,0x3c,0xd5]
+[0x63,0x14,0x3d,0xd5]
+[0x83,0x14,0x38,0xd5]
+[0xc3,0x14,0x38,0xd5]
+[0xe3,0x14,0x38,0xd5]
+[0x43,0x27,0x38,0xd5]
+[0x43,0x27,0x3c,0xd5]
+[0x43,0x27,0x3d,0xd5]
+[0x63,0x27,0x38,0xd5]
+[0x63,0x27,0x3c,0xd5]
+[0x63,0x27,0x3d,0xd5]
+[0xc3,0x27,0x38,0xd5]
+[0xe3,0x27,0x38,0xd5]
+[0x23,0x14,0x38,0xd5]
+[0x23,0x14,0x3c,0xd5]
+[0x23,0x14,0x3d,0xd5]
+[0xa3,0x14,0x38,0xd5]
+
+[0x03,0x14,0x18,0xd5]
+[0x03,0x14,0x1c,0xd5]
+[0x03,0x14,0x1d,0xd5]
+[0x43,0x14,0x18,0xd5]
+[0x43,0x14,0x1c,0xd5]
+[0x43,0x14,0x1d,0xd5]
+[0x63,0x14,0x18,0xd5]
+[0x63,0x14,0x1c,0xd5]
+[0x63,0x14,0x1d,0xd5]
+[0x83,0x14,0x18,0xd5]
+[0xc3,0x14,0x18,0xd5]
+[0xe3,0x14,0x18,0xd5]
+[0x43,0x27,0x18,0xd5]
+[0x43,0x27,0x1c,0xd5]
+[0x43,0x27,0x1d,0xd5]
+[0x63,0x27,0x18,0xd5]
+[0x63,0x27,0x1c,0xd5]
+[0x63,0x27,0x1d,0xd5]
+[0xc3,0x27,0x18,0xd5]
+[0xe3,0x27,0x18,0xd5]
+[0x23,0x14,0x18,0xd5]
+[0x23,0x14,0x1c,0xd5]
+[0x23,0x14,0x1d,0xd5]
+[0xa3,0x14,0x18,0xd5]
+
+# CHECK:      	.text
+# CHECK-NEXT: 	mrs	x3, SCTLRMASK_EL1
+# CHECK-NEXT: 	mrs	x3, SCTLRMASK_EL2
+# CHECK-NEXT: 	mrs	x3, SCTLRMASK_EL12
+# CHECK-NEXT: 	mrs	x3, CPACRMASK_EL1
+# CHECK-NEXT: 	mrs	x3, CPTRMASK_EL2
+# CHECK-NEXT: 	mrs	x3, CPACRMASK_EL12
+# CHECK-NEXT: 	mrs	x3, SCTLR2MASK_EL1
+# CHECK-NEXT: 	mrs	x3, SCTLR2MASK_EL2
+# CHECK-NEXT: 	mrs	x3, SCTLR2MASK_EL12
+# CHECK-NEXT: 	mrs	x3, CPACRALIAS_EL1
+# CHECK-NEXT: 	mrs	x3, SCTLRALIAS_EL1
+# CHECK-NEXT: 	mrs	x3, SCTLR2ALIAS_EL1
+# CHECK-NEXT: 	mrs	x3, TCRMASK_EL1
+# CHECK-NEXT: 	mrs	x3, TCRMASK_EL2
+# CHECK-NEXT: 	mrs	x3, TCRMASK_EL12
+# CHECK-NEXT: 	mrs	x3, TCR2MASK_EL1
+# CHECK-NEXT: 	mrs	x3, TCR2MASK_EL2
+# CHECK-NEXT: 	mrs	x3, TCR2MASK_EL12
+# CHECK-NEXT: 	mrs	x3, TCRALIAS_EL1
+# CHECK-NEXT: 	mrs	x3, TCR2ALIAS_EL1
+# CHECK-NEXT: 	mrs	x3, ACTLRMASK_EL1
+# CHECK-NEXT: 	mrs	x3, ACTLRMASK_EL2
+# CHECK-NEXT: 	mrs	x3, ACTLRMASK_EL12
+# CHECK-NEXT: 	mrs	x3, ACTLRALIAS_EL1
+# CHECK-NEXT: 	msr	SCTLRMASK_EL1, x3
+# CHECK-NEXT: 	msr	SCTLRMASK_EL2, x3
+# CHECK-NEXT: 	msr	SCTLRMASK_EL12, x3
+# CHECK-NEXT: 	msr	CPACRMASK_EL1, x3
+# CHECK-NEXT: 	msr	CPTRMASK_EL2, x3
+# CHECK-NEXT: 	msr	CPACRMASK_EL12, x3
+# CHECK-NEXT: 	msr	SCTLR2MASK_EL1, x3
+# CHECK-NEXT: 	msr	SCTLR2MASK_EL2, x3
+# CHECK-NEXT: 	msr	SCTLR2MASK_EL12, x3
+# CHECK-NEXT: 	msr	CPACRALIAS_EL1, x3
+# CHECK-NEXT: 	msr	SCTLRALIAS_EL1, x3
+# CHECK-NEXT: 	msr	SCTLR2ALIAS_EL1, x3
+# CHECK-NEXT: 	msr	TCRMASK_EL1, x3
+# CHECK-NEXT: 	msr	TCRMASK_EL2, x3
+# CHECK-NEXT: 	msr	TCRMASK_EL12, x3
+# CHECK-NEXT: 	msr	TCR2MASK_EL1, x3
+# CHECK-NEXT: 	msr	TCR2MASK_EL2, x3
+# CHECK-NEXT: 	msr	TCR2MASK_EL12, x3
+# CHECK-NEXT: 	msr	TCRALIAS_EL1, x3
+# CHECK-NEXT: 	msr	TCR2ALIAS_EL1, x3
+# CHECK-NEXT: 	msr	ACTLRMASK_EL1, x3
+# CHECK-NEXT: 	msr	ACTLRMASK_EL2, x3
+# CHECK-NEXT: 	msr	ACTLRMASK_EL12, x3
+# CHECK-NEXT: 	msr	ACTLRALIAS_EL1, x3

diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 369e5346348014..572006166d0d8b 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1331,7 +1331,10 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_SVE_F16F32MM, AArch64::AEK_SVE_AES2,
       AArch64::AEK_SSVE_AES,     AArch64::AEK_F8F32MM,
       AArch64::AEK_F8F16MM,      AArch64::AEK_LSFE,
-      AArch64::AEK_FPRCVT,       AArch64::AEK_CMPBR};
+      AArch64::AEK_FPRCVT,       AArch64::AEK_CMPBR,
+      AArch64::AEK_LSUI,         AArch64::AEK_OCCMO,
+      AArch64::AEK_PCDPHINT,
+  };
 
   std::vector<StringRef> Features;
 
@@ -1431,6 +1434,9 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+lsfe"));
   EXPECT_TRUE(llvm::is_contained(Features, "+fprcvt"));
   EXPECT_TRUE(llvm::is_contained(Features, "+cmpbr"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+lsui"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+occmo"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+pcdphint"));
 
   // Assuming we listed every extension above, this should produce the same
   // result.
@@ -1582,7 +1588,11 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"sme-f8f32", "nosme-f8f32", "+sme-f8f32", "-sme-f8f32"},
       {"lsfe", "nolsfe", "+lsfe", "-lsfe"},
       {"fprcvt", "nofprcvt", "+fprcvt", "-fprcvt"},
-      {"cmpbr", "nocmpbr", "+cmpbr", "-cmpbr"}};
+      {"cmpbr", "nocmpbr", "+cmpbr", "-cmpbr"},
+      {"lsui", "nolsui", "+lsui", "-lsui"},
+      {"occmo", "nooccmo", "+occmo", "-occmo"},
+      {"pcdphint", "nopcdphint", "+pcdphint", "-pcdphint"},
+  };
 
   for (unsigned i = 0; i < std::size(ArchExt); i++) {
     EXPECT_EQ(StringRef(ArchExt[i][2]),


        


More information about the llvm-commits mailing list