[llvm] [AArch64] Remove redundant FMOV for zero-extended i32/i16 loads to f64 (PR #146920)

Wed Jul 23 02:04:05 PDT 2025

https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/146920

>From b5757cb6c0525d9ff249475ff2164219cff6e0f5 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Fri, 4 Jul 2025 10:31:38 +0000
Subject: [PATCH 1/3] [AArch64] Remove redundant FMOV for zero-extended i32/i16
 loads to f64

Previously, loading an i32 or i16 value, zero-extending it to i64, and
bitcasting the result to f64 emitted a load followed by a separate FMOV
instruction.

This patch adds TableGen patterns that select the FP/SIMD register load
directly, avoiding the unnecessary FMOV.

Tests added:
  - load_u64_from_u32.ll
  - load_u64_from_u16.ll
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td    | 10 +++++++++-
 llvm/test/CodeGen/AArch64/load_u64_from_u16.ll | 15 +++++++++++++++
 llvm/test/CodeGen/AArch64/load_u64_from_u32.ll | 14 ++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
 create mode 100644 llvm/test/CodeGen/AArch64/load_u64_from_u32.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index efe6cc1aa8aec..fb2c7c9f6adfe 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3913,6 +3913,14 @@ defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
 def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
       (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
 
+// load zero-extended i32, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+       (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+
+// load zero-extended i16, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
+           (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+    
 // Pre-fetch.
 def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                         [(AArch64Prefetch timm:$Rt,
@@ -10986,4 +10994,4 @@ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
 include "AArch64SMEInstrInfo.td"
-include "AArch64InstrGISel.td"
+include "AArch64InstrGISel.td"
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll b/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
new file mode 100644
index 0000000000000..ee02895b55221
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define double @_Z9load_u64_from_u16_testPj(ptr %n){
+; CHECK-LABEL: _Z9load_u64_from_u16_testPj:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i16, ptr %n, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
diff --git a/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll b/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
new file mode 100644
index 0000000000000..ad30981012112
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define double @_Z9load_u64_from_u32_testPj(ptr %n) {
+; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, ptr %n, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}

>From fabb75dfc3b041cfdba1ad9792fa2fdf44857bec Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Mon, 7 Jul 2025 15:30:07 +0000
Subject: [PATCH 2/3] [AArch64] Remove redundant FMOV for zero-extended
 integer loads bitcast to FP types

This patch removes redundant FMOV instructions for patterns where an integer
is loaded from memory, zero-extended, and bitcast to a floating-point type.
Patterns are added for i8/i16/i32 to f64 and for i8/i16 to f32. A single test
file (load-zext-bitcast.ll), which combines the previously created test
files, covers all cases.

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 14 +++-
 .../test/CodeGen/AArch64/load-zext-bitcast.ll | 82 +++++++++++++++++++
 .../test/CodeGen/AArch64/load_u64_from_u16.ll | 15 ----
 .../test/CodeGen/AArch64/load_u64_from_u32.ll | 14 ----
 4 files changed, 95 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/load-zext-bitcast.ll
 delete mode 100644 llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
 delete mode 100644 llvm/test/CodeGen/AArch64/load_u64_from_u32.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fb2c7c9f6adfe..87e151a04ceae 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3920,7 +3920,19 @@ def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s
 // load zero-extended i16, bitcast to f64
 def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
            (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-    
+
+// load zero-extended i8, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
+           (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
+// load zero-extended i16, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+           (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+
+// load zero-extended i8, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
+           (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
 // Pre-fetch.
 def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                         [(AArch64Prefetch timm:$Rt,
diff --git a/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll
new file mode 100644
index 0000000000000..1a83930dfafa2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+; load zero-extended i32, bitcast to f64
+define double @_Z9load_u64_from_u32_testPj(ptr %n){
+; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, ptr %n, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+; load zero-extended i16, bitcast to f64
+define double @_Z9load_u64_from_u16_testPj(ptr %n){
+; CHECK-LABEL: _Z9load_u64_from_u16_testPj:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i16, ptr %n, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+; load zero-extended i8, bitcast to f64
+define double @_Z16load_u64_from_u8Ph(ptr %n){
+; CHECK-LABEL: _Z16load_u64_from_u8Ph:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i8, ptr %n, align 1
+  %conv = zext i8 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
+; load zero-extended i16, bitcast to f32
+define float @_Z17load_u32_from_u16Pt(ptr %n){
+; CHECK-LABEL: _Z17load_u32_from_u16Pt:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i16, ptr %n, align 2
+  %conv = zext i16 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+; load zero-extended i8, bitcast to f32
+define float @_Z16load_u32_from_u8Ph(ptr %n){
+; CHECK-LABEL: _Z16load_u32_from_u8Ph:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i8, ptr %n, align 1
+  %conv = zext i8 %0 to i32
+  %1 = bitcast i32 %conv to float
+  ret float %1
+}
+
+; load zero-extended i8, bitcast to f16
+define half @_Z16load_u16_from_u8Ph(ptr %n){
+; CHECK-LABEL: _Z16load_u16_from_u8Ph:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr b0, [x0]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i8, ptr %n, align 1
+  %conv = zext i8 %0 to i16
+  %1 = bitcast i16 %conv to half
+  ret half %1
+}
+
diff --git a/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll b/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
deleted file mode 100644
index ee02895b55221..0000000000000
--- a/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
-
-define double @_Z9load_u64_from_u16_testPj(ptr %n){
-; CHECK-LABEL: _Z9load_u64_from_u16_testPj:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    ret
-entry:
-  %0 = load i16, ptr %n, align 2
-  %conv = zext i16 %0 to i64
-  %1 = bitcast i64 %conv to double
-  ret double %1
-}
-
diff --git a/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll b/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
deleted file mode 100644
index ad30981012112..0000000000000
--- a/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
-
-define double @_Z9load_u64_from_u32_testPj(ptr %n) {
-; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr s0, [x0]
-; CHECK-NEXT:    ret
-entry:
-  %0 = load i32, ptr %n, align 4
-  %conv = zext i32 %0 to i64
-  %1 = bitcast i64 %conv to double
-  ret double %1
-}

>From 0162869225b826bdac81bbdab19d8af7ea52e22f Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Wed, 23 Jul 2025 09:03:07 +0000
Subject: [PATCH 3/3] Restore AArch64InstrInfo.td to remove accidental editor
 change

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 88 +++++++++++----------
 1 file changed, 47 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 87e151a04ceae..0cb7b02d84a6e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -153,8 +153,8 @@ def HasSVEAES       : Predicate<"Subtarget->hasSVEAES()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVEAES), "sve-aes">;
 def HasSVE2SM4       : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
-def HasSVE2SHA3      : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
-                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
+def HasSVESHA3      : Predicate<"Subtarget->hasSVESHA3()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSVESHA3), "sve-sha3">;
 def HasSVEBitPerm   : Predicate<"Subtarget->hasSVEBitPerm()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVEBitPerm), "sve-bitperm">;
 def HasSMEandIsNonStreamingSafe
@@ -248,43 +248,59 @@ def HasSVE_or_SME
     : Predicate<"Subtarget->isSVEorStreamingSVEAvailable()">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                 "sve or sme">;
+def HasNonStreamingSVE_or_SME2p1
+    : Predicate<"Subtarget->isSVEAvailable() ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p1())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p1),
+                "sve or sme2p1">;
 def HasNonStreamingSVE_or_SME2p2
-    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
+    : Predicate<"Subtarget->isSVEAvailable() ||"
                 "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2),
                 "sve or sme2p2">;
+def HasNonStreamingSVE_or_SSVE_AES
+    : Predicate<"Subtarget->isSVEAvailable() ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSSVE_AES),
+                "sve or ssve-aes">;
+def HasNonStreamingSVE_or_SSVE_BitPerm
+    : Predicate<"Subtarget->isSVEAvailable() ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_BitPerm())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSSVE_BitPerm),
+                "sve or ssve-bitperm">;
 def HasNonStreamingSVE_or_SSVE_FEXPA
-    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
+    : Predicate<"Subtarget->isSVEAvailable() ||"
                 "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_FEXPA())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSSVE_FEXPA),
                 "sve or ssve-fexpa">;
 
 def HasSVE2_or_SME
-    : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2() || Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                 "sve2 or sme">;
-def HasSVE2_or_SME2
-    : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
+def HasNonStreamingSVE2_or_SME2
+    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                 "sve2 or sme2">;
-def HasNonStreamingSVE2_or_SSVE_AES
-    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
-                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
-                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_AES),
-                "sve2 or ssve-aes">;
 
 def HasSVE2p1_or_SME
-    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p1() || Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1),
                 "sme or sve2p1">;
 def HasSVE2p1_or_SME2
-    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p1() || Subtarget->hasSME2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1),
                 "sme2 or sve2p1">;
 def HasSVE2p1_or_SME2p1
-    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p1() || Subtarget->hasSME2p1())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1),
                 "sme2p1 or sve2p1">;
+def HasSVE2p1_or_StreamingSME2
+    : Predicate<"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVE2p1()) ||"
+                "(Subtarget->isStreaming() && Subtarget->hasSME2())">,
+                AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1),
+                "sme2 or sve2p1">;
 
 def HasSVE2p2_or_SME2p2
     : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">,
@@ -300,11 +316,6 @@ def HasSMEF16F16_or_SMEF8F16
     : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
                 AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
                 "sme-f16f16 or sme-f8f16">;
-def HasNonStreamingSVE2_or_SSVE_BitPerm
-    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
-                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_BitPerm())">,
-                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_BitPerm),
-                "sve2 or ssve-bitperm">;
 
 // A subset of NEON instructions are legal in Streaming SVE execution mode,
 // so don't need the additional check for 'isNeonAvailable'.
@@ -3913,26 +3924,6 @@ defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
 def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
       (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
 
-// load zero-extended i32, bitcast to f64
-def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
-       (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-
-// load zero-extended i16, bitcast to f64
-def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
-           (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-
-// load zero-extended i8, bitcast to f64
-def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
-           (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-
-// load zero-extended i16, bitcast to f32
-def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
-           (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-
-// load zero-extended i8, bitcast to f32
-def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
-           (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-
 // Pre-fetch.
 def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                         [(AArch64Prefetch timm:$Rt,
@@ -10761,6 +10752,21 @@ let Predicates = [HasCPA] in {
   // Scalar multiply-add/subtract
   def MADDPT : MulAccumCPA<0, "maddpt">;
   def MSUBPT : MulAccumCPA<1, "msubpt">;
+
+  def : Pat<(ptradd GPR64sp:$Rn, GPR64sp:$Rm),
+            (ADDPT_shift GPR64sp:$Rn, GPR64sp:$Rm, (i64 0))>;
+  def : Pat<(ptradd GPR64sp:$Rn, (shl GPR64sp:$Rm, (i64 imm0_7:$imm))),
+            (ADDPT_shift GPR64sp:$Rn, GPR64sp:$Rm,
+                         (i64 imm0_7:$imm))>;
+  def : Pat<(ptradd GPR64sp:$Rn, (ineg GPR64sp:$Rm)),
+            (SUBPT_shift GPR64sp:$Rn, GPR64sp:$Rm, (i64 0))>;
+  def : Pat<(ptradd GPR64sp:$Rn, (ineg (shl GPR64sp:$Rm, (i64 imm0_7:$imm)))),
+            (SUBPT_shift GPR64sp:$Rn, GPR64sp:$Rm,
+                         (i64 imm0_7:$imm))>;
+  def : Pat<(ptradd GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)),
+            (MADDPT GPR64:$Rn, GPR64:$Rm, GPR64:$Ra)>;
+  def : Pat<(ptradd GPR64:$Ra, (mul GPR64:$Rn, (ineg GPR64:$Rm))),
+            (MSUBPT GPR64:$Rn, GPR64:$Rm, GPR64:$Ra)>;
 }
 
 def round_v4fp32_to_v4bf16 :
@@ -11006,4 +11012,4 @@ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
 include "AArch64SMEInstrInfo.td"
-include "AArch64InstrGISel.td"
\ No newline at end of file
+include "AArch64InstrGISel.td"