[llvm] Adding support in llvm-exegesis for AArch64 for handling FPR64/128, PPR16 and ZPR128 register classes. (PR #127564)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 5 23:50:31 PST 2025


https://github.com/lakshayk-nv updated https://github.com/llvm/llvm-project/pull/127564

>From 624a7eeeb08df37b2cb7349dc3ce9bfa198d2abe Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Mon, 17 Feb 2025 02:25:29 -0800
Subject: [PATCH 01/18] Adding support for FPR64/128, PPR16 and ZPR128 in
 setRegTo of llvm-exegesis for AArch64

---
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 57 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 5a7cc6f5e30d3..806565a26f19b 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -35,6 +35,48 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
       .addImm(Value.getZExtValue());
 }
 
+static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
+                               const APInt &Value) {
+  if (Value.getBitWidth() > RegBitWidth)
+    llvm_unreachable("Value must fit in the ZPR Register");
+  // For ZPR, we typically use DUPM instruction to load immediate values
+  return MCInstBuilder(AArch64::DUPM_ZI)
+      .addReg(Reg)
+      .addImm(Value.getZExtValue());
+}
+
+static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
+                               const APInt &Value) {
+  if (Value.getBitWidth() > RegBitWidth)
+    llvm_unreachable("Value must fit in the PPR Register");
+  // For PPR, we typically use PTRUE instruction to set predicate registers
+  return MCInstBuilder(AArch64::PTRUE_B)
+      .addReg(Reg)
+      .addImm(31); // All lanes true
+}
+
+// Generates instruction to load an FP immediate value into a register.
+static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
+  switch (RegBitWidth) {
+  case 64:
+    return AArch64::FMOVDi; 
+  case 128:
+    return AArch64::MOVIv2d_ns;
+  }
+  llvm_unreachable("Invalid Value Width");
+}
+
+
+// Generates instruction to load an FP immediate value into a register.
+static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
+                            const APInt &Value) {
+  if (Value.getBitWidth() > RegBitWidth)
+    llvm_unreachable("Value must fit in the FP Register");
+  return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
+      .addReg(Reg)
+      .addImm(Value.getZExtValue());
+}
+
 #include "AArch64GenExegesis.inc"
 
 namespace {
@@ -49,8 +91,21 @@ class ExegesisAArch64Target : public ExegesisTarget {
                                const APInt &Value) const override {
     if (AArch64::GPR32RegClass.contains(Reg))
       return {loadImmediate(Reg, 32, Value)};
+    
     if (AArch64::GPR64RegClass.contains(Reg))
       return {loadImmediate(Reg, 64, Value)};
+
+    if (AArch64::PPRRegClass.contains(Reg))
+      return {loadPPRImmediate(Reg, 16, Value)}; 
+
+    if (AArch64::FPR64RegClass.contains(Reg)) 
+      return {loadFPImmediate(Reg, 64, Value)};
+    if (AArch64::FPR128RegClass.contains(Reg)) 
+      return {loadFPImmediate(Reg, 128, Value)};
+
+    if (AArch64::ZPRRegClass.contains(Reg)) 
+      return {loadZPRImmediate(Reg, 128, Value)};
+    
     errs() << "setRegTo is not implemented, results will be unreliable\n";
     return {};
   }
@@ -77,4 +132,4 @@ void InitializeAArch64ExegesisTarget() {
 }
 
 } // namespace exegesis
-} // namespace llvm
+} // namespace llvm
\ No newline at end of file

>From 4c4d605ba2aa3a4b1f510bac246d7ff75eb2993a Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Mon, 17 Feb 2025 19:51:16 -0800
Subject: [PATCH 02/18] Adding support for FPR64/128, PPR16 and ZPR128 in
 setRegTo of llvm-exegesis for AArch64

---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 806565a26f19b..dc312f4916703 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -91,7 +91,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
                                const APInt &Value) const override {
     if (AArch64::GPR32RegClass.contains(Reg))
       return {loadImmediate(Reg, 32, Value)};
-    
     if (AArch64::GPR64RegClass.contains(Reg))
       return {loadImmediate(Reg, 64, Value)};
 

>From d34cb6d157ccc8b1abf55c71494144a42b5bb546 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Thu, 20 Feb 2025 02:27:00 -0800
Subject: [PATCH 03/18] Added assert that Value is in range for the generated
 instructions and testcases for patch FPR64/128, PPR, ZPR128

---
 .../AArch64/reg_based/reg_fpr128.s             |  8 ++++++++
 .../AArch64/reg_based/reg_fpr64.s              |  7 +++++++
 .../llvm-exegesis/AArch64/reg_based/reg_ppr.s  |  8 ++++++++
 .../llvm-exegesis/AArch64/reg_based/reg_zpr.s  |  8 ++++++++
 .../tools/llvm-exegesis/lib/AArch64/Target.cpp | 18 +++++++-----------
 5 files changed, 38 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
 create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
 create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
 create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
new file mode 100644
index 0000000000000..210ea563b85f5
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
@@ -0,0 +1,8 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v | FileCheck %s
+# REQUIRES: aarch64-registered-target
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
new file mode 100644
index 0000000000000..0a08dcc2b7715
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
new file mode 100644
index 0000000000000..79ed1d3aaeb84
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
@@ -0,0 +1,8 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D | FileCheck %s
+# REQUIRES: aarch64-registered-target
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
new file mode 100644
index 0000000000000..e0d308f16c0f9
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
@@ -0,0 +1,8 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S | FileCheck %s
+# REQUIRES: aarch64-registered-target
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index dc312f4916703..3b0265cd22ffc 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,8 +28,7 @@ static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
 // Generates instruction to load an immediate value into a register.
 static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
                             const APInt &Value) {
-  if (Value.getBitWidth() > RegBitWidth)
-    llvm_unreachable("Value must fit in the Register");
+  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the Register"); 
   return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());
@@ -37,22 +36,20 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
 
 static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
                                const APInt &Value) {
-  if (Value.getBitWidth() > RegBitWidth)
-    llvm_unreachable("Value must fit in the ZPR Register");
+  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the PPR Register");
   // For ZPR, we typically use DUPM instruction to load immediate values
   return MCInstBuilder(AArch64::DUPM_ZI)
       .addReg(Reg)
-      .addImm(Value.getZExtValue());
+      .addImm(0x1);
 }
 
 static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
                                const APInt &Value) {
-  if (Value.getBitWidth() > RegBitWidth)
-    llvm_unreachable("Value must fit in the PPR Register");
+  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the PPR Register"); 
   // For PPR, we typically use PTRUE instruction to set predicate registers
   return MCInstBuilder(AArch64::PTRUE_B)
       .addReg(Reg)
-      .addImm(31); // All lanes true
+      .addImm(0xFFFF); // All lanes true for 16 bits
 }
 
 // Generates instruction to load an FP immediate value into a register.
@@ -70,8 +67,7 @@ static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
 // Generates instruction to load an FP immediate value into a register.
 static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
                             const APInt &Value) {
-  if (Value.getBitWidth() > RegBitWidth)
-    llvm_unreachable("Value must fit in the FP Register");
+  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the FP Register");
   return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());
@@ -131,4 +127,4 @@ void InitializeAArch64ExegesisTarget() {
 }
 
 } // namespace exegesis
-} // namespace llvm
\ No newline at end of file
+} // namespace llvm

>From 53b9f0b4f982bce63bece92cf16c169fd160aa24 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Thu, 20 Feb 2025 21:34:55 -0800
Subject: [PATCH 04/18] Added combined test file for register initialization
 (PPR, ZPR, FPR64/128); ZPR immediate is now initialized from Value, not a constant

---
 .../llvm-exegesis/AArch64/setReg_init_check.s | 24 +++++++++++++++++++
 .../llvm-exegesis/lib/AArch64/Target.cpp      |  5 +---
 2 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
new file mode 100644
index 0000000000000..02ab79fe69264
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -0,0 +1,24 @@
+# ppr register class initialization testcase 
+# ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+# REQUIRES: aarch64-registered-target
+# PPR-NOT: setRegTo is not implemented, results will be unreliable
+# PPR: assembled_snippet: {{.*}}C0035FD6
+
+# zpr register class initialization testcase 
+# ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
+# REQUIRES: aarch64-registered-target
+# ZPR-NOT: setRegTo is not implemented, results will be unreliable
+# ZPR: assembled_snippet: {{.*}}C0035FD6
+
+# fpr64 register class initialization testcase
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+# FPR64-NOT: setRegTo is not implemented, results will be unreliable
+# FPR64: assembled_snippet: {{.*}}C0035FD6
+
+# fpr128 register class initialization testcase
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+# REQUIRES: aarch64-registered-target
+# FPR128-NOT: setRegTo is not implemented, results will be unreliable
+# FPR128: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 3b0265cd22ffc..76afb8b57ef56 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -40,7 +40,7 @@ static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
   // For ZPR, we typically use DUPM instruction to load immediate values
   return MCInstBuilder(AArch64::DUPM_ZI)
       .addReg(Reg)
-      .addImm(0x1);
+      .addImm(Value.getZExtValue());
 }
 
 static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
@@ -89,15 +89,12 @@ class ExegesisAArch64Target : public ExegesisTarget {
       return {loadImmediate(Reg, 32, Value)};
     if (AArch64::GPR64RegClass.contains(Reg))
       return {loadImmediate(Reg, 64, Value)};
-
     if (AArch64::PPRRegClass.contains(Reg))
       return {loadPPRImmediate(Reg, 16, Value)}; 
-
     if (AArch64::FPR64RegClass.contains(Reg)) 
       return {loadFPImmediate(Reg, 64, Value)};
     if (AArch64::FPR128RegClass.contains(Reg)) 
       return {loadFPImmediate(Reg, 128, Value)};
-
     if (AArch64::ZPRRegClass.contains(Reg)) 
       return {loadZPRImmediate(Reg, 128, Value)};
     

>From caebb7b7043cee73c33e1167b449cd4e9acf539e Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Thu, 20 Feb 2025 21:40:27 -0800
Subject: [PATCH 05/18] Deleted individual test files

---
 .../tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s    | 8 --------
 .../tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s     | 7 -------
 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s | 8 --------
 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s | 8 --------
 4 files changed, 31 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
 delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
 delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
 delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
deleted file mode 100644
index 210ea563b85f5..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v | FileCheck %s
-# REQUIRES: aarch64-registered-target
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
deleted file mode 100644
index 0a08dcc2b7715..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
+++ /dev/null
@@ -1,7 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
deleted file mode 100644
index 79ed1d3aaeb84..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D | FileCheck %s
-# REQUIRES: aarch64-registered-target
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
deleted file mode 100644
index e0d308f16c0f9..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S | FileCheck %s
-# REQUIRES: aarch64-registered-target
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file

>From 230aade9955299a8bf4e7327156ab6c9d950389e Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Sun, 23 Feb 2025 20:50:00 -0800
Subject: [PATCH 06/18] Modified: REQUIRES (aarch64) check is only needed once
 per test file

---
 llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index 02ab79fe69264..c1f6bcb7719e8 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -1,14 +1,14 @@
+# REQUIRES: aarch64-registered-target
+
 # ppr register class initialization testcase 
 # ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
 # RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
-# REQUIRES: aarch64-registered-target
 # PPR-NOT: setRegTo is not implemented, results will be unreliable
 # PPR: assembled_snippet: {{.*}}C0035FD6
 
 # zpr register class initialization testcase 
 # ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
 # RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
-# REQUIRES: aarch64-registered-target
 # ZPR-NOT: setRegTo is not implemented, results will be unreliable
 # ZPR: assembled_snippet: {{.*}}C0035FD6
 
@@ -19,6 +19,5 @@
 
 # fpr128 register class initialization testcase
 # RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
-# REQUIRES: aarch64-registered-target
 # FPR128-NOT: setRegTo is not implemented, results will be unreliable
 # FPR128: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file

>From aab854bcc638190e71e480322ddf0692acdf6783 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Sun, 23 Feb 2025 20:52:11 -0800
Subject: [PATCH 07/18] Modified: PPR register class should be set with
 immediate value 31 for all lanes true

---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 76afb8b57ef56..4c95945fa63ba 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -49,7 +49,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
   // For PPR, we typically use PTRUE instruction to set predicate registers
   return MCInstBuilder(AArch64::PTRUE_B)
       .addReg(Reg)
-      .addImm(0xFFFF); // All lanes true for 16 bits
+      .addImm(31); // All lanes true for 16 bits
 }
 
 // Generates instruction to load an FP immediate value into a register.

>From f1e561ced0ad7c471977e22b131358e1d1708c6f Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 26 Feb 2025 19:54:02 -0800
Subject: [PATCH 08/18] Modified: Testcases to check disassembly, apart from
 setReg warning and return in assembly snippet

---
 .../llvm-exegesis/AArch64/setReg_init_check.s  | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index c1f6bcb7719e8..ce70a770741c0 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -2,22 +2,30 @@
 
 # ppr register class initialization testcase 
 # ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=FADDV_VPZ_D.o --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+# RUN: llvm-objdump -d FADDV_VPZ_D.o | FileCheck %s --check-prefix=PPR_ASM
 # PPR-NOT: setRegTo is not implemented, results will be unreliable
 # PPR: assembled_snippet: {{.*}}C0035FD6
+# PPR_ASM: {{0|4}}:	{{.*}} ptrue p{{[0-9]|1[0-5]}}
 
 # zpr register class initialization testcase 
 # ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=FADDV_VPZ_S.o --opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
+# RUN: llvm-objdump -d FADDV_VPZ_S.o | FileCheck %s --check-prefix=ZPR_ASM
 # ZPR-NOT: setRegTo is not implemented, results will be unreliable
 # ZPR: assembled_snippet: {{.*}}C0035FD6
+# ZPR_ASM: {{4|8}}: {{.*}} dupm z{{[0-9]|[1-2][0-9]|3[0-1]}}.s, #0x1
 
 # fpr64 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=ADDVv4i16v.o --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+# RUN: llvm-objdump -d ADDVv4i16v.o | FileCheck %s --check-prefix=FPR64-ASM
 # FPR64-NOT: setRegTo is not implemented, results will be unreliable
 # FPR64: assembled_snippet: {{.*}}C0035FD6
+# FPR64-ASM: {{0|4}}:	{{.*}} fmov d{{[0-9]|[1-2][0-9]|3[0-1]}}, #2.0{{.*}}
 
 # fpr128 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=ADDVv16i8v.o --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+# RUN: llvm-objdump -d ADDVv16i8v.o | FileCheck %s --check-prefix=FPR128-ASM
 # FPR128-NOT: setRegTo is not implemented, results will be unreliable
-# FPR128: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
+# FPR128: assembled_snippet: {{.*}}C0035FD6
+# FPR128-ASM: {{0|4}}:	{{.*}} movi v{{[0-9]|[1-2][0-9]|3[0-1]}}.2d, {{#0x0|#0000000000000000}}

>From 25b02b6b4ac5eda652a0e39d49d6666b0e2d3380 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 26 Feb 2025 19:58:00 -0800
Subject: [PATCH 09/18] Modified: Fixed Typo in assertion message

---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 4c95945fa63ba..dbca08603fd78 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -36,7 +36,7 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
 
 static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
                                const APInt &Value) {
-  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the PPR Register");
+  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the ZPR Register");
   // For ZPR, we typically use DUPM instruction to load immediate values
   return MCInstBuilder(AArch64::DUPM_ZI)
       .addReg(Reg)

>From b83b52de93bd000a7328222e888ff6eb2c5d0ccd Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Thu, 27 Feb 2025 00:42:02 -0800
Subject: [PATCH 10/18] Modified: Simplified regex checks for disassembly

---
 .../llvm-exegesis/AArch64/setReg_init_check.s | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index ce70a770741c0..195365ad62f14 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -2,30 +2,30 @@
 
 # ppr register class initialization testcase 
 # ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=FADDV_VPZ_D.o --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
-# RUN: llvm-objdump -d FADDV_VPZ_D.o | FileCheck %s --check-prefix=PPR_ASM
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=PPR_ASM
 # PPR-NOT: setRegTo is not implemented, results will be unreliable
 # PPR: assembled_snippet: {{.*}}C0035FD6
-# PPR_ASM: {{0|4}}:	{{.*}} ptrue p{{[0-9]|1[0-5]}}
+# PPR_ASM: ptrue p{{[0-9]+}}
 
 # zpr register class initialization testcase 
 # ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=FADDV_VPZ_S.o --opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
-# RUN: llvm-objdump -d FADDV_VPZ_S.o | FileCheck %s --check-prefix=ZPR_ASM
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
+# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=ZPR_ASM
 # ZPR-NOT: setRegTo is not implemented, results will be unreliable
 # ZPR: assembled_snippet: {{.*}}C0035FD6
-# ZPR_ASM: {{4|8}}: {{.*}} dupm z{{[0-9]|[1-2][0-9]|3[0-1]}}.s, #0x1
+# ZPR_ASM: dupm z{{[0-9]+}}.s, #0x1
 
 # fpr64 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=ADDVv4i16v.o --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
-# RUN: llvm-objdump -d ADDVv4i16v.o | FileCheck %s --check-prefix=FPR64-ASM
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=FPR64-ASM
 # FPR64-NOT: setRegTo is not implemented, results will be unreliable
 # FPR64: assembled_snippet: {{.*}}C0035FD6
-# FPR64-ASM: {{0|4}}:	{{.*}} fmov d{{[0-9]|[1-2][0-9]|3[0-1]}}, #2.0{{.*}}
+# FPR64-ASM: fmov d{{[0-9]+}}, #2.0
 
 # fpr128 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=ADDVv16i8v.o --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
-# RUN: llvm-objdump -d ADDVv16i8v.o | FileCheck %s --check-prefix=FPR128-ASM
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=FPR128-ASM
 # FPR128-NOT: setRegTo is not implemented, results will be unreliable
 # FPR128: assembled_snippet: {{.*}}C0035FD6
-# FPR128-ASM: {{0|4}}:	{{.*}} movi v{{[0-9]|[1-2][0-9]|3[0-1]}}.2d, {{#0x0|#0000000000000000}}
+# FPR128-ASM: movi v{{[0-9]+}}.2d, {{#0}}

>From 433b62e908295633b0387ec026331bc05d4cc36b Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Fri, 28 Feb 2025 02:24:17 -0800
Subject: [PATCH 11/18] Modified: Testcases to strictly check correct order of
 instruction in disassembly, except str

---
 .../llvm-exegesis/AArch64/setReg_init_check.s | 68 +++++++++++--------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index 195365ad62f14..b888d6a50e6c7 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -1,31 +1,45 @@
-# REQUIRES: aarch64-registered-target
+; REQUIRES: aarch64-registered-target
 
-# ppr register class initialization testcase 
-# ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
-# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=PPR_ASM
-# PPR-NOT: setRegTo is not implemented, results will be unreliable
-# PPR: assembled_snippet: {{.*}}C0035FD6
-# PPR_ASM: ptrue p{{[0-9]+}}
+# PPR REGISTER CLASS INITIALIZATION TESTCASE 
+# IDEALLY WE SHOULD USE PTRUE_{B/H?S/D} INSTEAD OF FADDV_VPZ_D FOR ISOLATED TESTCASE; BUT EXEGESIS DOES NOT SUPPORT PTRUE_{B/H?S/D} YET;
+; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+; RUN: llvm-objdump -d %d > %t.s
+; RUN: FileCheck %s --check-prefix=PPR_ASM < %t.s
+; PPR-NOT: setRegTo is not implemented, results will be unreliable
+; PPR: assembled_snippet: {{.*}}C0035FD6
+; PPR_ASM: {{<foo>:}}
+; PPR_ASM: ptrue p{{[0-9]+}}.b
+; PPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
+; PPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
-# zpr register class initialization testcase 
-# ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
-# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=ZPR_ASM
-# ZPR-NOT: setRegTo is not implemented, results will be unreliable
-# ZPR: assembled_snippet: {{.*}}C0035FD6
-# ZPR_ASM: dupm z{{[0-9]+}}.s, #0x1
+# ZPR REGISTER CLASS INITIALIZATION TESTCASE 
+# IDEALLY WE SHOULD USE PTRUE_{B/H?S/D} INSTEAD OF FADDV_VPZ_D FOR ISOLATED TESTCASE; BUT EXEGESIS DOES NOT SUPPORT PTRUE_{B/H?S/D} YET;
+; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=ZPR
+; RUN: llvm-objdump -d %d > %t.s
+; RUN: FileCheck %s --check-prefix=ZPR_ASM < %t.s
+; ZPR-NOT: setRegTo is not implemented, results will be unreliable
+; ZPR: assembled_snippet: {{.*}}C0035FD6
+; ZPR_ASM: {{<foo>:}}
+; ZPR_ASM: ptrue p{{[0-9]+}}.b
+; ZPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
+; ZPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
-# fpr64 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
-# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=FPR64-ASM
-# FPR64-NOT: setRegTo is not implemented, results will be unreliable
-# FPR64: assembled_snippet: {{.*}}C0035FD6
-# FPR64-ASM: fmov d{{[0-9]+}}, #2.0
+# FPR128 REGISTER CLASS INITIALIZATION TESTCASE
+; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+; RUN: llvm-objdump -d %d > %t.s
+; RUN: FileCheck %s --check-prefix=FPR128-ASM < %t.s
+; FPR128-NOT: setRegTo is not implemented, results will be unreliable
+; FPR128: assembled_snippet: {{.*}}C0035FD6
+; FPR128-ASM: {{<foo>:}}
+; FPR128-ASM: movi v{{[0-9]+}}.2d, #0000000000000000
+; FPR128-ASM-NEXT: addv b{{[0-9]+}}, v{{[0-9]+}}.16b
 
-# fpr128 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
-# RUN: llvm-objdump -d %d | FileCheck %s --check-prefix=FPR128-ASM
-# FPR128-NOT: setRegTo is not implemented, results will be unreliable
-# FPR128: assembled_snippet: {{.*}}C0035FD6
-# FPR128-ASM: movi v{{[0-9]+}}.2d, {{#0}}
+# FPR64 REGISTER CLASS INITIALIZATION TESTCASE
+; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+; RUN: llvm-objdump -d %d > %t.s
+; RUN: FileCheck %s --check-prefix=FPR64-ASM < %t.s
+; FPR64-NOT: setRegTo is not implemented, results will be unreliable
+; FPR64: assembled_snippet: {{.*}}C0035FD6
+; FPR64-ASM: {{<foo>:}}
+; FPR64-ASM: fmov d{{[0-9]+}}, {{#2.0+|#2\.000000000000000000e\+00}}
+; FPR64-ASM-NEXT: addv h{{[0-9]+}}, v{{[0-9]+}}.4h

>From 482a0a3ec599e00c5776fd5cab0bb98f25505e71 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Tue, 4 Mar 2025 03:40:06 -0800
Subject: [PATCH 12/18] Modified: Stricter asserts for checking immediate
 (Value) to be set in permissible range of base instruction. Style nits
 changes to Testcases.

---
 .../llvm-exegesis/AArch64/setReg_init_check.s | 82 +++++++++----------
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 41 +++++++---
 2 files changed, 69 insertions(+), 54 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index b888d6a50e6c7..407eb918faf42 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -1,45 +1,45 @@
-; REQUIRES: aarch64-registered-target
+REQUIRES: aarch64-registered-target
 
-# PPR REGISTER CLASS INITIALIZATION TESTCASE 
-# IDEALLY WE SHOULD USE PTRUE_{B/H?S/D} INSTEAD OF FADDV_VPZ_D FOR ISOLATED TESTCASE; BUT EXEGESIS DOES NOT SUPPORT PTRUE_{B/H?S/D} YET;
-; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
-; RUN: llvm-objdump -d %d > %t.s
-; RUN: FileCheck %s --check-prefix=PPR_ASM < %t.s
-; PPR-NOT: setRegTo is not implemented, results will be unreliable
-; PPR: assembled_snippet: {{.*}}C0035FD6
-; PPR_ASM: {{<foo>:}}
-; PPR_ASM: ptrue p{{[0-9]+}}.b
-; PPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
-; PPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
+## PPR Register Class Initialization Testcase
+## Ideally, we should use PTRUE_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support PTRUE_{B/H/S/D}.
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+RUN: llvm-objdump -d %d > %t.s
+RUN: FileCheck %s --check-prefix=PPR_ASM < %t.s
+PPR-NOT: setRegTo is not implemented, results will be unreliable
+PPR: assembled_snippet: {{.*}}C0035FD6
+PPR_ASM: {{<foo>:}}
+PPR_ASM: ptrue p{{[0-9]+}}.b
+PPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
+PPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
-# ZPR REGISTER CLASS INITIALIZATION TESTCASE 
-# IDEALLY WE SHOULD USE PTRUE_{B/H?S/D} INSTEAD OF FADDV_VPZ_D FOR ISOLATED TESTCASE; BUT EXEGESIS DOES NOT SUPPORT PTRUE_{B/H?S/D} YET;
-; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=ZPR
-; RUN: llvm-objdump -d %d > %t.s
-; RUN: FileCheck %s --check-prefix=ZPR_ASM < %t.s
-; ZPR-NOT: setRegTo is not implemented, results will be unreliable
-; ZPR: assembled_snippet: {{.*}}C0035FD6
-; ZPR_ASM: {{<foo>:}}
-; ZPR_ASM: ptrue p{{[0-9]+}}.b
-; ZPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
-; ZPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
+## ZPR Register Class Initialization Testcase
+## Ideally, we should use PTRUE_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support PTRUE_{B/H/S/D}.
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=ZPR
+RUN: llvm-objdump -d %d > %t.s
+RUN: FileCheck %s --check-prefix=ZPR_ASM < %t.s
+ZPR-NOT: setRegTo is not implemented, results will be unreliable
+ZPR: assembled_snippet: {{.*}}C0035FD6
+ZPR_ASM: {{<foo>:}}
+ZPR_ASM: ptrue p{{[0-9]+}}.b
+ZPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
+ZPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
-# FPR128 REGISTER CLASS INITIALIZATION TESTCASE
-; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
-; RUN: llvm-objdump -d %d > %t.s
-; RUN: FileCheck %s --check-prefix=FPR128-ASM < %t.s
-; FPR128-NOT: setRegTo is not implemented, results will be unreliable
-; FPR128: assembled_snippet: {{.*}}C0035FD6
-; FPR128-ASM: {{<foo>:}}
-; FPR128-ASM: movi v{{[0-9]+}}.2d, #0000000000000000
-; FPR128-ASM-NEXT: addv b{{[0-9]+}}, v{{[0-9]+}}.16b
+## FPR128 Register Class Initialization Testcase
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+RUN: llvm-objdump -d %d > %t.s
+RUN: FileCheck %s --check-prefix=FPR128-ASM < %t.s
+FPR128-NOT: setRegTo is not implemented, results will be unreliable
+FPR128: assembled_snippet: {{.*}}C0035FD6
+FPR128-ASM: {{<foo>:}}
+FPR128-ASM: movi v{{[0-9]+}}.2d, #0000000000000000
+FPR128-ASM-NEXT: addv b{{[0-9]+}}, v{{[0-9]+}}.16b
 
-# FPR64 REGISTER CLASS INITIALIZATION TESTCASE
-; RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
-; RUN: llvm-objdump -d %d > %t.s
-; RUN: FileCheck %s --check-prefix=FPR64-ASM < %t.s
-; FPR64-NOT: setRegTo is not implemented, results will be unreliable
-; FPR64: assembled_snippet: {{.*}}C0035FD6
-; FPR64-ASM: {{<foo>:}}
-; FPR64-ASM: fmov d{{[0-9]+}}, {{#2.0+|#2\.000000000000000000e\+00}}
-; FPR64-ASM-NEXT: addv h{{[0-9]+}}, v{{[0-9]+}}.4h
+## FPR64 Register Class Initialization Testcase
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+RUN: llvm-objdump -d %d > %t.s
+RUN: FileCheck %s --check-prefix=FPR64-ASM < %t.s
+FPR64-NOT: setRegTo is not implemented, results will be unreliable
+FPR64: assembled_snippet: {{.*}}C0035FD6
+FPR64-ASM: {{<foo>:}}
+FPR64-ASM: fmov d{{[0-9]+}}, {{#2.0+|#2\.000000000000000000e\+00}}
+FPR64-ASM-NEXT: addv h{{[0-9]+}}, v{{[0-9]+}}.4h
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index dbca08603fd78..b911722a51bfd 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,7 +28,9 @@ static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
 // Generates instruction to load an immediate value into a register.
 static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
                             const APInt &Value) {
-  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the Register"); 
+  // 0 <= Value.getZExtValue() < 2**16
+  assert(Value.getZExtValue() < (1 << 16) &&
+         "Value must be in the range of the immediate opcode");
   return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());
@@ -36,7 +38,9 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
 
 static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
                                const APInt &Value) {
-  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the ZPR Register");
+  // 0 <= Value.getZExtValue() < 2**13
+  assert(Value.getZExtValue() < (1 << 13) &&
+         "Value must be in the range of the immediate opcode");
   // For ZPR, we typically use DUPM instruction to load immediate values
   return MCInstBuilder(AArch64::DUPM_ZI)
       .addReg(Reg)
@@ -45,14 +49,13 @@ static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
 
 static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
                                const APInt &Value) {
-  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the PPR Register"); 
   // For PPR, we typically use PTRUE instruction to set predicate registers
   return MCInstBuilder(AArch64::PTRUE_B)
       .addReg(Reg)
       .addImm(31); // All lanes true for 16 bits
 }
 
-// Generates instruction to load an FP immediate value into a register.
+// Fetch base-instruction to load an FP immediate value into a register.
 static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
   switch (RegBitWidth) {
   case 64:
@@ -63,11 +66,23 @@ static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
   llvm_unreachable("Invalid Value Width");
 }
 
-
 // Generates instruction to load an FP immediate value into a register.
 static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
-                            const APInt &Value) {
-  assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the FP Register");
+                              const APInt &Value) {
+  // -31 <= Value.getZExtValue() <= 31
+  assert(Value.getZExtValue() <= 31 &&
+         "Value must be in the range of the immediate opcode");
+  return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
+      .addReg(Reg)
+      .addImm(Value.getZExtValue());
+}
+
+// Generates instruction to load an FP128 immediate value into a register.
+static MCInst loadFP128Immediate(MCRegister Reg, unsigned RegBitWidth,
+                                 const APInt &Value) {
+  // 0 <= Value.getZExtValue() < 2**8
+  assert(Value.getZExtValue() < (1 << 8) &&
+         "Value must be in the range of the immediate opcode");
   return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());
@@ -90,14 +105,14 @@ class ExegesisAArch64Target : public ExegesisTarget {
     if (AArch64::GPR64RegClass.contains(Reg))
       return {loadImmediate(Reg, 64, Value)};
     if (AArch64::PPRRegClass.contains(Reg))
-      return {loadPPRImmediate(Reg, 16, Value)}; 
-    if (AArch64::FPR64RegClass.contains(Reg)) 
+      return {loadPPRImmediate(Reg, 16, Value)};
+    if (AArch64::FPR64RegClass.contains(Reg))
       return {loadFPImmediate(Reg, 64, Value)};
-    if (AArch64::FPR128RegClass.contains(Reg)) 
-      return {loadFPImmediate(Reg, 128, Value)};
-    if (AArch64::ZPRRegClass.contains(Reg)) 
+    if (AArch64::FPR128RegClass.contains(Reg))
+      return {loadFP128Immediate(Reg, 128, Value)};
+    if (AArch64::ZPRRegClass.contains(Reg))
       return {loadZPRImmediate(Reg, 128, Value)};
-    
+
     errs() << "setRegTo is not implemented, results will be unreliable\n";
     return {};
   }

>From 951e05e632e1bae3fcc43d2bedd933a7bea5d90a Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 5 Mar 2025 00:00:56 -0800
Subject: [PATCH 13/18] Modified: Base-instruction of FPR64 reg class to MOVID
 & ZPR reg class to DUP_ZI_D

---
 .../llvm-exegesis/lib/AArch64/Target.cpp      | 31 +++++++------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index b911722a51bfd..dbd276bf9630a 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -8,6 +8,7 @@
 #include "../Target.h"
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
+#include <iostream>
 
 #define GET_AVAILABLE_OPCODE_CHECKER
 #include "AArch64GenInstrInfo.inc"
@@ -38,13 +39,13 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
 
 static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
                                const APInt &Value) {
-  // 0 <= Value.getZExtValue() < 2**13
-  assert(Value.getZExtValue() < (1 << 13) &&
+  // -127 <= Value.getZExtValue() < 128
+  assert(Value.getZExtValue() < (1 << 7) &&
          "Value must be in the range of the immediate opcode");
-  // For ZPR, we typically use DUPM instruction to load immediate values
-  return MCInstBuilder(AArch64::DUPM_ZI)
+  return MCInstBuilder(AArch64::DUP_ZI_D)
       .addReg(Reg)
-      .addImm(Value.getZExtValue());
+      .addImm(Value.getZExtValue())
+      .addImm(0);
 }
 
 static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
@@ -59,7 +60,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
 static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
   switch (RegBitWidth) {
   case 64:
-    return AArch64::FMOVDi; 
+    return AArch64::MOVID; //FMOVDi;
   case 128:
     return AArch64::MOVIv2d_ns;
   }
@@ -69,18 +70,10 @@ static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
 // Generates instruction to load an FP immediate value into a register.
 static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
                               const APInt &Value) {
-  // -31 <= Value.getZExtValue() <= 31
-  assert(Value.getZExtValue() <= 31 &&
-         "Value must be in the range of the immediate opcode");
-  return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
-      .addReg(Reg)
-      .addImm(Value.getZExtValue());
-}
-
-// Generates instruction to load an FP128 immediate value into a register.
-static MCInst loadFP128Immediate(MCRegister Reg, unsigned RegBitWidth,
-                                 const APInt &Value) {
-  // 0 <= Value.getZExtValue() < 2**8
+  // 0 <= Value.getZExtValue() < 2**8 (int Value)
+  // -31.0 <= Value.getZExtValue() < 31.0 (frac Value)
+  assert(Value.getZExtValue() == 0 &&
+         "Value should be zero, temporary fix for now");
   assert(Value.getZExtValue() < (1 << 8) &&
          "Value must be in the range of the immediate opcode");
   return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
@@ -109,7 +102,7 @@ class ExegesisAArch64Target : public ExegesisTarget {
     if (AArch64::FPR64RegClass.contains(Reg))
       return {loadFPImmediate(Reg, 64, Value)};
     if (AArch64::FPR128RegClass.contains(Reg))
-      return {loadFP128Immediate(Reg, 128, Value)};
+      return {loadFPImmediate(Reg, 128, Value)};
     if (AArch64::ZPRRegClass.contains(Reg))
       return {loadZPRImmediate(Reg, 128, Value)};
 

>From ea8b28b6da4ba5becc8828d8cd20ab640dfa53a8 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 5 Mar 2025 00:02:16 -0800
Subject: [PATCH 14/18] Modified: Updated testcase checks for disassembly to
 match the new base instructions

---
 .../test/tools/llvm-exegesis/AArch64/setReg_init_check.s | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index 407eb918faf42..068e2a6373146 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -9,11 +9,11 @@ PPR-NOT: setRegTo is not implemented, results will be unreliable
 PPR: assembled_snippet: {{.*}}C0035FD6
 PPR_ASM: {{<foo>:}}
 PPR_ASM: ptrue p{{[0-9]+}}.b
-PPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
+PPR_ASM-NEXT: mov z{{[0-9]+}}.d, #0x0
 PPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
 ## ZPR Register Class Initialization Testcase
-## Ideally, we should use PTRUE_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support PTRUE_{B/H/S/D}.
+## Ideally, we should use DUP_ZI_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support DUP_ZI_{B/H/S/D}.
 RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=ZPR
 RUN: llvm-objdump -d %d > %t.s
 RUN: FileCheck %s --check-prefix=ZPR_ASM < %t.s
@@ -21,7 +21,7 @@ ZPR-NOT: setRegTo is not implemented, results will be unreliable
 ZPR: assembled_snippet: {{.*}}C0035FD6
 ZPR_ASM: {{<foo>:}}
 ZPR_ASM: ptrue p{{[0-9]+}}.b
-ZPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1
+ZPR_ASM-NEXT: mov z{{[0-9]+}}.d, #0x0
 ZPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
 ## FPR128 Register Class Initialization Testcase
@@ -41,5 +41,6 @@ RUN: FileCheck %s --check-prefix=FPR64-ASM < %t.s
 FPR64-NOT: setRegTo is not implemented, results will be unreliable
 FPR64: assembled_snippet: {{.*}}C0035FD6
 FPR64-ASM: {{<foo>:}}
-FPR64-ASM: fmov d{{[0-9]+}}, {{#2.0+|#2\.000000000000000000e\+00}}
+## For FMOVDi base-instruction : fmov d{{[0-9]+}}, {{#2.0+|#2\.000000000000000000e\+00}}
+FPR64-ASM: movi d{{[0-9]+}}, #0000000000000000
 FPR64-ASM-NEXT: addv h{{[0-9]+}}, v{{[0-9]+}}.4h

>From b5853a948d189de69a6e3f52be544ae0ad0b0e9e Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 5 Mar 2025 01:03:05 -0800
Subject: [PATCH 15/18] Modified: reverted headers

---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index dbd276bf9630a..70a8df9d0c96a 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -8,7 +8,6 @@
 #include "../Target.h"
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
-#include <iostream>
 
 #define GET_AVAILABLE_OPCODE_CHECKER
 #include "AArch64GenInstrInfo.inc"

>From 2c16af659c04d87f05e0f9c53fee0e0e02aa3e4b Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 5 Mar 2025 02:20:54 -0800
Subject: [PATCH 16/18] Modified: Simplified testfile to check only
 disassembly.

---
 .../llvm-exegesis/AArch64/setReg_init_check.s | 51 ++++++++-----------
 1 file changed, 22 insertions(+), 29 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index 068e2a6373146..1b69ac68a2b30 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -1,46 +1,39 @@
 REQUIRES: aarch64-registered-target
 
 ## PPR Register Class Initialization Testcase
-## Ideally, we should use PTRUE_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support PTRUE_{B/H/S/D}.
-RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+## Ideally, we should use PTRUE_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; 
+## However, exegesis does not yet support PTRUE_{B/H/S/D}.
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 
 RUN: llvm-objdump -d %d > %t.s
 RUN: FileCheck %s --check-prefix=PPR_ASM < %t.s
-PPR-NOT: setRegTo is not implemented, results will be unreliable
-PPR: assembled_snippet: {{.*}}C0035FD6
-PPR_ASM: {{<foo>:}}
-PPR_ASM: ptrue p{{[0-9]+}}.b
-PPR_ASM-NEXT: mov z{{[0-9]+}}.d, #0x0
-PPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
+PPR_ASM:            <foo>:
+PPR_ASM:            ptrue p{{[0-9]+}}.b
+PPR_ASM-NEXT:       mov z{{[0-9]+}}.d, #0x0
+PPR_ASM-NEXT:       faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
 ## ZPR Register Class Initialization Testcase
-## Ideally, we should use DUP_ZI_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support DUP_ZI_{B/H/S/D}.
-RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=ZPR
+## Ideally, we should use DUP_ZI_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; 
+## However, exegesis does not yet support DUP_ZI_{B/H/S/D}.
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 
 RUN: llvm-objdump -d %d > %t.s
 RUN: FileCheck %s --check-prefix=ZPR_ASM < %t.s
-ZPR-NOT: setRegTo is not implemented, results will be unreliable
-ZPR: assembled_snippet: {{.*}}C0035FD6
-ZPR_ASM: {{<foo>:}}
-ZPR_ASM: ptrue p{{[0-9]+}}.b
-ZPR_ASM-NEXT: mov z{{[0-9]+}}.d, #0x0
-ZPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
+ZPR_ASM:            <foo>:
+ZPR_ASM:            ptrue p{{[0-9]+}}.b
+ZPR_ASM-NEXT:       mov z{{[0-9]+}}.d, #0x0
+ZPR_ASM-NEXT:       faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}}
 
 ## FPR128 Register Class Initialization Testcase
-RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 
 RUN: llvm-objdump -d %d > %t.s
 RUN: FileCheck %s --check-prefix=FPR128-ASM < %t.s
-FPR128-NOT: setRegTo is not implemented, results will be unreliable
-FPR128: assembled_snippet: {{.*}}C0035FD6
-FPR128-ASM: {{<foo>:}}
-FPR128-ASM: movi v{{[0-9]+}}.2d, #0000000000000000
-FPR128-ASM-NEXT: addv b{{[0-9]+}}, v{{[0-9]+}}.16b
+FPR128-ASM:         <foo>:
+FPR128-ASM:         movi v{{[0-9]+}}.2d, #0000000000000000
+FPR128-ASM-NEXT:    addv b{{[0-9]+}}, v{{[0-9]+}}.16b
 
 ## FPR64 Register Class Initialization Testcase
-RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1
 RUN: llvm-objdump -d %d > %t.s
 RUN: FileCheck %s --check-prefix=FPR64-ASM < %t.s
-FPR64-NOT: setRegTo is not implemented, results will be unreliable
-FPR64: assembled_snippet: {{.*}}C0035FD6
-FPR64-ASM: {{<foo>:}}
-## For FMOVDi base-instruction : fmov d{{[0-9]+}}, {{#2.0+|#2\.000000000000000000e\+00}}
-FPR64-ASM: movi d{{[0-9]+}}, #0000000000000000
-FPR64-ASM-NEXT: addv h{{[0-9]+}}, v{{[0-9]+}}.4h
+FPR64-ASM:          <foo>:
+FPR64-ASM:          movi d{{[0-9]+}}, #0000000000000000
+FPR64-ASM-NEXT:     addv h{{[0-9]+}}, v{{[0-9]+}}.4h

>From c21ee8bfa51ff7a51060c1048258bf9af086cf51 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 5 Mar 2025 02:21:50 -0800
Subject: [PATCH 17/18] Modified: Removed redundant comments and asserts.

---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 70a8df9d0c96a..781ac37bfc34c 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,7 +28,6 @@ static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
 // Generates instruction to load an immediate value into a register.
 static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
                             const APInt &Value) {
-  // 0 <= Value.getZExtValue() < 2**16
   assert(Value.getZExtValue() < (1 << 16) &&
          "Value must be in the range of the immediate opcode");
   return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
@@ -38,7 +37,6 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
 
 static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
                                const APInt &Value) {
-  // -127 <= Value.getZExtValue() < 128
   assert(Value.getZExtValue() < (1 << 7) &&
          "Value must be in the range of the immediate opcode");
   return MCInstBuilder(AArch64::DUP_ZI_D)
@@ -69,12 +67,8 @@ static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
 // Generates instruction to load an FP immediate value into a register.
 static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
                               const APInt &Value) {
-  // 0 <= Value.getZExtValue() < 2**8 (int Value)
-  // -31.0 <= Value.getZExtValue() < 31.0 (frac Value)
   assert(Value.getZExtValue() == 0 &&
-         "Value should be zero, temporary fix for now");
-  assert(Value.getZExtValue() < (1 << 8) &&
-         "Value must be in the range of the immediate opcode");
+         "Expected initialisation value 0");
   return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());

>From 5cda550ac927a439d62db4deb75834146aee336a Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 5 Mar 2025 23:49:21 -0800
Subject: [PATCH 18/18] Modified: Revert back to asserting bit width for GPR
 Register classes

---
 llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 781ac37bfc34c..ed36cb2f75d5b 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,8 +28,8 @@ static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
 // Generates instruction to load an immediate value into a register.
 static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
                             const APInt &Value) {
-  assert(Value.getZExtValue() < (1 << 16) &&
-         "Value must be in the range of the immediate opcode");
+  assert (Value.getBitWidth() <= RegBitWidth &&
+          "Value must fit in the Register");
   return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());



More information about the llvm-commits mailing list