[llvm] Adding support in llvm-exegesis for Aarch64 for handling FPR64/128, PPR16 and ZPR128 reg class. (PR #127564)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 20:00:25 PST 2025
https://github.com/lakshayk-nv updated https://github.com/llvm/llvm-project/pull/127564
>From 624a7eeeb08df37b2cb7349dc3ce9bfa198d2abe Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Mon, 17 Feb 2025 02:25:29 -0800
Subject: [PATCH 1/9] Adding support for FPR64/128, PPR16 and ZPR128 in setReg
of llvm-exegesis for Aarch64
---
.../llvm-exegesis/lib/AArch64/Target.cpp | 57 ++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 5a7cc6f5e30d3..806565a26f19b 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -35,6 +35,48 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
.addImm(Value.getZExtValue());
}
+static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
+ const APInt &Value) {
+ if (Value.getBitWidth() > RegBitWidth)
+ llvm_unreachable("Value must fit in the ZPR Register");
+ // For ZPR, we typically use DUPM instruction to load immediate values
+ return MCInstBuilder(AArch64::DUPM_ZI)
+ .addReg(Reg)
+ .addImm(Value.getZExtValue());
+}
+
+static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
+ const APInt &Value) {
+ if (Value.getBitWidth() > RegBitWidth)
+ llvm_unreachable("Value must fit in the PPR Register");
+ // For PPR, we typically use PTRUE instruction to set predicate registers
+ return MCInstBuilder(AArch64::PTRUE_B)
+ .addReg(Reg)
+ .addImm(31); // All lanes true
+}
+
+// Generates instruction to load an FP immediate value into a register.
+static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
+ switch (RegBitWidth) {
+ case 64:
+ return AArch64::FMOVDi;
+ case 128:
+ return AArch64::MOVIv2d_ns;
+ }
+ llvm_unreachable("Invalid Value Width");
+}
+
+
+// Generates instruction to load an FP immediate value into a register.
+static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
+ const APInt &Value) {
+ if (Value.getBitWidth() > RegBitWidth)
+ llvm_unreachable("Value must fit in the FP Register");
+ return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
+ .addReg(Reg)
+ .addImm(Value.getZExtValue());
+}
+
#include "AArch64GenExegesis.inc"
namespace {
@@ -49,8 +91,21 @@ class ExegesisAArch64Target : public ExegesisTarget {
const APInt &Value) const override {
if (AArch64::GPR32RegClass.contains(Reg))
return {loadImmediate(Reg, 32, Value)};
+
if (AArch64::GPR64RegClass.contains(Reg))
return {loadImmediate(Reg, 64, Value)};
+
+ if (AArch64::PPRRegClass.contains(Reg))
+ return {loadPPRImmediate(Reg, 16, Value)};
+
+ if (AArch64::FPR64RegClass.contains(Reg))
+ return {loadFPImmediate(Reg, 64, Value)};
+ if (AArch64::FPR128RegClass.contains(Reg))
+ return {loadFPImmediate(Reg, 128, Value)};
+
+ if (AArch64::ZPRRegClass.contains(Reg))
+ return {loadZPRImmediate(Reg, 128, Value)};
+
errs() << "setRegTo is not implemented, results will be unreliable\n";
return {};
}
@@ -77,4 +132,4 @@ void InitializeAArch64ExegesisTarget() {
}
} // namespace exegesis
-} // namespace llvm
+} // namespace llvm
\ No newline at end of file
>From 4c4d605ba2aa3a4b1f510bac246d7ff75eb2993a Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Mon, 17 Feb 2025 19:51:16 -0800
Subject: [PATCH 2/9] Adding support for FPR64/128, PPR16 and ZPR128 in setReg
of llvm-exegesis for Aarch64
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 806565a26f19b..dc312f4916703 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -91,7 +91,6 @@ class ExegesisAArch64Target : public ExegesisTarget {
const APInt &Value) const override {
if (AArch64::GPR32RegClass.contains(Reg))
return {loadImmediate(Reg, 32, Value)};
-
if (AArch64::GPR64RegClass.contains(Reg))
return {loadImmediate(Reg, 64, Value)};
>From d34cb6d157ccc8b1abf55c71494144a42b5bb546 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Thu, 20 Feb 2025 02:27:00 -0800
Subject: [PATCH 3/9] Added assert that Value is in range for the generated
instructions and testcases for patch FPR64/128, PPR, ZPR128
---
.../AArch64/reg_based/reg_fpr128.s | 8 ++++++++
.../AArch64/reg_based/reg_fpr64.s | 7 +++++++
.../llvm-exegesis/AArch64/reg_based/reg_ppr.s | 8 ++++++++
.../llvm-exegesis/AArch64/reg_based/reg_zpr.s | 8 ++++++++
.../tools/llvm-exegesis/lib/AArch64/Target.cpp | 18 +++++++-----------
5 files changed, 38 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
new file mode 100644
index 0000000000000..210ea563b85f5
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
@@ -0,0 +1,8 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v | FileCheck %s
+# REQUIRES: aarch64-registered-target
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
new file mode 100644
index 0000000000000..0a08dcc2b7715
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
new file mode 100644
index 0000000000000..79ed1d3aaeb84
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
@@ -0,0 +1,8 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D | FileCheck %s
+# REQUIRES: aarch64-registered-target
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
new file mode 100644
index 0000000000000..e0d308f16c0f9
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
@@ -0,0 +1,8 @@
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S | FileCheck %s
+# REQUIRES: aarch64-registered-target
+
+# Check that warning of not initializing registers is not printed
+# CHECK-NOT: setRegTo is not implemented, results will be unreliable
+
+# Check that we add ret (bx lr) instr to snippet
+# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index dc312f4916703..3b0265cd22ffc 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -28,8 +28,7 @@ static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
// Generates instruction to load an immediate value into a register.
static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
const APInt &Value) {
- if (Value.getBitWidth() > RegBitWidth)
- llvm_unreachable("Value must fit in the Register");
+ assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the Register");
return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
.addReg(Reg)
.addImm(Value.getZExtValue());
@@ -37,22 +36,20 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
const APInt &Value) {
- if (Value.getBitWidth() > RegBitWidth)
- llvm_unreachable("Value must fit in the ZPR Register");
+ assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the PPR Register");
// For ZPR, we typically use DUPM instruction to load immediate values
return MCInstBuilder(AArch64::DUPM_ZI)
.addReg(Reg)
- .addImm(Value.getZExtValue());
+ .addImm(0x1);
}
static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
const APInt &Value) {
- if (Value.getBitWidth() > RegBitWidth)
- llvm_unreachable("Value must fit in the PPR Register");
+ assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the PPR Register");
// For PPR, we typically use PTRUE instruction to set predicate registers
return MCInstBuilder(AArch64::PTRUE_B)
.addReg(Reg)
- .addImm(31); // All lanes true
+ .addImm(0xFFFF); // All lanes true for 16 bits
}
// Generates instruction to load an FP immediate value into a register.
@@ -70,8 +67,7 @@ static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) {
// Generates instruction to load an FP immediate value into a register.
static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
const APInt &Value) {
- if (Value.getBitWidth() > RegBitWidth)
- llvm_unreachable("Value must fit in the FP Register");
+ assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the FP Register");
return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth))
.addReg(Reg)
.addImm(Value.getZExtValue());
@@ -131,4 +127,4 @@ void InitializeAArch64ExegesisTarget() {
}
} // namespace exegesis
-} // namespace llvm
\ No newline at end of file
+} // namespace llvm
>From 53b9f0b4f982bce63bece92cf16c169fd160aa24 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Thu, 20 Feb 2025 21:34:55 -0800
Subject: [PATCH 4/9] Added combined testfile for register initialization
(PPR,ZPR,FPR64/128); And ZPR imm initialized with Value not constant
---
.../llvm-exegesis/AArch64/setReg_init_check.s | 24 +++++++++++++++++++
.../llvm-exegesis/lib/AArch64/Target.cpp | 5 +---
2 files changed, 25 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
new file mode 100644
index 0000000000000..02ab79fe69264
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -0,0 +1,24 @@
+# ppr register class initialization testcase
+# ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+# REQUIRES: aarch64-registered-target
+# PPR-NOT: setRegTo is not implemented, results will be unreliable
+# PPR: assembled_snippet: {{.*}}C0035FD6
+
+# zpr register class initialization testcase
+# ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
+# REQUIRES: aarch64-registered-target
+# ZPR-NOT: setRegTo is not implemented, results will be unreliable
+# ZPR: assembled_snippet: {{.*}}C0035FD6
+
+# fpr64 register class initialization testcase
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+# FPR64-NOT: setRegTo is not implemented, results will be unreliable
+# FPR64: assembled_snippet: {{.*}}C0035FD6
+
+# fpr128 register class initialization testcase
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+# REQUIRES: aarch64-registered-target
+# FPR128-NOT: setRegTo is not implemented, results will be unreliable
+# FPR128: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 3b0265cd22ffc..76afb8b57ef56 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -40,7 +40,7 @@ static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
// For ZPR, we typically use DUPM instruction to load immediate values
return MCInstBuilder(AArch64::DUPM_ZI)
.addReg(Reg)
- .addImm(0x1);
+ .addImm(Value.getZExtValue());
}
static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
@@ -89,15 +89,12 @@ class ExegesisAArch64Target : public ExegesisTarget {
return {loadImmediate(Reg, 32, Value)};
if (AArch64::GPR64RegClass.contains(Reg))
return {loadImmediate(Reg, 64, Value)};
-
if (AArch64::PPRRegClass.contains(Reg))
return {loadPPRImmediate(Reg, 16, Value)};
-
if (AArch64::FPR64RegClass.contains(Reg))
return {loadFPImmediate(Reg, 64, Value)};
if (AArch64::FPR128RegClass.contains(Reg))
return {loadFPImmediate(Reg, 128, Value)};
-
if (AArch64::ZPRRegClass.contains(Reg))
return {loadZPRImmediate(Reg, 128, Value)};
>From caebb7b7043cee73c33e1167b449cd4e9acf539e Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Thu, 20 Feb 2025 21:40:27 -0800
Subject: [PATCH 5/9] Deleted Indvidual testfiles
---
.../tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s | 8 --------
.../tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s | 7 -------
llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s | 8 --------
llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s | 8 --------
4 files changed, 31 deletions(-)
delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
delete mode 100644 llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
deleted file mode 100644
index 210ea563b85f5..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr128.s
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v | FileCheck %s
-# REQUIRES: aarch64-registered-target
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
deleted file mode 100644
index 0a08dcc2b7715..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_fpr64.s
+++ /dev/null
@@ -1,7 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
deleted file mode 100644
index 79ed1d3aaeb84..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_ppr.s
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D | FileCheck %s
-# REQUIRES: aarch64-registered-target
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s b/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
deleted file mode 100644
index e0d308f16c0f9..0000000000000
--- a/llvm/test/tools/llvm-exegesis/AArch64/reg_based/reg_zpr.s
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S | FileCheck %s
-# REQUIRES: aarch64-registered-target
-
-# Check that warning of not initializing registers is not printed
-# CHECK-NOT: setRegTo is not implemented, results will be unreliable
-
-# Check that we add ret (bx lr) instr to snippet
-# CHECK: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
>From 230aade9955299a8bf4e7327156ab6c9d950389e Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Sun, 23 Feb 2025 20:50:00 -0800
Subject: [PATCH 6/9] Modified: requirement(aarch64) check only required once
for a test
---
llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index 02ab79fe69264..c1f6bcb7719e8 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -1,14 +1,14 @@
+# REQUIRES: aarch64-registered-target
+
# ppr register class initialization testcase
# ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
-# REQUIRES: aarch64-registered-target
# PPR-NOT: setRegTo is not implemented, results will be unreliable
# PPR: assembled_snippet: {{.*}}C0035FD6
# zpr register class initialization testcase
# ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
-# REQUIRES: aarch64-registered-target
# ZPR-NOT: setRegTo is not implemented, results will be unreliable
# ZPR: assembled_snippet: {{.*}}C0035FD6
@@ -19,6 +19,5 @@
# fpr128 register class initialization testcase
# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
-# REQUIRES: aarch64-registered-target
# FPR128-NOT: setRegTo is not implemented, results will be unreliable
# FPR128: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
>From aab854bcc638190e71e480322ddf0692acdf6783 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Sun, 23 Feb 2025 20:52:11 -0800
Subject: [PATCH 7/9] Modified: PPR register class should be set with immediate
value 31 for all lanes true
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 76afb8b57ef56..4c95945fa63ba 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -49,7 +49,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
// For PPR, we typically use PTRUE instruction to set predicate registers
return MCInstBuilder(AArch64::PTRUE_B)
.addReg(Reg)
- .addImm(0xFFFF); // All lanes true for 16 bits
+ .addImm(31); // All lanes true for 16 bits
}
// Generates instruction to load an FP immediate value into a register.
>From f1e561ced0ad7c471977e22b131358e1d1708c6f Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 26 Feb 2025 19:54:02 -0800
Subject: [PATCH 8/9] Modified: Testcases to check disassembly, apart from
setReg warning and return in assembly snippet
---
.../llvm-exegesis/AArch64/setReg_init_check.s | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
index c1f6bcb7719e8..ce70a770741c0 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
@@ -2,22 +2,30 @@
# ppr register class initialization testcase
# ideally we should use PTRUE_{B/H?S/D} instead of FADDV_VPZ_D for isolated testcase; but exegesis does not support PTRUE_{B/H?S/D} yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=FADDV_VPZ_D.o --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR
+# RUN: llvm-objdump -d FADDV_VPZ_D.o | FileCheck %s --check-prefix=PPR_ASM
# PPR-NOT: setRegTo is not implemented, results will be unreliable
# PPR: assembled_snippet: {{.*}}C0035FD6
+# PPR_ASM: {{0|4}}: {{.*}} ptrue p{{[0-9]|1[0-5]}}
# zpr register class initialization testcase
# ideally we should use DUPM_ZI instead of FADDV_VPZ_S for isolated testcase; but exegesis does not support DUPM_ZI yet;
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=FADDV_VPZ_S.o --opcode-name=FADDV_VPZ_S 2>&1 | FileCheck %s --check-prefix=ZPR
+# RUN: llvm-objdump -d FADDV_VPZ_S.o | FileCheck %s --check-prefix=ZPR_ASM
# ZPR-NOT: setRegTo is not implemented, results will be unreliable
# ZPR: assembled_snippet: {{.*}}C0035FD6
+# ZPR_ASM: {{4|8}}: {{.*}} dupm z{{[0-9]|[1-2][0-9]|3[0-1]}}.s, #0x1
# fpr64 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=ADDVv4i16v.o --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64
+# RUN: llvm-objdump -d ADDVv4i16v.o | FileCheck %s --check-prefix=FPR64-ASM
# FPR64-NOT: setRegTo is not implemented, results will be unreliable
# FPR64: assembled_snippet: {{.*}}C0035FD6
+# FPR64-ASM: {{0|4}}: {{.*}} fmov d{{[0-9]|[1-2][0-9]|3[0-1]}}, #2.0{{.*}}
# fpr128 register class initialization testcase
-# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency -opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+# RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=ADDVv16i8v.o --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128
+# RUN: llvm-objdump -d ADDVv16i8v.o | FileCheck %s --check-prefix=FPR128-ASM
# FPR128-NOT: setRegTo is not implemented, results will be unreliable
-# FPR128: assembled_snippet: {{.*}}C0035FD6
\ No newline at end of file
+# FPR128: assembled_snippet: {{.*}}C0035FD6
+# FPR128-ASM: {{0|4}}: {{.*}} movi v{{[0-9]|[1-2][0-9]|3[0-1]}}.2d, {{#0x0|#0000000000000000}}
>From 25b02b6b4ac5eda652a0e39d49d6666b0e2d3380 Mon Sep 17 00:00:00 2001
From: Lakshay Kumar <lakshayk at nvidia.com>
Date: Wed, 26 Feb 2025 19:58:00 -0800
Subject: [PATCH 9/9] Modified: Fixed Typo in assertion message
---
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index 4c95945fa63ba..dbca08603fd78 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -36,7 +36,7 @@ static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth,
const APInt &Value) {
- assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the PPR Register");
+ assert(Value.getBitWidth() <= RegBitWidth && "Value must fit in the ZPR Register");
// For ZPR, we typically use DUPM instruction to load immediate values
return MCInstBuilder(AArch64::DUPM_ZI)
.addReg(Reg)
More information about the llvm-commits
mailing list