[llvm] [FuncSpec] Adjust the names of specializations and promoted stack val… (PR #66685)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 19 06:27:12 PDT 2023
https://github.com/victor-eds updated https://github.com/llvm/llvm-project/pull/66685
From e96adfd0dbcf84c27be2087371890f4228890609 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Tue, 12 Sep 2023 12:10:18 +0100
Subject: [PATCH 1/7] [lldb][AArch64] Add testing for SME's ZA and SVG
registers
An SME enabled program has the following extra state:
* Streaming mode or non-streaming mode.
* ZA enabled or disabled.
* The active vector length.
Covering the transitions between every possible state and every
other possible state is not viable, so the testing added here is a
cross section of them, all of which found real bugs in LLDB and the
Linux kernel during development.
Many of those transitions are not possible via LLDB
(e.g. disabling ZA) and many more are possible but unlikely to occur
in normal use.
Added testing:
* TestSVEThreadedDynamic now checks for correct SVG values.
* New test TestZAThreadedDynamic creates 3 threads with different ZA sizes
and states and switches between them verifying the register value
(derived from the existing threaded SVE test).
* New test TestZARegisterSaveRestore starts in a given SME state, runs a
set of expressions in various orders, then checks that the original
state has been restored.
* TestArm64DynamicRegsets has ZA and SVG checks added, including writing
to ZA to enable it.
Running these tests requires a very recent kernel and, as usual,
QEMU, since no real SME hardware is available at this time.
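For context, all three pieces of state are controlled from the test
programs with prctl and raw MSR encodings. A minimal sketch, using the
same PR_SME_SET_VL value and SM_INST encodings as the main.c files
added below (example_transitions is illustrative only, not part of the
patch):

  #include <sys/prctl.h>

  #ifndef PR_SME_SET_VL
  #define PR_SME_SET_VL 63
  #endif

  // SMSTART/SMSTOP are written as raw MSR instructions so no assembler
  // SME support is needed (the same trick the test sources use).
  #define SM_INST(c) asm volatile("msr s0_3_c4_c" #c "_3, xzr")
  #define SMSTART_SM SM_INST(3) /* enter streaming mode */
  #define SMSTART_ZA SM_INST(5) /* enable the ZA array */
  #define SMSTOP_SM SM_INST(2)  /* leave streaming mode */

  void example_transitions(void) {
    // Set the streaming vector length first; changing it later would
    // make streaming mode and ZA inactive again.
    prctl(PR_SME_SET_VL, 32);
    SMSTART_SM; // streaming mode on
    SMSTART_ZA; // ZA on (ZA is svl x svl bytes)
    SMSTOP_SM;  // streaming mode off; ZA stays active
  }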
Reviewed By: omjavaid
Differential Revision: https://reviews.llvm.org/D159505
---
.../TestArm64DynamicRegsets.py | 101 +++++--
.../TestSVEThreadedDynamic.py | 103 ++++++-
.../za_dynamic_resize/Makefile | 5 +
.../TestZAThreadedDynamic.py | 165 ++++++++++++
.../za_dynamic_resize/main.c | 104 ++++++++
.../za_save_restore/Makefile | 5 +
.../TestZARegisterSaveRestore.py | 252 ++++++++++++++++++
.../za_save_restore/main.c | 225 ++++++++++++++++
8 files changed, 927 insertions(+), 33 deletions(-)
create mode 100644 lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/Makefile
create mode 100644 lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py
create mode 100644 lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c
create mode 100644 lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/Makefile
create mode 100644 lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py
create mode 100644 lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/main.c
diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
index d3f53f0e95dfcb5..4f4da2b5223fb15 100644
--- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
+++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
@@ -70,15 +70,13 @@ def sve_regs_read_dynamic(self, sve_registers):
self.runCmd("register write ffr " + "'" + p_regs_value + "'")
self.expect("register read ffr", substrs=[p_regs_value])
- @no_debug_info_test
- @skipIf(archs=no_match(["aarch64"]))
- @skipIf(oslist=no_match(["linux"]))
- def test_aarch64_dynamic_regset_config(self):
- """Test AArch64 Dynamic Register sets configuration."""
+ def setup_register_config_test(self, run_args=None):
self.build()
self.line = line_number("main.c", "// Set a break point here.")
exe = self.getBuildArtifact("a.out")
+ if run_args is not None:
+ self.runCmd("settings set target.run-args " + run_args)
self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
lldbutil.run_break_set_by_file_and_line(
@@ -92,12 +90,16 @@ def test_aarch64_dynamic_regset_config(self):
substrs=["stop reason = breakpoint 1."],
)
- target = self.dbg.GetSelectedTarget()
- process = target.GetProcess()
- thread = process.GetThreadAtIndex(0)
- currentFrame = thread.GetFrameAtIndex(0)
+ return self.thread().GetSelectedFrame().GetRegisters()
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_aarch64_dynamic_regset_config(self):
+ """Test AArch64 Dynamic Register sets configuration."""
+ register_sets = self.setup_register_config_test()
- for registerSet in currentFrame.GetRegisters():
+ for registerSet in register_sets:
if "Scalable Vector Extension Registers" in registerSet.GetName():
self.assertTrue(
self.isAArch64SVE(),
@@ -120,6 +122,19 @@ def test_aarch64_dynamic_regset_config(self):
)
self.expect("register read data_mask", substrs=["data_mask = 0x"])
self.expect("register read code_mask", substrs=["code_mask = 0x"])
+ if "Scalable Matrix Extension Registers" in registerSet.GetName():
+ self.assertTrue(
+ self.isAArch64SME(),
+ "LLDB Enabled SME register set when it was disabled by target",
+ )
+
+ def make_za_value(self, vl, generator):
+ # Generate a vector value string "{0x00 0x01....}".
+ rows = []
+ for row in range(vl):
+ byte = "0x{:02x}".format(generator(row))
+ rows.append(" ".join([byte] * vl))
+ return "{" + " ".join(rows) + "}"
@no_debug_info_test
@skipIf(archs=no_match(["aarch64"]))
@@ -130,28 +145,58 @@ def test_aarch64_dynamic_regset_config_sme(self):
if not self.isAArch64SME():
self.skipTest("SME must be present.")
- self.build()
- self.line = line_number("main.c", "// Set a break point here.")
+ register_sets = self.setup_register_config_test("sme")
- exe = self.getBuildArtifact("a.out")
- self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
-
- lldbutil.run_break_set_by_file_and_line(
- self, "main.c", self.line, num_expected_locations=1
+ ssve_registers = register_sets.GetFirstValueByName(
+ "Scalable Vector Extension Registers"
)
- self.runCmd("settings set target.run-args sme")
- self.runCmd("run", RUN_SUCCEEDED)
+ self.assertTrue(ssve_registers.IsValid())
+ self.sve_regs_read_dynamic(ssve_registers)
- self.expect(
- "thread backtrace",
- STOPPED_DUE_TO_BREAKPOINT,
- substrs=["stop reason = breakpoint 1."],
+ sme_registers = register_sets.GetFirstValueByName(
+ "Scalable Matrix Extension Registers"
)
+ self.assertTrue(sme_registers.IsValid())
- register_sets = self.thread().GetSelectedFrame().GetRegisters()
+ vg = ssve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
+ vl = vg * 8
+ # When first enabled it is all 0s.
+ self.expect("register read za", substrs=[self.make_za_value(vl, lambda r: 0)])
+ za_value = self.make_za_value(vl, lambda r: r + 1)
+ self.runCmd("register write za '{}'".format(za_value))
+ self.expect("register read za", substrs=[za_value])
- ssve_registers = register_sets.GetFirstValueByName(
- "Scalable Vector Extension Registers"
+ # SVG should match VG because we're in streaming mode.
+
+ self.assertTrue(sme_registers.IsValid())
+ svg = sme_registers.GetChildMemberWithName("svg").GetValueAsUnsigned()
+ self.assertEqual(vg, svg)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_aarch64_dynamic_regset_config_sme_za_disabled(self):
+ """Test that ZA shows as 0s when disabled and can be enabled by writing
+ to it."""
+ if not self.isAArch64SME():
+ self.skipTest("SME must be present.")
+
+ # No argument, so ZA will be disabled when we break.
+ register_sets = self.setup_register_config_test()
+
+ # vg is the non-streaming vg as we are in non-streaming mode, so we need
+ # to use svg.
+ sme_registers = register_sets.GetFirstValueByName(
+ "Scalable Matrix Extension Registers"
)
- self.assertTrue(ssve_registers.IsValid())
- self.sve_regs_read_dynamic(ssve_registers)
+ self.assertTrue(sme_registers.IsValid())
+ svg = sme_registers.GetChildMemberWithName("svg").GetValueAsUnsigned()
+
+ svl = svg * 8
+ # A disabled ZA is shown as all 0s.
+ self.expect("register read za", substrs=[self.make_za_value(svl, lambda r: 0)])
+ za_value = self.make_za_value(svl, lambda r: r + 1)
+ # Writing to it enables ZA, so the value should be there when we read
+ # it back.
+ self.runCmd("register write za '{}'".format(za_value))
+ self.expect("register read za", substrs=[za_value])
diff --git a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
index ecac3712674976b..8bcb76776459d01 100644
--- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
+++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
@@ -98,6 +98,12 @@ def check_sve_registers(self, vg_test_value):
self.expect("register read ffr", substrs=[p_regs_value])
+ def build_for_mode(self, mode):
+ cflags = "-march=armv8-a+sve -lpthread"
+ if mode == Mode.SSVE:
+ cflags += " -DUSE_SSVE"
+ self.build(dictionary={"CFLAGS_EXTRAS": cflags})
+
def run_sve_test(self, mode):
if (mode == Mode.SVE) and not self.isAArch64SVE():
self.skipTest("SVE registers must be supported.")
@@ -105,12 +111,8 @@ def run_sve_test(self, mode):
if (mode == Mode.SSVE) and not self.isAArch64SME():
self.skipTest("Streaming SVE registers must be supported.")
- cflags = "-march=armv8-a+sve -lpthread"
- if mode == Mode.SSVE:
- cflags += " -DUSE_SSVE"
- self.build(dictionary={"CFLAGS_EXTRAS": cflags})
+ self.build_for_mode(mode)
- self.build()
supported_vg = self.get_supported_vg()
if not (2 in supported_vg and 4 in supported_vg):
@@ -196,3 +198,94 @@ def test_sve_registers_dynamic_config(self):
def test_ssve_registers_dynamic_config(self):
"""Test AArch64 SSVE registers multi-threaded dynamic resize."""
self.run_sve_test(Mode.SSVE)
+
+ def setup_svg_test(self, mode):
+ # Even when running in SVE mode, we need access to SVG for these tests.
+ if not self.isAArch64SME():
+ self.skipTest("Streaming SVE registers must be present.")
+
+ self.build_for_mode(mode)
+
+ supported_vg = self.get_supported_vg()
+
+ main_thread_stop_line = line_number("main.c", "// Break in main thread")
+ lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
+
+ self.runCmd("run", RUN_SUCCEEDED)
+
+ self.expect(
+ "thread info 1",
+ STOPPED_DUE_TO_BREAKPOINT,
+ substrs=["stop reason = breakpoint"],
+ )
+
+ target = self.dbg.GetSelectedTarget()
+ process = target.GetProcess()
+
+ return process, supported_vg
+
+ def read_reg(self, process, regset, reg):
+ registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+ sve_registers = registerSets.GetFirstValueByName(regset)
+ return sve_registers.GetChildMemberWithName(reg).GetValueAsUnsigned()
+
+ def read_vg(self, process):
+ return self.read_reg(process, "Scalable Vector Extension Registers", "vg")
+
+ def read_svg(self, process):
+ return self.read_reg(process, "Scalable Matrix Extension Registers", "svg")
+
+ def do_svg_test(self, process, vgs, expected_svgs):
+ for vg, svg in zip(vgs, expected_svgs):
+ self.runCmd("register write vg {}".format(vg))
+ self.assertEqual(svg, self.read_svg(process))
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_svg_sve_mode(self):
+ """When in SVE mode, svg should remain constant as we change vg."""
+ process, supported_vg = self.setup_svg_test(Mode.SVE)
+ svg = self.read_svg(process)
+ self.do_svg_test(process, supported_vg, [svg] * len(supported_vg))
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_svg_ssve_mode(self):
+ """When in SSVE mode, changing vg should change svg to the same value."""
+ process, supported_vg = self.setup_svg_test(Mode.SSVE)
+ self.do_svg_test(process, supported_vg, supported_vg)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_sme_not_present(self):
+ """When there is no SME, we should not show the SME register sets."""
+ if self.isAArch64SME():
+ self.skipTest("Streaming SVE registers must not be present.")
+
+ self.build_for_mode(Mode.SVE)
+
+ exe = self.getBuildArtifact("a.out")
+ self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+ # This test may run on a non-sve system, but we'll stop before any
+ # SVE instruction would be run.
+ self.runCmd("b main")
+ self.runCmd("run", RUN_SUCCEEDED)
+
+ self.expect(
+ "thread info 1",
+ STOPPED_DUE_TO_BREAKPOINT,
+ substrs=["stop reason = breakpoint"],
+ )
+
+ target = self.dbg.GetSelectedTarget()
+ process = target.GetProcess()
+
+ registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+ sme_registers = registerSets.GetFirstValueByName(
+ "Scalable Matrix Extension Registers"
+ )
+ self.assertFalse(sme_registers.IsValid())
diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/Makefile b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/Makefile
new file mode 100644
index 000000000000000..57d926b37d45cf4
--- /dev/null
+++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/Makefile
@@ -0,0 +1,5 @@
+C_SOURCES := main.c
+
+CFLAGS_EXTRAS := -march=armv8-a+sve+sme -lpthread
+
+include Makefile.rules
diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py
new file mode 100644
index 000000000000000..65d1071c26b2a34
--- /dev/null
+++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/TestZAThreadedDynamic.py
@@ -0,0 +1,165 @@
+"""
+Test the AArch64 SME Array Storage (ZA) register dynamic resize with
+multiple threads.
+"""
+
+from enum import Enum
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class AArch64ZAThreadedTestCase(TestBase):
+ def get_supported_vg(self):
+ exe = self.getBuildArtifact("a.out")
+ self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+ main_thread_stop_line = line_number("main.c", "// Break in main thread")
+ lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
+
+ self.runCmd("settings set target.run-args 0")
+ self.runCmd("run", RUN_SUCCEEDED)
+
+ self.expect(
+ "thread info 1",
+ STOPPED_DUE_TO_BREAKPOINT,
+ substrs=["stop reason = breakpoint"],
+ )
+
+ current_vg = self.match("register read vg", ["(0x[0-9]+)"])
+ self.assertTrue(current_vg is not None)
+ self.expect("register write vg {}".format(current_vg.group()))
+
+ # Aka 128, 256 and 512 bit.
+ supported_vg = []
+ for vg in [2, 4, 8]:
+ # This could mask other errors but writing vg is tested elsewhere
+ # so we assume the hardware rejected the value.
+ self.runCmd("register write vg {}".format(vg), check=False)
+ if not self.res.GetError():
+ supported_vg.append(vg)
+
+ self.runCmd("breakpoint delete 1")
+ self.runCmd("continue")
+
+ return supported_vg
+
+ def gen_za_value(self, svg, value_generator):
+ svl = svg * 8
+
+ rows = []
+ for row in range(svl):
+ byte = "0x{:02x}".format(value_generator(row))
+ rows.append(" ".join([byte] * svl))
+
+ return "{" + " ".join(rows) + "}"
+
+ def check_za_register(self, svg, value_offset):
+ self.expect(
+ "register read za",
+ substrs=[self.gen_za_value(svg, lambda r: r + value_offset)],
+ )
+
+ def check_disabled_za_register(self, svg):
+ self.expect("register read za", substrs=[self.gen_za_value(svg, lambda r: 0)])
+
+ def za_test_impl(self, enable_za):
+ if not self.isAArch64SME():
+ self.skipTest("SME must be present.")
+
+ self.build()
+ supported_vg = self.get_supported_vg()
+
+ self.runCmd("settings set target.run-args {}".format("1" if enable_za else "0"))
+
+ if not (2 in supported_vg and 4 in supported_vg):
+ self.skipTest("Not all required streaming vector lengths are supported.")
+
+ main_thread_stop_line = line_number("main.c", "// Break in main thread")
+ lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
+
+ thX_break_line1 = line_number("main.c", "// Thread X breakpoint 1")
+ lldbutil.run_break_set_by_file_and_line(self, "main.c", thX_break_line1)
+
+ thX_break_line2 = line_number("main.c", "// Thread X breakpoint 2")
+ lldbutil.run_break_set_by_file_and_line(self, "main.c", thX_break_line2)
+
+ thY_break_line1 = line_number("main.c", "// Thread Y breakpoint 1")
+ lldbutil.run_break_set_by_file_and_line(self, "main.c", thY_break_line1)
+
+ thY_break_line2 = line_number("main.c", "// Thread Y breakpoint 2")
+ lldbutil.run_break_set_by_file_and_line(self, "main.c", thY_break_line2)
+
+ self.runCmd("run", RUN_SUCCEEDED)
+
+ self.expect(
+ "thread info 1",
+ STOPPED_DUE_TO_BREAKPOINT,
+ substrs=["stop reason = breakpoint"],
+ )
+
+ if 8 in supported_vg:
+ if enable_za:
+ self.check_za_register(8, 1)
+ else:
+ self.check_disabled_za_register(8)
+ else:
+ if enable_za:
+ self.check_za_register(4, 1)
+ else:
+ self.check_disabled_za_register(4)
+
+ self.runCmd("process continue", RUN_SUCCEEDED)
+
+ process = self.dbg.GetSelectedTarget().GetProcess()
+ for idx in range(1, process.GetNumThreads()):
+ thread = process.GetThreadAtIndex(idx)
+ if thread.GetStopReason() != lldb.eStopReasonBreakpoint:
+ self.runCmd("thread continue %d" % (idx + 1))
+ self.assertEqual(thread.GetStopReason(), lldb.eStopReasonBreakpoint)
+
+ stopped_at_line_number = thread.GetFrameAtIndex(0).GetLineEntry().GetLine()
+
+ if stopped_at_line_number == thX_break_line1:
+ self.runCmd("thread select %d" % (idx + 1))
+ self.check_za_register(4, 2)
+ self.runCmd("register write vg 2")
+
+ elif stopped_at_line_number == thY_break_line1:
+ self.runCmd("thread select %d" % (idx + 1))
+ self.check_za_register(2, 3)
+ self.runCmd("register write vg 4")
+
+ self.runCmd("thread continue 2")
+ self.runCmd("thread continue 3")
+
+ for idx in range(1, process.GetNumThreads()):
+ thread = process.GetThreadAtIndex(idx)
+ self.assertEqual(thread.GetStopReason(), lldb.eStopReasonBreakpoint)
+
+ stopped_at_line_number = thread.GetFrameAtIndex(0).GetLineEntry().GetLine()
+
+ if stopped_at_line_number == thX_break_line2:
+ self.runCmd("thread select %d" % (idx + 1))
+ self.check_za_register(2, 2)
+
+ elif stopped_at_line_number == thY_break_line2:
+ self.runCmd("thread select %d" % (idx + 1))
+ self.check_za_register(4, 3)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_register_dynamic_config_main_enabled(self):
+ """Test multiple threads resizing ZA, with the main thread's ZA
+ enabled."""
+ self.za_test_impl(True)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_register_dynamic_config_main_disabled(self):
+ """Test multiple threads resizing ZA, with the main thread's ZA
+ disabled."""
+ self.za_test_impl(False)
diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c
new file mode 100644
index 000000000000000..fd2590dbe411f7f
--- /dev/null
+++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_dynamic_resize/main.c
@@ -0,0 +1,104 @@
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+// Important notes for this test:
+// * Making a syscall will disable streaming mode.
+// * LLDB writing to vg while in streaming mode will disable ZA
+// (this is just how ptrace works).
+// * Using an instruction to write to an inactive ZA produces a SIGILL
+// (doing the same thing via ptrace does not, as the kernel activates ZA for
+// us in that case).
+
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#define SM_INST(c) asm volatile("msr s0_3_c4_c" #c "_3, xzr")
+#define SMSTART_SM SM_INST(3)
+#define SMSTART_ZA SM_INST(5)
+
+void set_za_register(int svl, int value_offset) {
+#define MAX_VL_BYTES 256
+ uint8_t data[MAX_VL_BYTES];
+
+ // ldr za will actually wrap the selected vector row, by the number of rows
+ // you have. So setting one that didn't exist would actually set one that did.
+ // That's why we need the streaming vector length here.
+ for (int i = 0; i < svl; ++i) {
+ memset(data, i + value_offset, MAX_VL_BYTES);
+ // Each one of these loads a VL sized row of ZA.
+ asm volatile("mov w12, %w0\n\t"
+ "ldr za[w12, 0], [%1]\n\t" ::"r"(i),
+ "r"(&data)
+ : "w12");
+ }
+}
+
+// These are used to make sure we only break in each thread once both of the
+// threads have been started. Otherwise when the test does "process continue"
+// it could stop in one thread and wait forever for the other one to start.
+atomic_bool threadX_ready = false;
+atomic_bool threadY_ready = false;
+
+void *threadX_func(void *x_arg) {
+ threadX_ready = true;
+ while (!threadY_ready) {
+ }
+
+ prctl(PR_SME_SET_VL, 8 * 4);
+ SMSTART_SM;
+ SMSTART_ZA;
+ set_za_register(8 * 4, 2);
+ SMSTART_ZA; // Thread X breakpoint 1
+ set_za_register(8 * 2, 2);
+ return NULL; // Thread X breakpoint 2
+}
+
+void *threadY_func(void *y_arg) {
+ threadY_ready = true;
+ while (!threadX_ready) {
+ }
+
+ prctl(PR_SME_SET_VL, 8 * 2);
+ SMSTART_SM;
+ SMSTART_ZA;
+ set_za_register(8 * 2, 3);
+ SMSTART_ZA; // Thread Y breakpoint 1
+ set_za_register(8 * 4, 3);
+ return NULL; // Thread Y breakpoint 2
+}
+
+int main(int argc, char *argv[]) {
+ // Expecting argument to tell us whether to enable ZA on the main thread.
+ if (argc != 2)
+ return 1;
+
+ prctl(PR_SME_SET_VL, 8 * 8);
+ SMSTART_SM;
+
+ if (argv[1][0] == '1') {
+ SMSTART_ZA;
+ set_za_register(8 * 8, 1);
+ }
+ // else we do not enable ZA and lldb will show 0s for it.
+
+ pthread_t x_thread;
+ if (pthread_create(&x_thread, NULL, threadX_func, 0)) // Break in main thread
+ return 1;
+
+ pthread_t y_thread;
+ if (pthread_create(&y_thread, NULL, threadY_func, 0))
+ return 1;
+
+ if (pthread_join(x_thread, NULL))
+ return 2;
+
+ if (pthread_join(y_thread, NULL))
+ return 2;
+
+ return 0;
+}
diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/Makefile b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/Makefile
new file mode 100644
index 000000000000000..f2ca08f3531aa16
--- /dev/null
+++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/Makefile
@@ -0,0 +1,5 @@
+C_SOURCES := main.c
+
+CFLAGS_EXTRAS := -march=armv8-a+sve+sme
+
+include Makefile.rules
diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py
new file mode 100644
index 000000000000000..1d4bbd6207a51c1
--- /dev/null
+++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/TestZARegisterSaveRestore.py
@@ -0,0 +1,252 @@
+"""
+Test the AArch64 SME ZA register is saved and restored around expressions.
+
+This attempts to cover expressions that change the following:
+* ZA enabled or not.
+* Streaming mode or not.
+* Streaming vector length (increasing and decreasing).
+* Some combinations of the above.
+"""
+
+from enum import IntEnum
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+# These enum values match the flag values used in the test program.
+class Mode(IntEnum):
+ SVE = 0
+ SSVE = 1
+
+
+class ZA(IntEnum):
+ Disabled = 0
+ Enabled = 1
+
+
+class AArch64ZATestCase(TestBase):
+ def get_supported_svg(self):
+ # Always build this probe program to start as streaming SVE.
+ # We will read/write "vg" here but since we are in streaming mode "svg"
+ # is really what we are writing ("svg" is a read only pseudo).
+ self.build()
+
+ exe = self.getBuildArtifact("a.out")
+ self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+ # Enter streaming mode, don't enable ZA, start_vl and other_vl don't
+ # matter here.
+ self.runCmd("settings set target.run-args 1 0 0 0")
+
+ stop_line = line_number("main.c", "// Set a break point here.")
+ lldbutil.run_break_set_by_file_and_line(
+ self, "main.c", stop_line, num_expected_locations=1
+ )
+
+ self.runCmd("run", RUN_SUCCEEDED)
+
+ self.expect(
+ "thread info 1",
+ STOPPED_DUE_TO_BREAKPOINT,
+ substrs=["stop reason = breakpoint"],
+ )
+
+ # Write back the current vg to confirm read/write works at all.
+ current_svg = self.match("register read vg", ["(0x[0-9]+)"])
+ self.assertTrue(current_svg is not None)
+ self.expect("register write vg {}".format(current_svg.group()))
+
+ # Aka 128, 256 and 512 bit.
+ supported_svg = []
+ for svg in [2, 4, 8]:
+ # This could mask other errors but writing vg is tested elsewhere
+ # so we assume the hardware rejected the value.
+ self.runCmd("register write vg {}".format(svg), check=False)
+ if not self.res.GetError():
+ supported_svg.append(svg)
+
+ self.runCmd("breakpoint delete 1")
+ self.runCmd("continue")
+
+ return supported_svg
+
+ def read_vg(self):
+ process = self.dbg.GetSelectedTarget().GetProcess()
+ registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+ sve_registers = registerSets.GetFirstValueByName(
+ "Scalable Vector Extension Registers"
+ )
+ return sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
+
+ def read_svg(self):
+ process = self.dbg.GetSelectedTarget().GetProcess()
+ registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+ sve_registers = registerSets.GetFirstValueByName(
+ "Scalable Matrix Extension Registers"
+ )
+ return sve_registers.GetChildMemberWithName("svg").GetValueAsUnsigned()
+
+ def make_za_value(self, vl, generator):
+ # Generate a vector value string "{0x00 0x01....}".
+ rows = []
+ for row in range(vl):
+ byte = "0x{:02x}".format(generator(row))
+ rows.append(" ".join([byte] * vl))
+ return "{" + " ".join(rows) + "}"
+
+ def check_za(self, vl):
+ # We expect an increasing value starting at 1. Row 0=1, row 1 = 2, etc.
+ self.expect(
+ "register read za", substrs=[self.make_za_value(vl, lambda row: row + 1)]
+ )
+
+ def check_za_disabled(self, vl):
+ # When ZA is disabled, lldb will show ZA as all 0s.
+ self.expect("register read za", substrs=[self.make_za_value(vl, lambda row: 0)])
+
+ def za_expr_test_impl(self, sve_mode, za_state, swap_start_vl):
+ if not self.isAArch64SME():
+ self.skipTest("SME must be present.")
+
+ supported_svg = self.get_supported_svg()
+ if len(supported_svg) < 2:
+ self.skipTest("Target must support at least 2 streaming vector lengths.")
+
+ # vg is in units of 8 bytes.
+ start_vl = supported_svg[1] * 8
+ other_vl = supported_svg[2] * 8
+
+ if swap_start_vl:
+ start_vl, other_vl = other_vl, start_vl
+
+ self.line = line_number("main.c", "// Set a break point here.")
+
+ exe = self.getBuildArtifact("a.out")
+ self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+ self.runCmd(
+ "settings set target.run-args {} {} {} {}".format(
+ sve_mode, za_state, start_vl, other_vl
+ )
+ )
+
+ lldbutil.run_break_set_by_file_and_line(
+ self, "main.c", self.line, num_expected_locations=1
+ )
+ self.runCmd("run", RUN_SUCCEEDED)
+
+ self.expect(
+ "thread backtrace",
+ STOPPED_DUE_TO_BREAKPOINT,
+ substrs=["stop reason = breakpoint 1."],
+ )
+
+ exprs = [
+ "expr_disable_za",
+ "expr_enable_za",
+ "expr_start_vl",
+ "expr_other_vl",
+ "expr_enable_sm",
+ "expr_disable_sm",
+ ]
+
+ # This may be the streaming or non-streaming vg. All that matters is
+ # that it is saved and restored, remaining constant throughout.
+ start_vg = self.read_vg()
+
+ # Check SVE registers to make sure that combination of scaling SVE
+ # and scaling ZA works properly. This is a brittle check, but failures
+ # are likely to be catastrophic when they do happen anyway.
+ sve_reg_names = "ffr {} {}".format(
+ " ".join(["z{}".format(n) for n in range(32)]),
+ " ".join(["p{}".format(n) for n in range(16)]),
+ )
+ self.runCmd("register read " + sve_reg_names)
+ sve_values = self.res.GetOutput()
+
+ def check_regs():
+ if za_state == ZA.Enabled:
+ self.check_za(start_vl)
+ else:
+ self.check_za_disabled(start_vl)
+
+ # svg and vg are in units of 8 bytes.
+ self.assertEqual(start_vl, self.read_svg() * 8)
+ self.assertEqual(start_vg, self.read_vg())
+
+ self.expect("register read " + sve_reg_names, substrs=[sve_values])
+
+ for expr in exprs:
+ expr_cmd = "expression {}()".format(expr)
+
+ # We do this twice because there were issues in development where
+ # using data stored by a previous WriteAllRegisterValues would crash
+ # the second time around.
+ self.runCmd(expr_cmd)
+ check_regs()
+ self.runCmd(expr_cmd)
+ check_regs()
+
+ # Run them in sequence to make sure there is no state lingering between
+ # them after a restore.
+ for expr in exprs:
+ self.runCmd("expression {}()".format(expr))
+ check_regs()
+
+ for expr in reversed(exprs):
+ self.runCmd("expression {}()".format(expr))
+ check_regs()
+
+ # These tests start with the 1st supported SVL and change to the 2nd
+ # supported SVL as needed.
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_ssve_za_enabled(self):
+ self.za_expr_test_impl(Mode.SSVE, ZA.Enabled, False)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_ssve_za_disabled(self):
+ self.za_expr_test_impl(Mode.SSVE, ZA.Disabled, False)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_sve_za_enabled(self):
+ self.za_expr_test_impl(Mode.SVE, ZA.Enabled, False)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_sve_za_disabled(self):
+ self.za_expr_test_impl(Mode.SVE, ZA.Disabled, False)
+
+ # These tests start in the 2nd supported SVL and change to the 1st supported
+ # SVL as needed.
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_ssve_za_enabled_different_vl(self):
+ self.za_expr_test_impl(Mode.SSVE, ZA.Enabled, True)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_ssve_za_disabled_different_vl(self):
+ self.za_expr_test_impl(Mode.SSVE, ZA.Disabled, True)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_sve_za_enabled_different_vl(self):
+ self.za_expr_test_impl(Mode.SVE, ZA.Enabled, True)
+
+ @no_debug_info_test
+ @skipIf(archs=no_match(["aarch64"]))
+ @skipIf(oslist=no_match(["linux"]))
+ def test_za_expr_sve_za_disabled_different_vl(self):
+ self.za_expr_test_impl(Mode.SVE, ZA.Disabled, True)
diff --git a/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/main.c b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/main.c
new file mode 100644
index 000000000000000..a8434787a5a1235
--- /dev/null
+++ b/lldb/test/API/commands/register/register/aarch64_za_register/za_save_restore/main.c
@@ -0,0 +1,225 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+// Important details for this program:
+// * Making a syscall will disable streaming mode if it is active.
+// * Changing the vector length will make streaming mode and ZA inactive.
+// * ZA can be active independent of streaming mode.
+// * ZA's size is the streaming vector length squared.
+
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#ifndef PR_SME_GET_VL
+#define PR_SME_GET_VL 64
+#endif
+
+#ifndef PR_SME_VL_LEN_MASK
+#define PR_SME_VL_LEN_MASK 0xffff
+#endif
+
+#define SM_INST(c) asm volatile("msr s0_3_c4_c" #c "_3, xzr")
+#define SMSTART SM_INST(7)
+#define SMSTART_SM SM_INST(3)
+#define SMSTART_ZA SM_INST(5)
+#define SMSTOP SM_INST(6)
+#define SMSTOP_SM SM_INST(2)
+#define SMSTOP_ZA SM_INST(4)
+
+int start_vl = 0;
+int other_vl = 0;
+
+void write_sve_regs() {
+ // We assume the smefa64 feature is present, which allows ffr access
+ // in streaming mode.
+ asm volatile("setffr\n\t");
+ asm volatile("ptrue p0.b\n\t");
+ asm volatile("ptrue p1.h\n\t");
+ asm volatile("ptrue p2.s\n\t");
+ asm volatile("ptrue p3.d\n\t");
+ asm volatile("pfalse p4.b\n\t");
+ asm volatile("ptrue p5.b\n\t");
+ asm volatile("ptrue p6.h\n\t");
+ asm volatile("ptrue p7.s\n\t");
+ asm volatile("ptrue p8.d\n\t");
+ asm volatile("pfalse p9.b\n\t");
+ asm volatile("ptrue p10.b\n\t");
+ asm volatile("ptrue p11.h\n\t");
+ asm volatile("ptrue p12.s\n\t");
+ asm volatile("ptrue p13.d\n\t");
+ asm volatile("pfalse p14.b\n\t");
+ asm volatile("ptrue p15.b\n\t");
+
+ asm volatile("cpy z0.b, p0/z, #1\n\t");
+ asm volatile("cpy z1.b, p5/z, #2\n\t");
+ asm volatile("cpy z2.b, p10/z, #3\n\t");
+ asm volatile("cpy z3.b, p15/z, #4\n\t");
+ asm volatile("cpy z4.b, p0/z, #5\n\t");
+ asm volatile("cpy z5.b, p5/z, #6\n\t");
+ asm volatile("cpy z6.b, p10/z, #7\n\t");
+ asm volatile("cpy z7.b, p15/z, #8\n\t");
+ asm volatile("cpy z8.b, p0/z, #9\n\t");
+ asm volatile("cpy z9.b, p5/z, #10\n\t");
+ asm volatile("cpy z10.b, p10/z, #11\n\t");
+ asm volatile("cpy z11.b, p15/z, #12\n\t");
+ asm volatile("cpy z12.b, p0/z, #13\n\t");
+ asm volatile("cpy z13.b, p5/z, #14\n\t");
+ asm volatile("cpy z14.b, p10/z, #15\n\t");
+ asm volatile("cpy z15.b, p15/z, #16\n\t");
+ asm volatile("cpy z16.b, p0/z, #17\n\t");
+ asm volatile("cpy z17.b, p5/z, #18\n\t");
+ asm volatile("cpy z18.b, p10/z, #19\n\t");
+ asm volatile("cpy z19.b, p15/z, #20\n\t");
+ asm volatile("cpy z20.b, p0/z, #21\n\t");
+ asm volatile("cpy z21.b, p5/z, #22\n\t");
+ asm volatile("cpy z22.b, p10/z, #23\n\t");
+ asm volatile("cpy z23.b, p15/z, #24\n\t");
+ asm volatile("cpy z24.b, p0/z, #25\n\t");
+ asm volatile("cpy z25.b, p5/z, #26\n\t");
+ asm volatile("cpy z26.b, p10/z, #27\n\t");
+ asm volatile("cpy z27.b, p15/z, #28\n\t");
+ asm volatile("cpy z28.b, p0/z, #29\n\t");
+ asm volatile("cpy z29.b, p5/z, #30\n\t");
+ asm volatile("cpy z30.b, p10/z, #31\n\t");
+ asm volatile("cpy z31.b, p15/z, #32\n\t");
+}
+
+// Write something different so we will know if we didn't restore them
+// correctly.
+void write_sve_regs_expr() {
+ asm volatile("pfalse p0.b\n\t");
+ asm volatile("wrffr p0.b\n\t");
+ asm volatile("pfalse p1.b\n\t");
+ asm volatile("pfalse p2.b\n\t");
+ asm volatile("pfalse p3.b\n\t");
+ asm volatile("ptrue p4.b\n\t");
+ asm volatile("pfalse p5.b\n\t");
+ asm volatile("pfalse p6.b\n\t");
+ asm volatile("pfalse p7.b\n\t");
+ asm volatile("pfalse p8.b\n\t");
+ asm volatile("ptrue p9.b\n\t");
+ asm volatile("pfalse p10.b\n\t");
+ asm volatile("pfalse p11.b\n\t");
+ asm volatile("pfalse p12.b\n\t");
+ asm volatile("pfalse p13.b\n\t");
+ asm volatile("ptrue p14.b\n\t");
+ asm volatile("pfalse p15.b\n\t");
+
+ asm volatile("cpy z0.b, p0/z, #2\n\t");
+ asm volatile("cpy z1.b, p5/z, #3\n\t");
+ asm volatile("cpy z2.b, p10/z, #4\n\t");
+ asm volatile("cpy z3.b, p15/z, #5\n\t");
+ asm volatile("cpy z4.b, p0/z, #6\n\t");
+ asm volatile("cpy z5.b, p5/z, #7\n\t");
+ asm volatile("cpy z6.b, p10/z, #8\n\t");
+ asm volatile("cpy z7.b, p15/z, #9\n\t");
+ asm volatile("cpy z8.b, p0/z, #10\n\t");
+ asm volatile("cpy z9.b, p5/z, #11\n\t");
+ asm volatile("cpy z10.b, p10/z, #12\n\t");
+ asm volatile("cpy z11.b, p15/z, #13\n\t");
+ asm volatile("cpy z12.b, p0/z, #14\n\t");
+ asm volatile("cpy z13.b, p5/z, #15\n\t");
+ asm volatile("cpy z14.b, p10/z, #16\n\t");
+ asm volatile("cpy z15.b, p15/z, #17\n\t");
+ asm volatile("cpy z16.b, p0/z, #18\n\t");
+ asm volatile("cpy z17.b, p5/z, #19\n\t");
+ asm volatile("cpy z18.b, p10/z, #20\n\t");
+ asm volatile("cpy z19.b, p15/z, #21\n\t");
+ asm volatile("cpy z20.b, p0/z, #22\n\t");
+ asm volatile("cpy z21.b, p5/z, #23\n\t");
+ asm volatile("cpy z22.b, p10/z, #24\n\t");
+ asm volatile("cpy z23.b, p15/z, #25\n\t");
+ asm volatile("cpy z24.b, p0/z, #26\n\t");
+ asm volatile("cpy z25.b, p5/z, #27\n\t");
+ asm volatile("cpy z26.b, p10/z, #28\n\t");
+ asm volatile("cpy z27.b, p15/z, #29\n\t");
+ asm volatile("cpy z28.b, p0/z, #30\n\t");
+ asm volatile("cpy z29.b, p5/z, #31\n\t");
+ asm volatile("cpy z30.b, p10/z, #32\n\t");
+ asm volatile("cpy z31.b, p15/z, #33\n\t");
+}
+
+void set_za_register(int svl, int value_offset) {
+#define MAX_VL_BYTES 256
+ uint8_t data[MAX_VL_BYTES];
+
+ // ldr za will actually wrap the selected vector row, by the number of rows
+ // you have. So setting one that didn't exist would actually set one that did.
+ // That's why we need the streaming vector length here.
+ for (int i = 0; i < svl; ++i) {
+ memset(data, i + value_offset, MAX_VL_BYTES);
+ // Each one of these loads a VL sized row of ZA.
+ asm volatile("mov w12, %w0\n\t"
+ "ldr za[w12, 0], [%1]\n\t" ::"r"(i),
+ "r"(&data)
+ : "w12");
+ }
+}
+
+void expr_disable_za() {
+ SMSTOP_ZA;
+ write_sve_regs_expr();
+}
+
+void expr_enable_za() {
+ SMSTART_ZA;
+ set_za_register(start_vl, 2);
+ write_sve_regs_expr();
+}
+
+void expr_start_vl() {
+ prctl(PR_SME_SET_VL, start_vl);
+ SMSTART_ZA;
+ set_za_register(start_vl, 4);
+ write_sve_regs_expr();
+}
+
+void expr_other_vl() {
+ prctl(PR_SME_SET_VL, other_vl);
+ SMSTART_ZA;
+ set_za_register(other_vl, 5);
+ write_sve_regs_expr();
+}
+
+void expr_enable_sm() {
+ SMSTART_SM;
+ write_sve_regs_expr();
+}
+
+void expr_disable_sm() {
+ SMSTOP_SM;
+ write_sve_regs_expr();
+}
+
+int main(int argc, char *argv[]) {
+ // We expect to get:
+ // * whether to enable streaming mode
+ // * whether to enable ZA
+ // * what the starting VL should be
+ // * what the other VL should be
+ if (argc != 5)
+ return 1;
+
+ bool ssve = argv[1][0] == '1';
+ bool za = argv[2][0] == '1';
+ start_vl = atoi(argv[3]);
+ other_vl = atoi(argv[4]);
+
+ prctl(PR_SME_SET_VL, start_vl);
+
+ if (ssve)
+ SMSTART_SM;
+
+ if (za) {
+ SMSTART_ZA;
+ set_za_register(start_vl, 1);
+ }
+
+ write_sve_regs();
+
+ return 0; // Set a break point here.
+}
From f502ab7961b78d4b02e7cb61782c7bce7d6de074 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 19 Sep 2023 14:46:36 +0200
Subject: [PATCH 2/7] [InstCombine] Add test for #62450 (NFC)
---
llvm/test/Transforms/InstCombine/ctpop.ll | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index cf1d72383e69405..a3db7715d183453 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -475,3 +475,16 @@ define i32 @parity_xor_extra_use2(i32 %arg, i32 %arg1) {
%i5 = xor i32 %i2, %i4
ret i32 %i5
}
+
+define i32 @select_ctpop_zero(i32 %x) {
+; CHECK-LABEL: @select_ctpop_zero(
+; CHECK-NEXT: [[CTPOP:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG1]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 [[CTPOP]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ctpop = call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %x, 0
+ %res = select i1 %cmp, i32 0, i32 %ctpop
+ ret i32 %res
+}
From c41b4b6397675c0d75b316aac3bae380f1ac493c Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 19 Sep 2023 13:10:24 +0200
Subject: [PATCH 3/7] [InstCombine] Make flag drop during select equiv fold
more generic
Instead of unsetting flags on the instruction, attempting the
fold, and then resetting the flags if it failed, add support to
simplifyWithOpReplaced() to ignore poison-generating flags/metadata
and collect all instructions where they may need to be dropped.
This allows us to perform the fold a) with poison-generating
metadata, which was previously not handled, and b) with
poison-generating flags/metadata that are not on the root instruction.
Proof for the ctpop case: https://alive2.llvm.org/ce/z/3H3HFs
Fixes https://github.com/llvm/llvm-project/issues/62450.
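At the source level, the new ctpop test boils down to the following
equivalence; a minimal C sketch using __builtin_popcount (assuming a
GCC/Clang compiler; not code from the patch). Both select arms agree at
x == 0 because popcount(0) == 0, so the select is redundant:

  #include <stdio.h>

  // C analogue of the select_ctpop_zero IR test:
  // (x == 0) ? 0 : popcount(x) simplifies to popcount(x).
  static unsigned select_ctpop_zero(unsigned x) {
    unsigned ctpop = (unsigned)__builtin_popcount(x);
    return (x == 0) ? 0u : ctpop;
  }

  int main(void) {
    printf("%u %u\n", select_ctpop_zero(0), select_ctpop_zero(6)); // 0 2
    return 0;
  }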
---
.../llvm/Analysis/InstructionSimplify.h | 10 ++++-
llvm/lib/Analysis/InstructionSimplify.cpp | 22 ++++++----
.../InstCombine/InstCombineSelect.cpp | 41 ++++++-------------
llvm/test/Transforms/InstCombine/bit_ceil.ll | 7 ++--
llvm/test/Transforms/InstCombine/ctpop.ll | 4 +-
llvm/test/Transforms/InstCombine/ispow2.ll | 4 +-
.../LoopVectorize/reduction-inloop.ll | 4 +-
7 files changed, 41 insertions(+), 51 deletions(-)
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index df0784664eadc86..401119b3cb85247 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -339,8 +339,14 @@ simplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps,
/// AllowRefinement specifies whether the simplification can be a refinement
/// (e.g. 0 instead of poison), or whether it needs to be strictly identical.
/// Op and RepOp can be assumed to not be poison when determining refinement.
-Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const SimplifyQuery &Q, bool AllowRefinement);
+///
+/// If DropFlags is passed, then the replacement result is only valid if
+/// poison-generating flags/metadata on those instructions are dropped. This
+/// is only useful in conjunction with AllowRefinement=false.
+Value *
+simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const SimplifyQuery &Q, bool AllowRefinement,
+ SmallVectorImpl<Instruction *> *DropFlags = nullptr);
/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
///
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index e8f96e9f681f2d5..d8aa614cae53b10 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4299,6 +4299,7 @@ Value *llvm::simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q,
bool AllowRefinement,
+ SmallVectorImpl<Instruction *> *DropFlags,
unsigned MaxRecurse) {
// Trivial replacement.
if (V == Op)
@@ -4333,7 +4334,7 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
bool AnyReplaced = false;
for (Value *InstOp : I->operands()) {
if (Value *NewInstOp = simplifyWithOpReplaced(
- InstOp, Op, RepOp, Q, AllowRefinement, MaxRecurse)) {
+ InstOp, Op, RepOp, Q, AllowRefinement, DropFlags, MaxRecurse)) {
NewOps.push_back(NewInstOp);
AnyReplaced = InstOp != NewInstOp;
} else {
@@ -4427,16 +4428,23 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// will be done in InstCombine).
// TODO: This may be unsound, because it only catches some forms of
// refinement.
- if (!AllowRefinement && canCreatePoison(cast<Operator>(I)))
- return nullptr;
+ if (!AllowRefinement) {
+ if (canCreatePoison(cast<Operator>(I), !DropFlags))
+ return nullptr;
+ Constant *Res = ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI);
+ if (DropFlags && Res && I->hasPoisonGeneratingFlagsOrMetadata())
+ DropFlags->push_back(I);
+ return Res;
+ }
return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI);
}
Value *llvm::simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q,
- bool AllowRefinement) {
- return ::simplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement,
+ bool AllowRefinement,
+ SmallVectorImpl<Instruction *> *DropFlags) {
+ return ::simplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement, DropFlags,
RecursionLimit);
}
@@ -4569,11 +4577,11 @@ static Value *simplifySelectWithICmpEq(Value *CmpLHS, Value *CmpRHS,
unsigned MaxRecurse) {
if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ false,
- MaxRecurse) == TrueVal)
+ /* DropFlags */ nullptr, MaxRecurse) == TrueVal)
return FalseVal;
if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ true,
- MaxRecurse) == FalseVal)
+ /* DropFlags */ nullptr, MaxRecurse) == FalseVal)
return FalseVal;
return nullptr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 291e6382f898c5b..05b3eaacc7b4d06 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1309,45 +1309,28 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
return nullptr;
// InstSimplify already performed this fold if it was possible subject to
- // current poison-generating flags. Try the transform again with
- // poison-generating flags temporarily dropped.
- bool WasNUW = false, WasNSW = false, WasExact = false, WasInBounds = false;
- if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) {
- WasNUW = OBO->hasNoUnsignedWrap();
- WasNSW = OBO->hasNoSignedWrap();
- FalseInst->setHasNoUnsignedWrap(false);
- FalseInst->setHasNoSignedWrap(false);
- }
- if (auto *PEO = dyn_cast<PossiblyExactOperator>(FalseVal)) {
- WasExact = PEO->isExact();
- FalseInst->setIsExact(false);
- }
- if (auto *GEP = dyn_cast<GetElementPtrInst>(FalseVal)) {
- WasInBounds = GEP->isInBounds();
- GEP->setIsInBounds(false);
- }
+ // current poison-generating flags. Check whether dropping poison-generating
+ // flags enables the transform.
// Try each equivalence substitution possibility.
// We have an 'EQ' comparison, so the select's false value will propagate.
// Example:
// (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
+ SmallVector<Instruction *> DropFlags;
if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,
- /* AllowRefinement */ false) == TrueVal ||
+ /* AllowRefinement */ false,
+ &DropFlags) == TrueVal ||
simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,
- /* AllowRefinement */ false) == TrueVal) {
+ /* AllowRefinement */ false,
+ &DropFlags) == TrueVal) {
+ for (Instruction *I : DropFlags) {
+ I->dropPoisonGeneratingFlagsAndMetadata();
+ Worklist.add(I);
+ }
+
return replaceInstUsesWith(Sel, FalseVal);
}
- // Restore poison-generating flags if the transform did not apply.
- if (WasNUW)
- FalseInst->setHasNoUnsignedWrap();
- if (WasNSW)
- FalseInst->setHasNoSignedWrap();
- if (WasExact)
- FalseInst->setIsExact();
- if (WasInBounds)
- cast<GetElementPtrInst>(FalseInst)->setIsInBounds();
-
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 6f714153a598ada..52e70c78ba54289 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -148,10 +148,9 @@ define i32 @bit_ceil_commuted_operands(i32 %x) {
; CHECK-LABEL: @bit_ceil_commuted_operands(
; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1
; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]]
-; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]]
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 31
-; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP2]]
-; CHECK-NEXT: ret i32 [[SEL]]
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]]
+; CHECK-NEXT: ret i32 [[SHL]]
;
%dec = add i32 %x, -1
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index a3db7715d183453..f3419768bbd0285 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -479,9 +479,7 @@ define i32 @parity_xor_extra_use2(i32 %arg, i32 %arg1) {
define i32 @select_ctpop_zero(i32 %x) {
; CHECK-LABEL: @select_ctpop_zero(
; CHECK-NEXT: [[CTPOP:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG1]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 [[CTPOP]]
-; CHECK-NEXT: ret i32 [[RES]]
+; CHECK-NEXT: ret i32 [[CTPOP]]
;
%ctpop = call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp eq i32 %x, 0
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index a67a3fde1e661d8..1fa0d4c4af054b3 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -345,9 +345,7 @@ define i1 @is_pow2_ctpop_wrong_pred1_logical(i32 %x) {
; CHECK-LABEL: @is_pow2_ctpop_wrong_pred1_logical(
; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[T0]], 2
-; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT: [[R:%.*]] = select i1 [[NOTZERO]], i1 [[CMP]], i1 false
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 [[CMP]]
;
%t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
%cmp = icmp ugt i32 %t0, 2
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index aca02f37abe2ef3..18b05c05d9b9d21 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -1122,12 +1122,10 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], <i8 31, i8 31, i8 31, i8 31>
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], <i8 3, i8 3, i8 3, i8 3>
; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], <i8 31, i8 31, i8 31, i8 31>
-; CHECK-NEXT: [[NARROW:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> zeroinitializer, <4 x i8> [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[NARROW]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
From 6d1c6ecbdc4e94cafd51ade0331d7112161a4cf3 Mon Sep 17 00:00:00 2001
From: vic <victor.perez at codeplay.com>
Date: Tue, 19 Sep 2023 15:06:22 +0200
Subject: [PATCH 4/7] [IR] `SingleBlock::push_back` operations to the back of
the block (#66655)
Instead of checking whether the last operation might be a terminator,
always insert operations at the end of the block.
Signed-off-by: Victor Perez <victor.perez at codeplay.com>
---
mlir/include/mlir/IR/OpDefinition.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h
index 306b3789a044f83..82d0e93a8ee2fa9 100644
--- a/mlir/include/mlir/IR/OpDefinition.h
+++ b/mlir/include/mlir/IR/OpDefinition.h
@@ -932,10 +932,6 @@ struct SingleBlock : public TraitBase<ConcreteType, SingleBlock> {
}
template <typename OpT = ConcreteType>
enable_if_single_region<OpT> insert(Block::iterator insertPt, Operation *op) {
- Block *body = getBody();
- // Insert op before the block's terminator if it has one
- if (insertPt == body->end() && body->hasTerminator())
- insertPt = Block::iterator(body->getTerminator());
getBody()->getOperations().insert(insertPt, op);
}
};
From a292e7edf8b2fc51d3e86a96ff5dff45d16bd264 Mon Sep 17 00:00:00 2001
From: Zahira Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 19 Sep 2023 06:13:02 -0700
Subject: [PATCH 5/7] Fix math-errno issue (#66381)
Update the handling of math errno. This change updates the logic for
generating math intrinsics in place of math library function calls.
The previous logic (https://reviews.llvm.org/D151834) incorrectly
used intrinsics when math errno handling was needed at optimization
levels above -O0.
This also fixes the issue mentioned in https://reviews.llvm.org/D151834 by
@uabelho.
This is joint work with Andy Kaylor (@andykaylor).
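The user-visible difference is whether errno is honored. A small
standalone sketch of the behavior being fixed (assuming a libm that
reports domain errors via errno, and a build with -fmath-errno, which
is typically the default for C; not part of the patch):

  #include <errno.h>
  #include <math.h>
  #include <stdio.h>

  int main(void) {
    errno = 0;
    double r = sqrt(-1.0); // domain error: the libcall sets errno to EDOM
    // If this call were lowered to the llvm.sqrt intrinsic despite
    // -fmath-errno, errno would still be 0 here.
    printf("result=%f errno=%s\n", r, errno == EDOM ? "EDOM" : "not set");
    return 0;
  }

The fma builtins are special-cased below because they never set errno
in GNU and MSVCRT environments, so the intrinsic remains safe there.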
---
clang/lib/CodeGen/CGBuiltin.cpp | 39 +++++++++++++++++++++++++-----
clang/test/CodeGen/math-builtins.c | 2 ++
clang/test/CodeGen/math-libcalls.c | 2 ++
3 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 52868ca260290b7..f727a0d5592effd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2312,8 +2312,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const unsigned BuiltinIDIfNoAsmLabel =
FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
- bool ErrnoOverriden = false;
- // True if math-errno is overriden via the
+ std::optional<bool> ErrnoOverriden;
+  // ErrnoOverriden is true if math-errno is overridden via the
// '#pragma float_control(precise, on)'. This pragma disables fast-math,
// which implies math-errno.
if (E->hasStoredFPFeatures()) {
@@ -2329,8 +2329,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// using the '#pragma float_control(precise, off)', and
// attribute opt-none hasn't been seen.
bool ErrnoOverridenToFalseWithOpt =
- !ErrnoOverriden && !OptNone &&
- CGM.getCodeGenOpts().OptimizationLevel != 0;
+ ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
+ CGM.getCodeGenOpts().OptimizationLevel != 0;
// There are LLVM math intrinsics/instructions corresponding to math library
// functions except the LLVM op will never set errno while the math library
@@ -2339,6 +2339,30 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// LLVM counterparts if the call is marked 'const' (known to never set errno).
// In case FP exceptions are enabled, the experimental versions of the
// intrinsics model those.
+ bool ConstAlways =
+ getContext().BuiltinInfo.isConst(BuiltinID);
+
+ // There's a special case with the fma builtins where they are always const
+  // if the target environment is GNU or the target OS is Windows and we're
+ // targeting the MSVCRT.dll environment.
+  // FIXME: This list can become outdated. Need to find a way to get it some
+ // other way.
+ switch (BuiltinID) {
+ case Builtin::BI__builtin_fma:
+ case Builtin::BI__builtin_fmaf:
+ case Builtin::BI__builtin_fmal:
+ case Builtin::BIfma:
+ case Builtin::BIfmaf:
+ case Builtin::BIfmal: {
+ auto &Trip = CGM.getTriple();
+ if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
+ ConstAlways = true;
+ break;
+ }
+ default:
+ break;
+ }
+
bool ConstWithoutErrnoAndExceptions =
getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
bool ConstWithoutExceptions =
@@ -2362,14 +2386,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
bool ConstWithoutErrnoOrExceptions =
ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
bool GenerateIntrinsics =
- FD->hasAttr<ConstAttr>() && !ErrnoOverriden && !OptNone;
+ (ConstAlways && !OptNone) ||
+ (!getLangOpts().MathErrno &&
+ !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
if (!GenerateIntrinsics) {
GenerateIntrinsics =
ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
if (!GenerateIntrinsics)
GenerateIntrinsics =
ConstWithoutErrnoOrExceptions &&
- (!getLangOpts().MathErrno && !ErrnoOverriden && !OptNone);
+ (!getLangOpts().MathErrno &&
+ !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
if (!GenerateIntrinsics)
GenerateIntrinsics =
ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
diff --git a/clang/test/CodeGen/math-builtins.c b/clang/test/CodeGen/math-builtins.c
index 962e311698f5755..554c604219957ca 100644
--- a/clang/test/CodeGen/math-builtins.c
+++ b/clang/test/CodeGen/math-builtins.c
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm %s | FileCheck %s -check-prefix=NO__ERRNO
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s -check-prefix=HAS_ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm -disable-llvm-passes -O2 %s | FileCheck %s -check-prefix=NO__ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm -disable-llvm-passes -O2 -fmath-errno %s | FileCheck %s -check-prefix=HAS_ERRNO
// RUN: %clang_cc1 -triple x86_64-unknown-unknown-gnu -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_GNU
// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_WIN
diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c
index fa8f49d8a2c9ff4..02df4fe5fea6018 100644
--- a/clang/test/CodeGen/math-libcalls.c
+++ b/clang/test/CodeGen/math-libcalls.c
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-implicit-function-declaration -w -S -o - -emit-llvm %s | FileCheck %s --check-prefix=NO__ERRNO
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-implicit-function-declaration -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-implicit-function-declaration -w -S -o - -emit-llvm -disable-llvm-passes -O2 %s | FileCheck %s --check-prefix=NO__ERRNO
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-implicit-function-declaration -w -S -o - -emit-llvm -disable-llvm-passes -O2 -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-implicit-function-declaration -w -S -o - -emit-llvm -ffp-exception-behavior=maytrap %s | FileCheck %s --check-prefix=HAS_MAYTRAP
// RUN: %clang_cc1 -triple x86_64-unknown-unknown-gnu -Wno-implicit-function-declaration -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_GNU
// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -Wno-implicit-function-declaration -w -S -o - -emit-llvm -fmath-errno %s | FileCheck %s --check-prefix=HAS_ERRNO_WIN
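For context, a minimal sketch of what the always-const fma handling means in
practice (not part of the patch; the function name is hypothetical and the
expected IR follows from the reasoning above, namely that glibc and MSVCRT
never set errno for fma):

  // Even with -fmath-errno, on GNU and MSVCRT targets this is expected to
  // lower to the llvm.fma intrinsic rather than a libcall, because those
  // runtimes never set errno for fma.
  double fused(double a, double b, double c) {
    return __builtin_fma(a, b, c); // expected: call double @llvm.fma.f64(...)
  }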
>From 9120e854486d00307e14751769200ae4c45909a9 Mon Sep 17 00:00:00 2001
From: Ben Mudd <Ben.Mudd at sony.com>
Date: Tue, 19 Sep 2023 13:01:32 +0100
Subject: [PATCH 6/7] [Dexter] Associate parser errors with correct file
(#66765)
Currently, if Dexter encounters a parser error in a command, the resulting
error message refers to the most recently declared file (i.e. the source
file it is testing) rather than the file containing the command itself. This
patch makes parser errors point at the correct location.
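The idea, roughly, as an illustrative C++ struct (the actual change is the
Python namedtuple in the diff below; the field names mirror its fields):

  #include <string>

  // Track two paths per command while parsing: the file the command text
  // physically appears in, and the file most recently named by a
  // DexDeclareFile command.
  struct CmdPath {
    std::string base;     // used when reporting parser errors
    std::string declared; // recorded on successfully parsed commands
  };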
---
.../dexter/dex/command/ParseCommand.py | 26 ++++++++++---------
.../test/err_syntax_dexdeclarefile.cpp | 14 ++++++++++
2 files changed, 28 insertions(+), 12 deletions(-)
create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_dexdeclarefile.cpp
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py
index 5afefb1142fc7d7..29d7867e808673b 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py
@@ -13,7 +13,7 @@
import unittest
from copy import copy
from pathlib import PurePath
-from collections import defaultdict, OrderedDict
+from collections import defaultdict, OrderedDict, namedtuple
from dex.utils.Exceptions import CommandParseError, NonFloatValueInCommand
@@ -83,7 +83,7 @@ def _merge_subcommands(command_name: str, valid_commands: dict) -> dict:
def _build_command(
- command_type, labels, addresses, raw_text: str, path: str, lineno: str
+ command_type, labels, addresses, raw_text: str, path, lineno: str
) -> CommandBase:
"""Build a command object from raw text.
@@ -100,11 +100,13 @@ def label_to_line(label_name: str) -> int:
line = labels.get(label_name, None)
if line != None:
return line
- raise format_unresolved_label_err(label_name, raw_text, path, lineno)
+ raise format_unresolved_label_err(label_name, raw_text, path.base, lineno)
def get_address_object(address_name: str, offset: int = 0):
if address_name not in addresses:
- raise format_undeclared_address_err(address_name, raw_text, path, lineno)
+ raise format_undeclared_address_err(
+ address_name, raw_text, path.base, lineno
+ )
return AddressExpression(address_name, offset)
valid_commands = _merge_subcommands(
@@ -120,7 +122,7 @@ def get_address_object(address_name: str, offset: int = 0):
command = eval(raw_text, valid_commands)
# pylint: enable=eval-used
command.raw_text = raw_text
- command.path = path
+ command.path = path.declared
command.lineno = lineno
return command
@@ -267,7 +269,8 @@ def _find_all_commands_in_file(path, file_lines, valid_commands, source_root_dir
labels = {} # dict of {name: line}.
addresses = [] # list of addresses.
address_resolutions = {}
- cmd_path = path
+ CmdPath = namedtuple("cmd_path", "base declared")
+ cmd_path = CmdPath(path, path)
declared_files = set()
commands = defaultdict(dict)
paren_balance = 0
@@ -346,17 +349,16 @@ def _find_all_commands_in_file(path, file_lines, valid_commands, source_root_dir
elif type(command) is DexDeclareAddress:
add_address(addresses, command, path, cmd_point.get_lineno())
elif type(command) is DexDeclareFile:
- cmd_path = command.declared_file
- if not os.path.isabs(cmd_path):
+ declared_path = command.declared_file
+ if not os.path.isabs(declared_path):
source_dir = (
source_root_dir
if source_root_dir
else os.path.dirname(path)
)
- cmd_path = os.path.join(source_dir, cmd_path)
- # TODO: keep stored paths as PurePaths for 'longer'.
- cmd_path = str(PurePath(cmd_path))
- declared_files.add(cmd_path)
+ declared_path = os.path.join(source_dir, declared_path)
+ cmd_path = CmdPath(cmd_path.base, str(PurePath(declared_path)))
+ declared_files.add(cmd_path.declared)
elif type(command) is DexCommandLine and "DexCommandLine" in commands:
msg = "More than one DexCommandLine in file"
raise format_parse_err(msg, path, file_lines, err_point)
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_dexdeclarefile.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_dexdeclarefile.cpp
new file mode 100644
index 000000000000000..e3f08af204e7664
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_dexdeclarefile.cpp
@@ -0,0 +1,14 @@
+// Purpose:
+// Check that Dexter command syntax errors associate with the line and file
+// they appeared in rather than the current declared file.
+//
+// RUN: %dexter_regression_test_build %s -o %t
+// RUN: not %dexter_base test --binary %t --debugger 'lldb' -v -- %s \
+// RUN: | FileCheck %s --implicit-check-not=FAIL-FILENAME-MATCH
+
+// CHECK: err_syntax_dexdeclarefile.cpp(14): Undeclared address: 'not_been_declared'
+
+int main() { return 0; }
+
+// DexDeclareFile('FAIL-FILENAME-MATCH')
+// DexExpectWatchValue('example', address('not_been_declared'))
>From a4e15a200e4f146c0da70c7bb50566df60862025 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Mon, 18 Sep 2023 19:30:11 +0100
Subject: [PATCH 7/7] [FuncSpec] Adjust the names of specializations and
promoted stack values.
Currently the naming scheme is a bit funky; the specializations are named
after the original function followed by an arbitrary decimal number. This
makes it hard to debug inlined specializations of recursive functions.
With this patch I am adding ".specialized." between the original name and
the suffix, which is now a single incrementing counter.
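As a sketch of the resulting names (illustrative C++, not the patch's code;
the implementation builds the names with llvm::Twine from counters kept in
the specializer):

  #include <string>

  // "compute" -> "compute.specialized.1", "compute.specialized.2", ...
  // A single counter is shared by all specializations the pass creates.
  std::string specializedName(const std::string &Orig, unsigned &NSpecs) {
    return Orig + ".specialized." + std::to_string(++NSpecs);
  }

  // Promoted constant stack values follow the same pattern from a separate
  // counter: "specialized.arg.1", "specialized.arg.2", ...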
---
.../Transforms/IPO/FunctionSpecialization.h | 1 +
.../Transforms/IPO/FunctionSpecialization.cpp | 7 ++-
.../bug55000-read-uninitialized-value.ll | 4 +-
.../compiler-crash-58759.ll | 4 +-
.../compiler-crash-60191.ll | 6 +-
.../compiler-crash-promote-alloca.ll | 2 +-
.../FunctionSpecialization/constant-struct.ll | 4 +-
.../function-specialization-always-inline.ll | 2 +-
...tion-specialization-constant-expression.ll | 4 +-
...ion-specialization-constant-expression3.ll | 4 +-
...ion-specialization-constant-expression4.ll | 2 +-
...ion-specialization-constant-expression5.ll | 2 +-
...nction-specialization-constant-integers.ll | 4 +-
.../function-specialization-minsize.ll | 4 +-
.../function-specialization-minsize2.ll | 4 +-
.../function-specialization-minsize3.ll | 2 +-
.../function-specialization-nodup.ll | 4 +-
.../function-specialization-nodup2.ll | 4 +-
.../function-specialization-noexec.ll | 4 +-
.../function-specialization-nonconst-glob.ll | 10 ++--
.../function-specialization-recursive2.ll | 4 +-
.../function-specialization-recursive3.ll | 4 +-
.../function-specialization-recursive4.ll | 4 +-
.../function-specialization.ll | 12 ++--
.../function-specialization2.ll | 16 ++---
.../function-specialization3.ll | 10 ++--
.../function-specialization4.ll | 8 +--
.../function-specialization5.ll | 4 +-
.../get-possible-constants.ll | 16 ++---
.../FunctionSpecialization/global-rank.ll | 4 +-
.../global-var-constants.ll | 6 +-
.../identical-specializations.ll | 18 +++---
.../FunctionSpecialization/literal-const.ll | 24 ++++----
.../no-spec-unused-arg.ll | 2 +-
.../FunctionSpecialization/noinline.ll | 4 +-
.../non-argument-tracked.ll | 24 ++++----
.../promoteContantStackValues.ll | 58 +++++++++----------
.../remove-dead-recursive-function.ll | 8 +--
.../specialization-order.ll | 12 ++--
.../specialize-multiple-arguments.ll | 18 +++---
.../FunctionSpecialization/track-return.ll | 24 ++++----
41 files changed, 180 insertions(+), 178 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index cb8fa380a3aa3f6..59378bc10873e36 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -252,6 +252,7 @@ class FunctionSpecializer {
SmallPtrSet<Function *, 32> FullySpecialized;
DenseMap<Function *, CodeMetrics> FunctionMetrics;
DenseMap<Function *, unsigned> FunctionGrowth;
+ unsigned NGlobals = 0;
public:
FunctionSpecializer(
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 2c1f5f66da74bda..aa63e2b64d9eabc 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -487,7 +487,7 @@ void FunctionSpecializer::promoteConstantStackValues(Function *F) {
Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
GlobalValue::InternalLinkage, ConstVal,
- "funcspec.arg");
+ "specialized.arg." + Twine(++NGlobals));
if (ArgOpType != ConstVal->getType())
GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
@@ -719,9 +719,10 @@ void FunctionSpecializer::removeDeadFunctions() {
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
-static Function *cloneCandidateFunction(Function *F) {
+static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
ValueToValueMapTy Mappings;
Function *Clone = CloneFunction(F, Mappings);
+ Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
removeSSACopy(*Clone);
return Clone;
}
@@ -879,7 +880,7 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) {
Function *FunctionSpecializer::createSpecialization(Function *F,
const SpecSig &S) {
- Function *Clone = cloneCandidateFunction(F);
+ Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);
// The original function does not necessarily have internal linkage, but the
// clone must.
diff --git a/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll b/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll
index 0cd15384454343f..d96460efe346200 100644
--- a/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll
@@ -4,8 +4,8 @@ declare hidden i1 @compare(ptr) align 2
declare hidden { i8, ptr } @getType(ptr) align 2
; CHECK-LABEL: @foo
-; CHECK-LABEL: @foo.1
-; CHECK-LABEL: @foo.2
+; CHECK-LABEL: @foo.specialized.1
+; CHECK-LABEL: @foo.specialized.2
define internal void @foo(ptr %TLI, ptr %DL, ptr %Ty, ptr %ValueVTs, ptr %Offsets, i64 %StartingOffset) {
entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
index 5cbfaade98d3c65..f29cf0d123939a9 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
@@ -23,5 +23,5 @@ declare i32 @p0(i32 noundef)
declare i32 @p1(i32 noundef)
;; Tests that `f` has been fully specialized and that it didn't cause a compiler crash.
-;; CHECK-DAG: f.1
-;; CHECK-DAG: f.2
+;; CHECK-DAG: f.specialized.1
+;; CHECK-DAG: f.specialized.2
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
index 35364c4b0ad2bea..668929824cc6fa3 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll
@@ -60,7 +60,7 @@ define i32 @f2(i32 %offset) {
}
; Tests that `func` has been specialized and it didn't cause a compiler crash.
-; CHECK-DAG: func.1
-; CHECK-DAG: func.2
-; CHECK-DAG: func.3
+; CHECK-DAG: func.specialized.1
+; CHECK-DAG: func.specialized.2
+; CHECK-DAG: func.specialized.3
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-promote-alloca.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-promote-alloca.ll
index fff454db303c95e..38450ba6819d756 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-promote-alloca.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-promote-alloca.ll
@@ -2,7 +2,7 @@
; Tests that `bar` has been specialized and that the compiler did not crash
; while attempting to promote the alloca in `entry`.
-; CHECK: bar.1
+; CHECK: bar.specialized.1
@block = internal constant [8 x i8] zeroinitializer, align 1
diff --git a/llvm/test/Transforms/FunctionSpecialization/constant-struct.ll b/llvm/test/Transforms/FunctionSpecialization/constant-struct.ll
index 6c3bfaef49b0ad5..39df6b05aa5a746 100644
--- a/llvm/test/Transforms/FunctionSpecialization/constant-struct.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/constant-struct.ll
@@ -8,7 +8,7 @@ define i32 @foo(i32 %y0, i32 %y1) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Y:%.*]] = insertvalue { i32, i32 } undef, i32 [[Y0:%.*]], 0
; CHECK-NEXT: [[YY:%.*]] = insertvalue { i32, i32 } [[Y]], i32 [[Y1:%.*]], 1
-; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @add.1({ i32, i32 } { i32 2, i32 3 }, { i32, i32 } [[YY]])
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @add.specialized.1({ i32, i32 } { i32 2, i32 3 }, { i32, i32 } [[YY]])
; CHECK-NEXT: ret i32 [[CALL]]
;
entry:
@@ -23,7 +23,7 @@ define i32 @bar(i32 %x0, i32 %x1) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X:%.*]] = insertvalue { i32, i32 } undef, i32 [[X0:%.*]], 0
; CHECK-NEXT: [[XX:%.*]] = insertvalue { i32, i32 } [[X]], i32 [[X1:%.*]], 1
-; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @add.2({ i32, i32 } [[XX]], { i32, i32 } { i32 3, i32 2 })
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @add.specialized.2({ i32, i32 } [[XX]], { i32, i32 } { i32 3, i32 2 })
; CHECK-NEXT: ret i32 [[CALL]]
;
entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll
index b5a0084ed52e63c..5e65b0db7b235fd 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll
@@ -1,6 +1,6 @@
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
-; CHECK-NOT: foo.{{[0-9]+}}
+; CHECK-NOT: foo.specialized.{{[0-9]+}}
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
index 57db1cf71a9b9ed..c242816b91d43cd 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
@@ -30,10 +30,10 @@ define internal i64 @zoo(i1 %flag) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
; CHECK: plus:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i64 0, i32 3))
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.specialized.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i64 0, i32 3))
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: minus:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i64 0, i32 4))
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.specialized.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i64 0, i32 4))
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i64 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i64 0, i32 4) to i64), [[MINUS]] ]
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll
index b1b7f1fd820d63f..7eaa68064607b85 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll
@@ -4,8 +4,8 @@
define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: bb:
-; CHECK-NEXT: tail call void @wombat.1(ptr undef, i64 undef, i64 undef, ptr @quux)
-; CHECK-NEXT: tail call void @wombat.2(ptr undef, i64 undef, i64 undef, ptr @eggs)
+; CHECK-NEXT: tail call void @wombat.specialized.1(ptr undef, i64 undef, i64 undef, ptr @quux)
+; CHECK-NEXT: tail call void @wombat.specialized.2(ptr undef, i64 undef, i64 undef, ptr @eggs)
; CHECK-NEXT: ret i32 undef
;
bb:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll
index 9cac9ca95f8bce0..0410bfd9407b99d 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll
@@ -2,7 +2,7 @@
; Check that we don't crash and don't specialise on a function call with byval attribute.
-; CHECK-NOT: wombat.{{[0-9]+}}
+; CHECK-NOT: wombat.specialized.{{[0-9]+}}
declare ptr @quux()
declare ptr @eggs()
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll
index c53673cf84b636f..c1b442be97e6224 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll
@@ -3,7 +3,7 @@
; Check that we don't crash and don't specialise on a scalar global variable with byval attribute.
-; CHECK-NOT: wombat.{{[0-9]+}}
+; CHECK-NOT: wombat.specialized.{{[0-9]+}}
%struct.pluto = type { %struct.spam }
%struct.quux = type { i16 }
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
index 976a326a4a886c1..17f9c30122d10a3 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
@@ -1,8 +1,8 @@
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=true -force-specialization -S < %s | FileCheck %s
; Check that the literal constant parameter could be specialized.
-; CHECK: @foo.1(
-; CHECK: @foo.2(
+; CHECK: @foo.specialized.1(
+; CHECK: @foo.specialized.2(
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll
index 6cc35403cc4e15a..9127e90ce23e8c9 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll
@@ -1,7 +1,7 @@
; RUN: opt -passes="ipsccp<func-spec>" -S < %s | FileCheck %s
-; CHECK-NOT: @compute.1
-; CHECK-NOT: @compute.2
+; CHECK-NOT: @compute.specialized.1
+; CHECK-NOT: @compute.specialized.2
define i64 @main(i64 %x, i1 %flag) {
entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll
index 2d0e04d01dc3740..0e2c4836b69309d 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll
@@ -3,8 +3,8 @@
; Checks for callsites that have been annotated with MinSize. No specialisation
; expected here:
;
-; CHECK-NOT: @compute.1
-; CHECK-NOT: @compute.2
+; CHECK-NOT: @compute.specialized.1
+; CHECK-NOT: @compute.specialized.2
define i64 @main(i64 %x, i1 %flag) {
entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
index 525721f03cfb251..743ca89e96cc752 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
@@ -4,7 +4,7 @@
; specialisation for the call that does not have the attribute:
;
; CHECK: plus:
-; CHECK: %tmp0 = call i64 @compute.1(i64 %x, ptr @plus)
+; CHECK: %tmp0 = call i64 @compute.specialized.1(i64 %x, ptr @plus)
; CHECK: br label %merge
; CHECK: minus:
; CHECK: %tmp1 = call i64 @compute(i64 %x, ptr @minus) #0
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll
index d9dcb44dcdb525e..a89aa2db84333e1 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll
@@ -3,8 +3,8 @@
; Function @foo has function attribute 'noduplicate', so check that we don't
; specialize it:
-; CHECK-NOT: @foo.1(
-; CHECK-NOT: @foo.2(
+; CHECK-NOT: @foo.specialized.1(
+; CHECK-NOT: @foo.specialized.2(
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll
index c950dfa31e4b2c4..a61013e07b73a9d 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll
@@ -5,8 +5,8 @@
; Please note that the use of the hardwareloop intrinsic is arbitrary; it's
; just an easy to use intrinsic that has NoDuplicate.
-; CHECK-NOT: @foo.1(
-; CHECK-NOT: @foo.2(
+; CHECK-NOT: @foo.specialized.1(
+; CHECK-NOT: @foo.specialized.2(
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll
index d1e2a77dfc19c91..47624c7b1fe3344 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll
@@ -2,8 +2,8 @@
; The if.then block is not executed, so check that we don't specialise here.
-; CHECK-NOT: @foo.1(
-; CHECK-NOT: @foo.2(
+; CHECK-NOT: @foo.specialized.1(
+; CHECK-NOT: @foo.specialized.2(
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll
index 54eed8d1346fede..6c22126121613fb 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll
@@ -7,8 +7,8 @@
; Global B is not constant. We do not specialise on addresses unless we
; enable that:
-; ON-ADDRESS: call i32 @foo.1(i32 %x, ptr @A)
-; ON-ADDRESS: call i32 @foo.2(i32 %y, ptr @B)
+; ON-ADDRESS: call i32 @foo.specialized.1(i32 %x, ptr @A)
+; ON-ADDRESS: call i32 @foo.specialized.2(i32 %y, ptr @B)
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -21,7 +21,7 @@ define dso_local i32 @bar(i32 %x, i32 %y) {
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo.1(i32 [[X]], ptr @A)
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo.specialized.1(i32 [[X]], ptr @A)
; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: if.else:
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @foo(i32 [[Y:%.*]], ptr @B)
@@ -60,11 +60,11 @@ entry:
ret i32 %add
}
-; CHECK-LABEL: define internal i32 @foo.1(i32 %x, ptr %b) {
+; CHECK-LABEL: define internal i32 @foo.specialized.1(i32 %x, ptr %b) {
; CHECK-NEXT: entry:
; CHECK-NEXT: %0 = load i32, ptr @A, align 4
; CHECK-NEXT: %add = add nsw i32 %x, %0
; CHECK-NEXT: ret i32 %add
; CHECK-NEXT: }
-; CHECK-NOT: define internal i32 @foo.2(
+; CHECK-NOT: define internal i32 @foo.specialized.2(
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll
index 9c7d3b22c917d89..68ad5821dbe6e55 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll
@@ -2,8 +2,8 @@
; Volatile store preventing recursive specialisation:
;
-; CHECK: @recursiveFunc.1
-; CHECK-NOT: @recursiveFunc.2
+; CHECK: @recursiveFunc.specialized.1
+; CHECK-NOT: @recursiveFunc.specialized.2
@Global = internal constant i32 1, align 4
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll
index 633138721e5540a..a98c91d2b82a391 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll
@@ -2,8 +2,8 @@
; Duplicate store preventing recursive specialisation:
;
-; CHECK: @recursiveFunc.1
-; CHECK-NOT: @recursiveFunc.2
+; CHECK: @recursiveFunc.specialized.1
+; CHECK-NOT: @recursiveFunc.specialized.2
@Global = internal constant i32 1, align 4
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll
index 6dca04c17bf4dca..e9bee3e50088d12 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll
@@ -2,8 +2,8 @@
; Alloca is not an integer type:
;
-; CHECK: @recursiveFunc.1
-; CHECK-NOT: @recursiveFunc.2
+; CHECK: @recursiveFunc.specialized.1
+; CHECK-NOT: @recursiveFunc.specialized.2
@Global = internal constant i32 1, align 4
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
index b5d16f6dab1c05c..e1cacce4a7dde20 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
@@ -7,10 +7,10 @@ define i64 @main(i64 %x, i1 %flag) {
; CHECK: entry:
; CHECK-NEXT: br i1 %flag, label %plus, label %minus
; CHECK: plus:
-; CHECK-NEXT: [[TMP0:%.+]] = call i64 @compute.1(i64 %x, ptr @plus)
+; CHECK-NEXT: [[TMP0:%.+]] = call i64 @compute.specialized.1(i64 %x, ptr @plus)
; CHECK-NEXT: br label %merge
; CHECK: minus:
-; CHECK-NEXT: [[TMP1:%.+]] = call i64 @compute.2(i64 %x, ptr @minus)
+; CHECK-NEXT: [[TMP1:%.+]] = call i64 @compute.specialized.2(i64 %x, ptr @minus)
; CHECK-NEXT: br label %merge
; CHECK: merge:
; CHECK-NEXT: [[TMP2:%.+]] = phi i64 [ [[TMP0]], %plus ], [ [[TMP1]], %minus ]
@@ -18,7 +18,7 @@ define i64 @main(i64 %x, i1 %flag) {
; CHECK-NEXT: }
;
; NOFSPEC-LABEL: @main(i64 %x, i1 %flag) {
-; NOFSPEC-NOT: call i64 @compute.{{[0-9]+}}(
+; NOFSPEC-NOT: call i64 @compute.specialized.{{[0-9]+}}(
; NOFSPEC: call i64 @compute(
;
entry:
@@ -39,20 +39,20 @@ merge:
; CHECK-NOT: define internal i64 @compute(
;
-; CHECK-LABEL: define internal i64 @compute.1(i64 %x, ptr %binop) {
+; CHECK-LABEL: define internal i64 @compute.specialized.1(i64 %x, ptr %binop) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.+]] = call i64 @plus(i64 %x)
; CHECK-NEXT: ret i64 [[TMP0]]
; CHECK-NEXT: }
;
-; CHECK-LABEL: define internal i64 @compute.2(i64 %x, ptr %binop) {
+; CHECK-LABEL: define internal i64 @compute.specialized.2(i64 %x, ptr %binop) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x)
; CHECK-NEXT: ret i64 [[TMP0]]
; CHECK-NEXT: }
;
; NOFSPEC: define internal i64 @compute(
-; NOFSPEC-NOT: define internal i64 @compute.{{[0-9]+}}(
+; NOFSPEC-NOT: define internal i64 @compute.specialized.{{[0-9]+}}(
;
define internal i64 @compute(i64 %x, ptr %binop) {
entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
index 950ed13f7b9e135..b6cdcf18eea4297 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
@@ -3,8 +3,8 @@
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
-; DISABLED-NOT: @func.1(
-; DISABLED-NOT: @func.2(
+; DISABLED-NOT: @func.specialized.1(
+; DISABLED-NOT: @func.specialized.2(
define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
%4 = alloca i32, align 4
@@ -42,15 +42,15 @@ define internal void @decrement(ptr nocapture %0) {
}
define i32 @main(ptr %0, i32 %1) {
-; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; CHECK: call void @func.specialized.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
%3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
-; CHECK: call void @func.1(ptr [[TMP0]], i32 0)
+; CHECK: call void @func.specialized.1(ptr [[TMP0]], i32 0)
%4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
; CHECK: ret i32 0
ret i32 %4
}
-; CHECK: @func.1(
+; CHECK: @func.specialized.1(
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -63,13 +63,13 @@ define i32 @main(ptr %0, i32 %1) {
; CHECK: call void @decrement(ptr [[TMP9]])
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
+; CHECK: call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP11]])
; CHECK: br label [[TMP12:%.*]]
; CHECK: 12:
; CHECK: ret void
;
;
-; CHECK: @func.2(
+; CHECK: @func.specialized.2(
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -82,7 +82,7 @@ define i32 @main(ptr %0, i32 %1) {
; CHECK: call void @increment(ptr [[TMP9]])
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
+; CHECK: call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP11]])
; CHECK: br label [[TMP12:%.*]]
; CHECK: 12:
; CHECK: ret void
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll
index d80b6dfcf18aa15..8e075edaa6844c9 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll
@@ -12,9 +12,9 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define dso_local i32 @bar(i32 %x, i32 %y) {
; COMMON-LABEL: @bar
-; FORCE: %call = call i32 @foo.1(i32 %x, ptr @A)
-; FORCE: %call1 = call i32 @foo.2(i32 %y, ptr @B)
-; DISABLED-NOT: %call1 = call i32 @foo.1(
+; FORCE: %call = call i32 @foo.specialized.1(i32 %x, ptr @A)
+; FORCE: %call1 = call i32 @foo.specialized.2(i32 %y, ptr @B)
+; DISABLED-NOT: %call1 = call i32 @foo.specialized.1(
entry:
%tobool = icmp ne i32 %x, 0
br i1 %tobool, label %if.then, label %if.else
@@ -34,14 +34,14 @@ return:
; FORCE-NOT: define internal i32 @foo(
;
-; FORCE: define internal i32 @foo.1(i32 %x, ptr %b) {
+; FORCE: define internal i32 @foo.specialized.1(i32 %x, ptr %b) {
; FORCE-NEXT: entry:
; FORCE-NEXT: %0 = load i32, ptr @A, align 4
; FORCE-NEXT: %add = add nsw i32 %x, %0
; FORCE-NEXT: ret i32 %add
; FORCE-NEXT: }
;
-; FORCE: define internal i32 @foo.2(i32 %x, ptr %b) {
+; FORCE: define internal i32 @foo.specialized.2(i32 %x, ptr %b) {
; FORCE-NEXT: entry:
; FORCE-NEXT: %0 = load i32, ptr @B, align 4
; FORCE-NEXT: %add = add nsw i32 %x, %0
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
index 0e859a0a24feb26..4e5a196d6682912 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
@@ -39,10 +39,10 @@ entry:
ret i32 %add1
}
-; CONST1: define internal i32 @foo.1(i32 %x, ptr %b, ptr %c)
-; CONST1-NOT: define internal i32 @foo.2(i32 %x, ptr %b, ptr %c)
+; CONST1: define internal i32 @foo.specialized.1(i32 %x, ptr %b, ptr %c)
+; CONST1-NOT: define internal i32 @foo.specialized.2(i32 %x, ptr %b, ptr %c)
-; CHECK: define internal i32 @foo.1(i32 %x, ptr %b, ptr %c) {
+; CHECK: define internal i32 @foo.specialized.1(i32 %x, ptr %b, ptr %c) {
; CHECK-NEXT: entry:
; CHECK-NEXT: %0 = load i32, ptr @A, align 4
; CHECK-NEXT: %add = add nsw i32 %x, %0
@@ -51,7 +51,7 @@ entry:
; CHECK-NEXT: ret i32 %add1
; CHECK-NEXT: }
-; CHECK: define internal i32 @foo.2(i32 %x, ptr %b, ptr %c) {
+; CHECK: define internal i32 @foo.specialized.2(i32 %x, ptr %b, ptr %c) {
; CHECK-NEXT: entry:
; CHECK-NEXT: %0 = load i32, ptr @B, align 4
; CHECK-NEXT: %add = add nsw i32 %x, %0
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll
index b272510b3939f21..aff1b770f8c812e 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll
@@ -2,8 +2,8 @@
; There's nothing to specialize here as both calls are the same, so check that:
;
-; CHECK-NOT: define internal i32 @foo.1(
-; CHECK-NOT: define internal i32 @foo.2(
+; CHECK-NOT: define internal i32 @foo.specialized.1(
+; CHECK-NOT: define internal i32 @foo.specialized.2(
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
index 9b14db5399f3da2..dfa1e5a42776a5f 100644
--- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
@@ -12,7 +12,7 @@ entry:
}
; CHECK-LABEL: define dso_local i32 @f0
-; CHECK: tail call fastcc i32 @g.[[#A:]]({{.*}}@p0)
+; CHECK: tail call fastcc i32 @g.specialized.[[#A:]]({{.*}}@p0)
;
define dso_local i32 @f0(i32 noundef %x) {
entry:
@@ -21,7 +21,7 @@ entry:
}
; CHECK-LABEL: define dso_local i32 @f1
-; CHECK: tail call fastcc i32 @g.[[#B:]]({{.*}}@p1)
+; CHECK: tail call fastcc i32 @g.specialized.[[#B:]]({{.*}}@p1)
;
define dso_local i32 @f1(i32 noundef %x) {
entry:
@@ -40,7 +40,7 @@ entry:
}
; CHECK-LABEL: define dso_local i32 @g0
-; CHECK: tail call fastcc i32 @f.[[#C:]]({{.*}}@p0)
+; CHECK: tail call fastcc i32 @f.specialized.[[#C:]]({{.*}}@p0)
;
define dso_local i32 @g0(i32 noundef %x) {
entry:
@@ -56,7 +56,7 @@ entry:
}
; CHECK-LABEL: define dso_local i32 @g1
-; CHECK: tail call fastcc i32 @f.[[#D:]]({{.*}}@p1)
+; CHECK: tail call fastcc i32 @f.specialized.[[#D:]]({{.*}}@p1)
;
define dso_local i32 @g1(i32 noundef %x) {
entry:
@@ -76,7 +76,7 @@ entry:
; Also check that for callsites which reside in the body of newly created
; (specialized) functions, the lattice value of the arguments is known.
;
-; CHECK-DAG: define internal fastcc i32 @g.[[#A]]
-; CHECK-DAG: define internal fastcc i32 @g.[[#B]]
-; CHECK-DAG: define internal fastcc i32 @f.[[#C]]
-; CHECK-DAG: define internal fastcc i32 @f.[[#D]]
+; CHECK-DAG: define internal fastcc i32 @g.specialized.[[#A]]
+; CHECK-DAG: define internal fastcc i32 @g.specialized.[[#B]]
+; CHECK-DAG: define internal fastcc i32 @f.specialized.[[#C]]
+; CHECK-DAG: define internal fastcc i32 @f.specialized.[[#D]]
diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
index 541faa2e19515e3..1926e29ddee0136 100644
--- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
@@ -47,5 +47,5 @@ declare i32 @qq(i32 noundef)
; `f` to be chosen, whereas the old algorithm would choose
; one specialisation of `f` and one of `g`.
-; CHECK-DAG: define internal i32 @f.1
-; CHECK-DAG: define internal i32 @f.2
+; CHECK-DAG: define internal i32 @f.specialized.1
+; CHECK-DAG: define internal i32 @f.specialized.2
diff --git a/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll b/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll
index 14c4855b58a24dc..b9481baae60b9eb 100644
--- a/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll
@@ -65,14 +65,14 @@ entry:
; is allowed, then it is performed where possible.
; GLOBALS-LABEL: define internal i32 @g()
-; GLOBALS: call i32 @f.2()
+; GLOBALS: call i32 @f.specialized.2()
; GLOBALS-LABEL: define i32 @h0(ptr %p)
; GLOBALS: call i32 @g()
; GLOBALS-LABEL: define i32 @h1()
-; GLOBALS: call i32 @f.2()
+; GLOBALS: call i32 @f.specialized.2()
; GLOBALS-LABEL: define i32 @h2()
-; GLOBALS: call i32 @f.1()
+; GLOBALS: call i32 @f.specialized.1()
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
index c2ba0920c2be334..368f3b044034471 100644
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -6,14 +6,14 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
; CHECK: plus:
-; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
+; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.specialized.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: minus:
-; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.specialized.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
-; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.2(i64 [[PH]], i64 42, ptr @plus, ptr @minus)
+; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.specialized.2(i64 [[PH]], i64 42, ptr @plus, ptr @minus)
; CHECK-NEXT: ret i64 [[CMP2]]
;
entry:
@@ -60,20 +60,20 @@ entry:
ret i64 %sub
}
-; CHECK-LABEL: @compute.1
+; CHECK-LABEL: @compute.specialized.1
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP0:%.*]] = call i64 %binop1(i64 [[X:%.*]], i64 [[Y:%.*]])
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr %binop1, ptr @plus)
+; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr %binop1, ptr @plus)
-; CHECK-LABEL: @compute.2
+; CHECK-LABEL: @compute.specialized.2
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
-; CHECK-LABEL: @compute.3
+; CHECK-LABEL: @compute.specialized.3
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @minus(i64 [[X:%.*]], i64 [[Y:%.*]])
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.specialized.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
index fc400202ab91e9a..f107ffe0ec7ebfd 100644
--- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
@@ -66,27 +66,27 @@ entry:
; CHECK-NOLIT-NOT: @addf.
; CHECK-LIT-LABEL: define i32 @f0
-; CHECK-LIT: call i32 @neg.[[#A:]]
+; CHECK-LIT: call i32 @neg.specialized.[[#A:]]
; CHECK-LIT-LABEL: define i32 @f1
-; CHECK-LIT: call i32 @neg.[[#B:]]
+; CHECK-LIT: call i32 @neg.specialized.[[#B:]]
; CHECK-LIT-LABEL: define i32 @g0
-; CHECK-LIT: call i32 @add.[[#C:]]
+; CHECK-LIT: call i32 @add.specialized.[[#C:]]
; CHECK-LIT-LABEL: define i32 @g1
-; CHECK-LIT: call i32 @add.[[#D:]]
+; CHECK-LIT: call i32 @add.specialized.[[#D:]]
; CHECK-LIT-LABEL: define float @h0
-; CHECK-LIT: call float @addf.[[#E:]]
+; CHECK-LIT: call float @addf.specialized.[[#E:]]
; CHECK-LIT-LABEL: define float @h1
-; CHECK-LIT: call float @addf.[[#F:]]
+; CHECK-LIT: call float @addf.specialized.[[#F:]]
; Check all of `neg`, `add`, and `addf` were specialised.
-; CHECK-LIT-DAG: @neg.[[#A]]
-; CHECK-LIT-DAG: @neg.[[#B]]
-; CHECK-LIT-DAG: @add.[[#C]]
-; CHECK-LIT-DAG: @add.[[#D]]
-; CHECK-LIT-DAG: @addf.[[#E]]
-; CHECK-LIT-DAG: @addf.[[#F]]
+; CHECK-LIT-DAG: @neg.specialized.[[#A]]
+; CHECK-LIT-DAG: @neg.specialized.[[#B]]
+; CHECK-LIT-DAG: @add.specialized.[[#C]]
+; CHECK-LIT-DAG: @add.specialized.[[#D]]
+; CHECK-LIT-DAG: @addf.specialized.[[#E]]
+; CHECK-LIT-DAG: @addf.specialized.[[#F]]
diff --git a/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll b/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll
index 38345652bfc8cfe..8469727e974fe37 100644
--- a/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll
@@ -17,4 +17,4 @@ define i32 @g1() {
; to be a constant without the need for function specialisation and
; the second parameter is unused.
-; CHECK-NOT: @f.
+; CHECK-NOT: @f.specialized.
diff --git a/llvm/test/Transforms/FunctionSpecialization/noinline.ll b/llvm/test/Transforms/FunctionSpecialization/noinline.ll
index 863e6e74eb23ced..73576402b002965 100644
--- a/llvm/test/Transforms/FunctionSpecialization/noinline.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/noinline.ll
@@ -31,5 +31,5 @@ entry:
}
; Check that a noinline function is specialized, even if it's small.
-; CHECK: @f.1
-; CHECK: @f.2
+; CHECK: @f.specialized.1
+; CHECK: @f.specialized.2
diff --git a/llvm/test/Transforms/FunctionSpecialization/non-argument-tracked.ll b/llvm/test/Transforms/FunctionSpecialization/non-argument-tracked.ll
index 14a6fd746d09e56..9446e557da75817 100644
--- a/llvm/test/Transforms/FunctionSpecialization/non-argument-tracked.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/non-argument-tracked.ll
@@ -29,9 +29,9 @@ define internal i32 @f2(i32 %i) {
;; All calls are to specialisation instances.
; CHECK-LABEL: define i32 @g0
-; CHECK: call void @f0.[[#A:]]()
-; CHECK-NEXT: call void @f1.[[#B:]]()
-; CHECK-NEXT: call void @f2.[[#C:]]()
+; CHECK: call void @f0.specialized.[[#A:]]()
+; CHECK-NEXT: call void @f1.specialized.[[#B:]]()
+; CHECK-NEXT: call void @f2.specialized.[[#C:]]()
; CHECK-NEXT: ret i32 9
define i32 @g0(i32 %i) {
%u0 = call i32 @f0(i32 1)
@@ -43,9 +43,9 @@ define i32 @g0(i32 %i) {
}
; CHECK-LABEL: define i32 @g1
-; CHECK: call void @f0.[[#D:]]()
-; CHECK-NEXT: call void @f1.[[#E:]]()
-; CHECK-NEXT: call void @f2.[[#F:]]()
+; CHECK: call void @f0.specialized.[[#D:]]()
+; CHECK-NEXT: call void @f1.specialized.[[#E:]]()
+; CHECK-NEXT: call void @f2.specialized.[[#F:]]()
; CHECK-NEXT: ret i32 12
define i32 @g1(i32 %i) {
%u0 = call i32 @f0(i32 2)
@@ -58,9 +58,9 @@ define i32 @g1(i32 %i) {
; All of the functions are specialized and all clones have internal linkage.
-; CHECK-DAG: define internal void @f0.[[#A]]() {
-; CHECK-DAG: define internal void @f1.[[#B]]() {
-; CHECK-DAG: define internal void @f2.[[#C]]() {
-; CHECK-DAG: define internal void @f0.[[#D]]() {
-; CHECK-DAG: define internal void @f1.[[#E]]() {
-; CHECK-DAG: define internal void @f2.[[#F]]() {
+; CHECK-DAG: define internal void @f0.specialized.[[#A]]() {
+; CHECK-DAG: define internal void @f1.specialized.[[#B]]() {
+; CHECK-DAG: define internal void @f2.specialized.[[#C]]() {
+; CHECK-DAG: define internal void @f0.specialized.[[#D]]() {
+; CHECK-DAG: define internal void @f1.specialized.[[#E]]() {
+; CHECK-DAG: define internal void @f2.specialized.[[#F]]() {
diff --git a/llvm/test/Transforms/FunctionSpecialization/promoteContantStackValues.ll b/llvm/test/Transforms/FunctionSpecialization/promoteContantStackValues.ll
index 256cbebae062042..e75e0476957eb19 100644
--- a/llvm/test/Transforms/FunctionSpecialization/promoteContantStackValues.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/promoteContantStackValues.ll
@@ -27,53 +27,53 @@ ret.block:
ret void
}
-; ITERS1: @funcspec.arg = internal constant i32 0
-; ITERS1: @funcspec.arg.1 = internal constant i32 6
-; ITERS1: @funcspec.arg.3 = internal constant i32 1
-; ITERS1: @funcspec.arg.4 = internal constant i32 5
+; ITERS1: @specialized.arg.1 = internal constant i32 0
+; ITERS1: @specialized.arg.2 = internal constant i32 6
+; ITERS1: @specialized.arg.3 = internal constant i32 1
+; ITERS1: @specialized.arg.4 = internal constant i32 5
-; ITERS2: @funcspec.arg = internal constant i32 0
-; ITERS2: @funcspec.arg.1 = internal constant i32 6
-; ITERS2: @funcspec.arg.3 = internal constant i32 1
-; ITERS2: @funcspec.arg.4 = internal constant i32 5
-; ITERS2: @funcspec.arg.6 = internal constant i32 2
-; ITERS2: @funcspec.arg.7 = internal constant i32 4
+; ITERS2: @specialized.arg.1 = internal constant i32 0
+; ITERS2: @specialized.arg.2 = internal constant i32 6
+; ITERS2: @specialized.arg.3 = internal constant i32 1
+; ITERS2: @specialized.arg.4 = internal constant i32 5
+; ITERS2: @specialized.arg.5 = internal constant i32 2
+; ITERS2: @specialized.arg.6 = internal constant i32 4
-; ITERS3: @funcspec.arg = internal constant i32 0
-; ITERS3: @funcspec.arg.1 = internal constant i32 6
-; ITERS3: @funcspec.arg.3 = internal constant i32 1
-; ITERS3: @funcspec.arg.4 = internal constant i32 5
-; ITERS3: @funcspec.arg.6 = internal constant i32 2
-; ITERS3: @funcspec.arg.7 = internal constant i32 4
-; ITERS3: @funcspec.arg.9 = internal constant i32 3
-; ITERS3: @funcspec.arg.10 = internal constant i32 3
+; ITERS3: @specialized.arg.1 = internal constant i32 0
+; ITERS3: @specialized.arg.2 = internal constant i32 6
+; ITERS3: @specialized.arg.3 = internal constant i32 1
+; ITERS3: @specialized.arg.4 = internal constant i32 5
+; ITERS3: @specialized.arg.5 = internal constant i32 2
+; ITERS3: @specialized.arg.6 = internal constant i32 4
+; ITERS3: @specialized.arg.7 = internal constant i32 3
+; ITERS3: @specialized.arg.8 = internal constant i32 3
-; ITERS4: @funcspec.arg = internal constant i32 0
-; ITERS4: @funcspec.arg.1 = internal constant i32 6
-; ITERS4: @funcspec.arg.3 = internal constant i32 1
-; ITERS4: @funcspec.arg.4 = internal constant i32 5
-; ITERS4: @funcspec.arg.6 = internal constant i32 2
-; ITERS4: @funcspec.arg.7 = internal constant i32 4
-; ITERS4: @funcspec.arg.9 = internal constant i32 3
-; ITERS4: @funcspec.arg.10 = internal constant i32 3
+; ITERS4: @specialized.arg.1 = internal constant i32 0
+; ITERS4: @specialized.arg.2 = internal constant i32 6
+; ITERS4: @specialized.arg.3 = internal constant i32 1
+; ITERS4: @specialized.arg.4 = internal constant i32 5
+; ITERS4: @specialized.arg.5 = internal constant i32 2
+; ITERS4: @specialized.arg.6 = internal constant i32 4
+; ITERS4: @specialized.arg.7 = internal constant i32 3
+; ITERS4: @specialized.arg.8 = internal constant i32 3
define i32 @main() {
; ITERS1-LABEL: @main(
; ITERS1-NEXT: call void @print_val(i32 0, i32 6)
-; ITERS1-NEXT: call void @recursiveFunc(ptr nonnull @funcspec.arg.3, i32 1, ptr nonnull @funcspec.arg.4)
+; ITERS1-NEXT: call void @recursiveFunc(ptr nonnull @specialized.arg.3, i32 1, ptr nonnull @specialized.arg.4)
; ITERS1-NEXT: ret i32 0
;
; ITERS2-LABEL: @main(
; ITERS2-NEXT: call void @print_val(i32 0, i32 6)
; ITERS2-NEXT: call void @print_val(i32 1, i32 5)
-; ITERS2-NEXT: call void @recursiveFunc(ptr nonnull @funcspec.arg.6, i32 1, ptr nonnull @funcspec.arg.7)
+; ITERS2-NEXT: call void @recursiveFunc(ptr nonnull @specialized.arg.5, i32 1, ptr nonnull @specialized.arg.6)
; ITERS2-NEXT: ret i32 0
;
; ITERS3-LABEL: @main(
; ITERS3-NEXT: call void @print_val(i32 0, i32 6)
; ITERS3-NEXT: call void @print_val(i32 1, i32 5)
; ITERS3-NEXT: call void @print_val(i32 2, i32 4)
-; ITERS3-NEXT: call void @recursiveFunc(ptr nonnull @funcspec.arg.9, i32 1, ptr nonnull @funcspec.arg.10)
+; ITERS3-NEXT: call void @recursiveFunc(ptr nonnull @specialized.arg.7, i32 1, ptr nonnull @specialized.arg.8)
; ITERS3-NEXT: ret i32 0
;
; ITERS4-LABEL: @main(
diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
index 4233998ad9f6d48..810526532c1060d 100644
--- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
@@ -19,14 +19,14 @@ merge:
; CHECK-NOT: define internal i64 @compute(
;
-; CHECK-LABEL: define internal i64 @compute.1(i64 %n, ptr %binop) {
+; CHECK-LABEL: define internal i64 @compute.specialized.1(i64 %n, ptr %binop) {
; CHECK: [[TMP0:%.+]] = call i64 @plus(i64 %n)
-; CHECK: [[TMP1:%.+]] = call i64 @compute.1(i64 [[TMP2:%.+]], ptr @plus)
+; CHECK: [[TMP1:%.+]] = call i64 @compute.specialized.1(i64 [[TMP2:%.+]], ptr @plus)
; CHECK: add nsw i64 [[TMP1]], [[TMP0]]
;
-; CHECK-LABEL: define internal i64 @compute.2(i64 %n, ptr %binop) {
+; CHECK-LABEL: define internal i64 @compute.specialized.2(i64 %n, ptr %binop) {
; CHECK: [[TMP0:%.+]] = call i64 @minus(i64 %n)
-; CHECK: [[TMP1:%.+]] = call i64 @compute.2(i64 [[TMP2:%.+]], ptr @minus)
+; CHECK: [[TMP1:%.+]] = call i64 @compute.specialized.2(i64 [[TMP2:%.+]], ptr @minus)
; CHECK: add nsw i64 [[TMP1]], [[TMP0]]
;
define internal i64 @compute(i64 %n, ptr %binop) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll
index e0afb3bef1039fc..da4cb40fb6dc503 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll
@@ -21,7 +21,7 @@ entry:
define dso_local i32 @g0(i32 %x, i32 %y) {
; CHECK-LABEL: @g0
-; CHECK: call i32 @f.3(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK: call i32 @f.specialized.3(i32 [[X:%.*]], i32 [[Y:%.*]])
entry:
%call = tail call i32 @f(i32 %x, i32 %y, ptr @add, ptr @add)
ret i32 %call
@@ -30,7 +30,7 @@ entry:
define dso_local i32 @g1(i32 %x, i32 %y) {
; CHECK-LABEL: @g1(
-; CHECK: call i32 @f.2(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK: call i32 @f.specialized.2(i32 [[X:%.*]], i32 [[Y:%.*]])
entry:
%call = tail call i32 @f(i32 %x, i32 %y, ptr @sub, ptr @add)
ret i32 %call
@@ -38,21 +38,21 @@ entry:
define dso_local i32 @g2(i32 %x, i32 %y, ptr %v) {
; CHECK-LABEL: @g2
-; CHECK: call i32 @f.1(i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[V:%.*]])
+; CHECK: call i32 @f.specialized.1(i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[V:%.*]])
entry:
%call = tail call i32 @f(i32 %x, i32 %y, ptr @sub, ptr %v)
ret i32 %call
}
-; CHECK-LABEL: define {{.*}} i32 @f.1
+; CHECK-LABEL: define {{.*}} i32 @f.specialized.1
; CHECK: call i32 @sub(i32 %x, i32 %y)
; CHECK-NEXT: call i32 %v(i32 %x, i32 %y)
-; CHECK-LABEL: define {{.*}} i32 @f.2
+; CHECK-LABEL: define {{.*}} i32 @f.specialized.2
; CHECK: call i32 @sub(i32 %x, i32 %y)
; CHECK-NEXT: call i32 @add(i32 %x, i32 %y)
-; CHECK-LABEL: define {{.*}} i32 @f.3
+; CHECK-LABEL: define {{.*}} i32 @f.specialized.3
; CHECK: call i32 @add(i32 %x, i32 %y)
; CHECK-NEXT: call i32 @add(i32 %x, i32 %y)
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
index d1c23e07d59721f..a653760abb2cc65 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -38,7 +38,7 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
; ONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @power, ptr @mul)
; ONE-NEXT: br label [[MERGE:%.*]]
; ONE: minus:
-; ONE-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
+; ONE-NEXT: [[TMP1:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
; ONE-NEXT: br label [[MERGE]]
; ONE: merge:
; ONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
@@ -52,25 +52,25 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
; TWO-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @power, ptr @mul)
; TWO-NEXT: br label [[MERGE:%.*]]
; TWO: minus:
-; TWO-NEXT: [[TMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
+; TWO-NEXT: [[TMP1:%.*]] = call i64 @compute.specialized.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
; TWO-NEXT: br label [[MERGE]]
; TWO: merge:
; TWO-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
-; TWO-NEXT: [[TMP3:%.*]] = call i64 @compute.1(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
+; TWO-NEXT: [[TMP3:%.*]] = call i64 @compute.specialized.1(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
; TWO-NEXT: ret i64 [[TMP3]]
;
; THREE-LABEL: @main(
; THREE-NEXT: entry:
; THREE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
; THREE: plus:
-; THREE-NEXT: [[TMP0:%.*]] = call i64 @compute.1(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @power, ptr @mul)
+; THREE-NEXT: [[TMP0:%.*]] = call i64 @compute.specialized.1(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @power, ptr @mul)
; THREE-NEXT: br label [[MERGE:%.*]]
; THREE: minus:
-; THREE-NEXT: [[TMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
+; THREE-NEXT: [[TMP1:%.*]] = call i64 @compute.specialized.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
; THREE-NEXT: br label [[MERGE]]
; THREE: merge:
; THREE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
-; THREE-NEXT: [[TMP3:%.*]] = call i64 @compute.3(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
+; THREE-NEXT: [[TMP3:%.*]] = call i64 @compute.specialized.3(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
; THREE-NEXT: ret i64 [[TMP3]]
;
entry:
@@ -92,7 +92,7 @@ merge:
; THREE-NOT: define internal i64 @compute
;
-; THREE-LABEL: define internal i64 @compute.1(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
+; THREE-LABEL: define internal i64 @compute.specialized.1(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
; THREE-NEXT: entry:
; THREE-NEXT: [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y)
; THREE-NEXT: [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y)
@@ -103,7 +103,7 @@ merge:
; THREE-NEXT: ret i64 [[TMP5]]
; THREE-NEXT: }
;
-; THREE-LABEL: define internal i64 @compute.2(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
+; THREE-LABEL: define internal i64 @compute.specialized.2(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
; THREE-NEXT: entry:
; THREE-NEXT: [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y)
; THREE-NEXT: [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y)
@@ -114,7 +114,7 @@ merge:
; THREE-NEXT: ret i64 [[TMP5]]
; THREE-NEXT: }
;
-; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
+; THREE-LABEL: define internal i64 @compute.specialized.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
; THREE-NEXT: entry:
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
diff --git a/llvm/test/Transforms/FunctionSpecialization/track-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-return.ll
index 58a1c5f2a5904d3..54e5de018f19cd1 100644
--- a/llvm/test/Transforms/FunctionSpecialization/track-return.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/track-return.ll
@@ -3,8 +3,8 @@
define i64 @main() {
; CHECK: define i64 @main
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C1:%.*]] = call i64 @foo.1(i1 true, i64 3, i64 1)
-; CHECK-NEXT: [[C2:%.*]] = call i64 @foo.2(i1 false, i64 4, i64 -1)
+; CHECK-NEXT: [[C1:%.*]] = call i64 @foo.specialized.1(i1 true, i64 3, i64 1)
+; CHECK-NEXT: [[C2:%.*]] = call i64 @foo.specialized.2(i1 false, i64 4, i64 -1)
; CHECK-NEXT: ret i64 8
;
entry:
@@ -16,22 +16,22 @@ entry:
define internal i64 @foo(i1 %flag, i64 %m, i64 %n) {
;
-; CHECK: define internal i64 @foo.1
+; CHECK: define internal i64 @foo.specialized.1
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %plus
; CHECK: plus:
-; CHECK-NEXT: [[N0:%.*]] = call i64 @binop.4(i64 3, i64 1)
-; CHECK-NEXT: [[RES0:%.*]] = call i64 @bar.6(i64 4)
+; CHECK-NEXT: [[N0:%.*]] = call i64 @binop.specialized.4(i64 3, i64 1)
+; CHECK-NEXT: [[RES0:%.*]] = call i64 @bar.specialized.6(i64 4)
; CHECK-NEXT: br label %merge
; CHECK: merge:
; CHECK-NEXT: ret i64 undef
;
-; CHECK: define internal i64 @foo.2
+; CHECK: define internal i64 @foo.specialized.2
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %minus
; CHECK: minus:
-; CHECK-NEXT: [[N1:%.*]] = call i64 @binop.3(i64 4, i64 -1)
-; CHECK-NEXT: [[RES1:%.*]] = call i64 @bar.5(i64 3)
+; CHECK-NEXT: [[N1:%.*]] = call i64 @binop.specialized.3(i64 4, i64 -1)
+; CHECK-NEXT: [[RES1:%.*]] = call i64 @bar.specialized.5(i64 3)
; CHECK-NEXT: br label %merge
; CHECK: merge:
; CHECK-NEXT: ret i64 undef
@@ -56,11 +56,11 @@ merge:
define internal i64 @binop(i64 %x, i64 %y) {
;
-; CHECK: define internal i64 @binop.3
+; CHECK: define internal i64 @binop.specialized.3
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i64 undef
;
-; CHECK: define internal i64 @binop.4
+; CHECK: define internal i64 @binop.specialized.4
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i64 undef
;
@@ -71,7 +71,7 @@ entry:
define internal i64 @bar(i64 %n) {
;
-; CHECK: define internal i64 @bar.5
+; CHECK: define internal i64 @bar.specialized.5
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %if.else
; CHECK: if.else:
@@ -79,7 +79,7 @@ define internal i64 @bar(i64 %n) {
; CHECK: if.end:
; CHECK-NEXT: ret i64 undef
;
-; CHECK: define internal i64 @bar.6
+; CHECK: define internal i64 @bar.specialized.6
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %if.then
; CHECK: if.then: