[llvm] [LiveIntervals] Ignore artificial regs when adding kill flags (PR #116963)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 4 09:03:51 PST 2024
https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/116963
>From 7dbc99897a25ca370e222097342bf1dc312676f3 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 20 Nov 2024 12:07:45 +0000
Subject: [PATCH 1/6] Precommit tests
---
llvm/test/CodeGen/AArch64/arm64-addrmode.ll | 102 +++++++++++-------
.../CodeGen/AArch64/nested-iv-regalloc.mir | 10 +-
.../AArch64/preserve_nonecc_varargs_darwin.ll | 2 +-
3 files changed, 67 insertions(+), 47 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index bfef61abd8c129..cb00272a7d022c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm64-eabi < %s | FileCheck %s
+; RUN: llc -aarch64-enable-subreg-liveness-tracking -mtriple=arm64-eabi < %s | FileCheck %s
; rdar://10232252
@object = external hidden global i64, section "__DATA, __objc_ivar", align 8
@@ -214,8 +214,9 @@ define void @t17(i64 %a) {
define i8 @LdOffset_i8(ptr %a) {
; CHECK-LABEL: LdOffset_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT: ldrb w0, [x8, #3704]
+; CHECK-NEXT: mov w8, #56952 // =0xde78
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrb w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -226,8 +227,9 @@ define i8 @LdOffset_i8(ptr %a) {
define i32 @LdOffset_i8_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_zext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT: ldrb w0, [x8, #3704]
+; CHECK-NEXT: mov w8, #56952 // =0xde78
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrb w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -239,8 +241,9 @@ define i32 @LdOffset_i8_zext32(ptr %a) {
define i32 @LdOffset_i8_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT: ldrsb w0, [x8, #3704]
+; CHECK-NEXT: mov w8, #56952 // =0xde78
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrsb w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -252,8 +255,9 @@ define i32 @LdOffset_i8_sext32(ptr %a) {
define i64 @LdOffset_i8_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_zext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT: ldrb w0, [x8, #3704]
+; CHECK-NEXT: mov w8, #56952 // =0xde78
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrb w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -265,8 +269,9 @@ define i64 @LdOffset_i8_zext64(ptr %a) {
define i64 @LdOffset_i8_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT: ldrsb x0, [x8, #3704]
+; CHECK-NEXT: mov w8, #56952 // =0xde78
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrsb x0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -278,8 +283,9 @@ define i64 @LdOffset_i8_sext64(ptr %a) {
define i16 @LdOffset_i16(ptr %a) {
; CHECK-LABEL: LdOffset_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT: ldrh w0, [x8, #7408]
+; CHECK-NEXT: mov w8, #48368 // =0xbcf0
+; CHECK-NEXT: movk w8, #31, lsl #16
+; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -290,8 +296,9 @@ define i16 @LdOffset_i16(ptr %a) {
define i32 @LdOffset_i16_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT: ldrh w0, [x8, #7408]
+; CHECK-NEXT: mov w8, #48368 // =0xbcf0
+; CHECK-NEXT: movk w8, #31, lsl #16
+; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -303,8 +310,9 @@ define i32 @LdOffset_i16_zext32(ptr %a) {
define i32 @LdOffset_i16_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT: ldrsh w0, [x8, #7408]
+; CHECK-NEXT: mov w8, #48368 // =0xbcf0
+; CHECK-NEXT: movk w8, #31, lsl #16
+; CHECK-NEXT: ldrsh w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -316,8 +324,9 @@ define i32 @LdOffset_i16_sext32(ptr %a) {
define i64 @LdOffset_i16_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT: ldrh w0, [x8, #7408]
+; CHECK-NEXT: mov w8, #48368 // =0xbcf0
+; CHECK-NEXT: movk w8, #31, lsl #16
+; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -329,8 +338,9 @@ define i64 @LdOffset_i16_zext64(ptr %a) {
define i64 @LdOffset_i16_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT: ldrsh x0, [x8, #7408]
+; CHECK-NEXT: mov w8, #48368 // =0xbcf0
+; CHECK-NEXT: movk w8, #31, lsl #16
+; CHECK-NEXT: ldrsh x0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -342,8 +352,9 @@ define i64 @LdOffset_i16_sext64(ptr %a) {
define i32 @LdOffset_i32(ptr %a) {
; CHECK-LABEL: LdOffset_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT: ldr w0, [x8, #14816]
+; CHECK-NEXT: mov w8, #31200 // =0x79e0
+; CHECK-NEXT: movk w8, #63, lsl #16
+; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 4
@@ -354,8 +365,9 @@ define i32 @LdOffset_i32(ptr %a) {
define i64 @LdOffset_i32_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_zext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT: ldr w0, [x8, #14816]
+; CHECK-NEXT: mov w8, #31200 // =0x79e0
+; CHECK-NEXT: movk w8, #63, lsl #16
+; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
@@ -367,8 +379,9 @@ define i64 @LdOffset_i32_zext64(ptr %a) {
define i64 @LdOffset_i32_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_sext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT: ldrsw x0, [x8, #14816]
+; CHECK-NEXT: mov w8, #31200 // =0x79e0
+; CHECK-NEXT: movk w8, #63, lsl #16
+; CHECK-NEXT: ldrsw x0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
@@ -380,8 +393,9 @@ define i64 @LdOffset_i32_sext64(ptr %a) {
define i64 @LdOffset_i64(ptr %a) {
; CHECK-LABEL: LdOffset_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
-; CHECK-NEXT: ldr x0, [x8, #29632]
+; CHECK-NEXT: mov w8, #62400 // =0xf3c0
+; CHECK-NEXT: movk w8, #126, lsl #16
+; CHECK-NEXT: ldr x0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
%val = load i64, ptr %arrayidx, align 4
@@ -392,8 +406,9 @@ define i64 @LdOffset_i64(ptr %a) {
define <2 x i32> @LdOffset_v2i32(ptr %a) {
; CHECK-LABEL: LdOffset_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
-; CHECK-NEXT: ldr d0, [x8, #29632]
+; CHECK-NEXT: mov w8, #62400 // =0xf3c0
+; CHECK-NEXT: movk w8, #126, lsl #16
+; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
%val = load <2 x i32>, ptr %arrayidx, align 4
@@ -404,8 +419,9 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) {
define <2 x i64> @LdOffset_v2i64(ptr %a) {
; CHECK-LABEL: LdOffset_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608
-; CHECK-NEXT: ldr q0, [x8, #59264]
+; CHECK-NEXT: mov w8, #59264 // =0xe780
+; CHECK-NEXT: movk w8, #253, lsl #16
+; CHECK-NEXT: ldr q0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
%val = load <2 x i64>, ptr %arrayidx, align 4
@@ -416,8 +432,9 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) {
define double @LdOffset_i8_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT: ldrsb w8, [x8, #3704]
+; CHECK-NEXT: mov w8, #56952 // =0xde78
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrsb w8, [x0, x8]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
@@ -430,8 +447,9 @@ define double @LdOffset_i8_f64(ptr %a) {
define double @LdOffset_i16_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT: ldrsh w8, [x8, #7408]
+; CHECK-NEXT: mov w8, #48368 // =0xbcf0
+; CHECK-NEXT: movk w8, #31, lsl #16
+; CHECK-NEXT: ldrsh w8, [x0, x8]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
@@ -444,8 +462,9 @@ define double @LdOffset_i16_f64(ptr %a) {
define double @LdOffset_i32_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT: ldr s0, [x8, #14816]
+; CHECK-NEXT: mov w8, #31200 // =0x79e0
+; CHECK-NEXT: movk w8, #63, lsl #16
+; CHECK-NEXT: ldr s0, [x0, x8]
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
@@ -458,8 +477,9 @@ define double @LdOffset_i32_f64(ptr %a) {
define double @LdOffset_i64_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i64_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
-; CHECK-NEXT: ldr d0, [x8, #29632]
+; CHECK-NEXT: mov w8, #62400 // =0xf3c0
+; CHECK-NEXT: movk w8, #126, lsl #16
+; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
diff --git a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
index 3bd8f83d27c2da..56405a2675f7ab 100644
--- a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
+++ b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 --run-pass=greedy,virtregrewriter -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -aarch64-enable-subreg-liveness-tracking --run-pass=greedy,virtregrewriter -verify-machineinstrs %s -o - | FileCheck %s
# We should ideally not spill around any of the SUBSWri in the loop exit blocks (if.end and if.end27).
@@ -219,8 +219,8 @@ body: |
; CHECK-NEXT: liveins: $w10, $w11, $x2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: STRXui renamable $x8, %stack.1, 0 :: (store (s64) into %stack.1)
- ; CHECK-NEXT: renamable $w9 = MOVi32imm 36, implicit-def $x9
- ; CHECK-NEXT: renamable $x8 = MADDXrrr killed renamable $x8, killed renamable $x9, $xzr
+ ; CHECK-NEXT: renamable $w9 = MOVi32imm 36
+ ; CHECK-NEXT: renamable $x8 = MADDXrrr killed renamable $x8, renamable $x9, $xzr
; CHECK-NEXT: renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
; CHECK-NEXT: renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx9)
; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
@@ -244,8 +244,8 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x50000000), %bb.8(0x30000000)
; CHECK-NEXT: liveins: $w10, $w11, $x2, $x12
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = MOVi32imm 36, implicit-def $x8
- ; CHECK-NEXT: renamable $x8 = MADDXrrr renamable $x12, killed renamable $x8, $xzr
+ ; CHECK-NEXT: renamable $w8 = MOVi32imm 36
+ ; CHECK-NEXT: renamable $x8 = MADDXrrr renamable $x12, renamable $x8, $xzr
; CHECK-NEXT: renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
; CHECK-NEXT: renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx14)
; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index e227f14542cc11..4206c0bc269915 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-enable-subreg-liveness-tracking < %s | FileCheck %s
define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
; CHECK-LABEL: callee:
>From 34e12da10133cd818c5d9273059e30f8dda8a5e3 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 20 Nov 2024 12:23:19 +0000
Subject: [PATCH 2/6] [LiveIntervals] Ignore artificial regs when adding kill
flags
If parts of a physical register for a given liverange, as assigned by the
register allocator, can be used to store other values not represented by
this liverange, then `LiveIntervals::addKillFlags` normally avoids adding a
kill flag on the use of this register when the value's liverange ends.
However, if all the other regunits are artificial, then we can still safely
add the kill flag, since those parts of the register can never be accessed
independently.
---
llvm/lib/CodeGen/LiveIntervals.cpp | 38 ++++++-
llvm/test/CodeGen/AArch64/arm64-addrmode.ll | 100 +++++++-----------
.../CodeGen/AArch64/nested-iv-regalloc.mir | 4 +-
.../AArch64/preserve_nonecc_varargs_darwin.ll | 10 +-
4 files changed, 84 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index a0b6bf445fa8af..18059a1d384580 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -710,6 +710,30 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// Register allocator hooks.
//
+/// Returns true if the physreg has multiple regunits that can be accessed
+/// as independent registers.
+///
+/// Returns 'true' for e.g.:
+/// gpr64_0_gpr64_1
+// => two independently accessible registers gpr64_0 and gpr64_1.
+///
+/// Returns 'false' for e.g.:
+/// gpr64_0: => accessible register, reads/writes 64bits
+/// gpr32_0: => accessible sub-register of gpr64_0, reads/writes 32bits
+// gpr32_0_hi => top 32bits of gpr64_0, not independently accessible.
+static bool hasMultipleAddressableRegUnits(const TargetRegisterInfo *TRI,
+ MCPhysReg PhysReg) {
+ unsigned NumAddressableRegUnits = 0;
+ for (MCRegUnit U : TRI->regunits(PhysReg)) {
+ for (MCRegUnitRootIterator RI(U, TRI); RI.isValid(); ++RI)
+ if (!TRI->isArtificial(*RI) && TRI->isInAllocatableClass(*RI))
+ NumAddressableRegUnits++;
+ if (NumAddressableRegUnits > 1)
+ return true;
+ }
+ return false;
+}
+
void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Keep track of regunit ranges.
SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
@@ -736,6 +760,18 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
}
+
+ // If parts of a physical register for a given liverange, as assigned by the
+ // register allocator, can be used to store other values not represented by
+ // this liverange, then `LiveIntervals::addKillFlags` normally avoids adding
+ // a kill flag on the use of this register when the value's liverange ends.
+ //
+ // However, if all the other regunits are artificial, then we can still
+ // safely add the kill flag, since those parts of the register can never be
+ // accessed independently.
+ bool AssumeOtherUnitsCanBeUsed =
+ hasMultipleAddressableRegUnits(TRI, PhysReg);
+
// Every instruction that kills Reg corresponds to a segment range end
// point.
for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
@@ -780,7 +816,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// are actually never written by %2. After assignment the <kill>
// flag at the read instruction is invalid.
LaneBitmask DefinedLanesMask;
- if (LI.hasSubRanges()) {
+ if (LI.hasSubRanges() && AssumeOtherUnitsCanBeUsed) {
// Compute a mask of lanes that are defined.
DefinedLanesMask = LaneBitmask::getNone();
for (const LiveInterval::SubRange &SR : LI.subranges())
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index cb00272a7d022c..f8695b62619c09 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -214,9 +214,8 @@ define void @t17(i64 %a) {
define i8 @LdOffset_i8(ptr %a) {
; CHECK-LABEL: LdOffset_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #56952 // =0xde78
-; CHECK-NEXT: movk w8, #15, lsl #16
-; CHECK-NEXT: ldrb w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT: ldrb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -227,9 +226,8 @@ define i8 @LdOffset_i8(ptr %a) {
define i32 @LdOffset_i8_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_zext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #56952 // =0xde78
-; CHECK-NEXT: movk w8, #15, lsl #16
-; CHECK-NEXT: ldrb w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT: ldrb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -241,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) {
define i32 @LdOffset_i8_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #56952 // =0xde78
-; CHECK-NEXT: movk w8, #15, lsl #16
-; CHECK-NEXT: ldrsb w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT: ldrsb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -255,9 +252,8 @@ define i32 @LdOffset_i8_sext32(ptr %a) {
define i64 @LdOffset_i8_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_zext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #56952 // =0xde78
-; CHECK-NEXT: movk w8, #15, lsl #16
-; CHECK-NEXT: ldrb w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT: ldrb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -269,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) {
define i64 @LdOffset_i8_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #56952 // =0xde78
-; CHECK-NEXT: movk w8, #15, lsl #16
-; CHECK-NEXT: ldrsb x0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT: ldrsb x0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
@@ -283,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) {
define i16 @LdOffset_i16(ptr %a) {
; CHECK-LABEL: LdOffset_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #48368 // =0xbcf0
-; CHECK-NEXT: movk w8, #31, lsl #16
-; CHECK-NEXT: ldrh w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -296,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a) {
define i32 @LdOffset_i16_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #48368 // =0xbcf0
-; CHECK-NEXT: movk w8, #31, lsl #16
-; CHECK-NEXT: ldrh w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -310,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a) {
define i32 @LdOffset_i16_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #48368 // =0xbcf0
-; CHECK-NEXT: movk w8, #31, lsl #16
-; CHECK-NEXT: ldrsh w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT: ldrsh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -324,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a) {
define i64 @LdOffset_i16_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #48368 // =0xbcf0
-; CHECK-NEXT: movk w8, #31, lsl #16
-; CHECK-NEXT: ldrh w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -338,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a) {
define i64 @LdOffset_i16_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #48368 // =0xbcf0
-; CHECK-NEXT: movk w8, #31, lsl #16
-; CHECK-NEXT: ldrsh x0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT: ldrsh x0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
@@ -352,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a) {
define i32 @LdOffset_i32(ptr %a) {
; CHECK-LABEL: LdOffset_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #31200 // =0x79e0
-; CHECK-NEXT: movk w8, #63, lsl #16
-; CHECK-NEXT: ldr w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 4
@@ -365,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a) {
define i64 @LdOffset_i32_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_zext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #31200 // =0x79e0
-; CHECK-NEXT: movk w8, #63, lsl #16
-; CHECK-NEXT: ldr w0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
@@ -379,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a) {
define i64 @LdOffset_i32_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_sext64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #31200 // =0x79e0
-; CHECK-NEXT: movk w8, #63, lsl #16
-; CHECK-NEXT: ldrsw x0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT: ldrsw x0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
@@ -393,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a) {
define i64 @LdOffset_i64(ptr %a) {
; CHECK-LABEL: LdOffset_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #62400 // =0xf3c0
-; CHECK-NEXT: movk w8, #126, lsl #16
-; CHECK-NEXT: ldr x0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT: ldr x0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
%val = load i64, ptr %arrayidx, align 4
@@ -406,9 +392,8 @@ define i64 @LdOffset_i64(ptr %a) {
define <2 x i32> @LdOffset_v2i32(ptr %a) {
; CHECK-LABEL: LdOffset_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #62400 // =0xf3c0
-; CHECK-NEXT: movk w8, #126, lsl #16
-; CHECK-NEXT: ldr d0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
%val = load <2 x i32>, ptr %arrayidx, align 4
@@ -419,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) {
define <2 x i64> @LdOffset_v2i64(ptr %a) {
; CHECK-LABEL: LdOffset_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #59264 // =0xe780
-; CHECK-NEXT: movk w8, #253, lsl #16
-; CHECK-NEXT: ldr q0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608
+; CHECK-NEXT: ldr q0, [x8, #59264]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
%val = load <2 x i64>, ptr %arrayidx, align 4
@@ -432,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) {
define double @LdOffset_i8_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #56952 // =0xde78
-; CHECK-NEXT: movk w8, #15, lsl #16
-; CHECK-NEXT: ldrsb w8, [x0, x8]
+; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT: ldrsb w8, [x8, #3704]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
@@ -447,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a) {
define double @LdOffset_i16_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #48368 // =0xbcf0
-; CHECK-NEXT: movk w8, #31, lsl #16
-; CHECK-NEXT: ldrsh w8, [x0, x8]
+; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT: ldrsh w8, [x8, #7408]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
@@ -462,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a) {
define double @LdOffset_i32_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #31200 // =0x79e0
-; CHECK-NEXT: movk w8, #63, lsl #16
-; CHECK-NEXT: ldr s0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT: ldr s0, [x8, #14816]
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
@@ -477,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a) {
define double @LdOffset_i64_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i64_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #62400 // =0xf3c0
-; CHECK-NEXT: movk w8, #126, lsl #16
-; CHECK-NEXT: ldr d0, [x0, x8]
+; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
diff --git a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
index 56405a2675f7ab..ff29c78b5a0ce5 100644
--- a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
+++ b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
@@ -220,7 +220,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: STRXui renamable $x8, %stack.1, 0 :: (store (s64) into %stack.1)
; CHECK-NEXT: renamable $w9 = MOVi32imm 36
- ; CHECK-NEXT: renamable $x8 = MADDXrrr killed renamable $x8, renamable $x9, $xzr
+ ; CHECK-NEXT: renamable $x8 = MADDXrrr killed renamable $x8, killed renamable $x9, $xzr
; CHECK-NEXT: renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
; CHECK-NEXT: renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx9)
; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
@@ -245,7 +245,7 @@ body: |
; CHECK-NEXT: liveins: $w10, $w11, $x2, $x12
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = MOVi32imm 36
- ; CHECK-NEXT: renamable $x8 = MADDXrrr renamable $x12, renamable $x8, $xzr
+ ; CHECK-NEXT: renamable $x8 = MADDXrrr renamable $x12, killed renamable $x8, $xzr
; CHECK-NEXT: renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
; CHECK-NEXT: renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx14)
; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index 4206c0bc269915..2a77d4dd33fe53 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -27,12 +27,11 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: sub sp, sp, #208
; CHECK-NEXT: mov w8, #10 ; =0xa
; CHECK-NEXT: mov w9, #9 ; =0x9
-; CHECK-NEXT: mov w0, #1 ; =0x1
+; CHECK-NEXT: mov w10, #8 ; =0x8
; CHECK-NEXT: stp x9, x8, [sp, #24]
-; CHECK-NEXT: mov w8, #8 ; =0x8
-; CHECK-NEXT: mov w9, #6 ; =0x6
-; CHECK-NEXT: str x8, [sp, #16]
; CHECK-NEXT: mov w8, #7 ; =0x7
+; CHECK-NEXT: mov w9, #6 ; =0x6
+; CHECK-NEXT: mov w0, #1 ; =0x1
; CHECK-NEXT: mov w1, #2 ; =0x2
; CHECK-NEXT: mov w2, #3 ; =0x3
; CHECK-NEXT: mov w3, #4 ; =0x4
@@ -47,7 +46,8 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill
-; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: stp x8, x10, [sp, #8]
+; CHECK-NEXT: str x9, [sp]
; CHECK-NEXT: bl _callee
; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
>From d7dc6b4eaa6f4992f3aa7dafb64e60fcce7a3ada Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 3 Dec 2024 15:49:26 +0000
Subject: [PATCH 3/6] Precompute subreg info with TableGen
---
.../include/llvm/CodeGen/TargetRegisterInfo.h | 13 ++++++
llvm/lib/CodeGen/LiveIntervals.cpp | 41 ++-----------------
llvm/utils/TableGen/RegisterInfoEmitter.cpp | 32 ++++++++++++++-
3 files changed, 48 insertions(+), 38 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 292fa3c94969be..953a5ca648ef98 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -745,6 +745,15 @@ class TargetRegisterInfo : public MCRegisterInfo {
return reverseComposeSubRegIndexLaneMaskImpl(IdxA, LaneMask);
}
+ /// Returns the number of allocatable sub registers for R, which is the
+ /// number of register units that are not artificial and part of an
+ /// allocatable register class. For a register like D0_D1, which consists of
+ /// D0 and D1, this function would return '2'. For an architecture where
+ /// D0=S0_S1 and D1=S2_S3, this would return '4' for S0, S1, S2, S3.
+ unsigned getNumAllocatableSubRegs(MCPhysReg R) const {
+ return getNumAllocatableSubRegsImpl(R);
+ }
+
/// Debugging helper: dump register in human readable form to dbgs() stream.
static void dumpReg(Register Reg, unsigned SubRegIndex = 0,
const TargetRegisterInfo *TRI = nullptr);
@@ -774,6 +783,10 @@ class TargetRegisterInfo : public MCRegisterInfo {
llvm_unreachable("Target has no sub-registers");
}
+ virtual unsigned getNumAllocatableSubRegsImpl(MCPhysReg) const {
+ llvm_unreachable("Target has no sub-registers");
+ }
+
/// Return the register cost table index. This implementation is sufficient
/// for most architectures and can be overriden by targets in case there are
/// multiple cost values associated with each register.
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 18059a1d384580..d8acdb0c2c9e6d 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -710,30 +710,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// Register allocator hooks.
//
-/// Returns true if the physreg has multiple regunits that can be accessed
-/// as independent registers.
-///
-/// Returns 'true' for e.g.:
-/// gpr64_0_gpr64_1
-// => two independently accessible registers gpr64_0 and gpr64_1.
-///
-/// Returns 'false' for e.g.:
-/// gpr64_0: => accessible register, reads/writes 64bits
-/// gpr32_0: => accessible sub-regsiter of gpr64_0, reads/writes 32bits
-// gpr32_0_hi => top 32bits of gpr64_0, not independently accessible.
-static bool hasMultipleAddressableRegUnits(const TargetRegisterInfo *TRI,
- MCPhysReg PhysReg) {
- unsigned NumAddressableRegUnits = 0;
- for (MCRegUnit U : TRI->regunits(PhysReg)) {
- for (MCRegUnitRootIterator RI(U, TRI); RI.isValid(); ++RI)
- if (!TRI->isArtificial(*RI) && TRI->isInAllocatableClass(*RI))
- NumAddressableRegUnits++;
- if (NumAddressableRegUnits > 1)
- return true;
- }
- return false;
-}
-
void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Keep track of regunit ranges.
SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
@@ -760,18 +736,6 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
}
-
- // If parts of a physical register for a given liverange, as assigned by the
- // register allocator, can be used to store other values not represented by
- // this liverange, then `LiveIntervals::addKillFlags` normally avoids adding
- // a kill flag on the use of this register when the value's liverange ends.
- //
- // However, if all the other regunits are artificial, then we can still
- // safely add the kill flag, since those parts of the register can never be
- // accessed independently.
- bool AssumeOtherUnitsCanBeUsed =
- hasMultipleAddressableRegUnits(TRI, PhysReg);
-
// Every instruction that kills Reg corresponds to a segment range end
// point.
for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
@@ -806,6 +770,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
if (MRI->subRegLivenessEnabled()) {
// When reading a partial undefined value we must not add a kill flag.
// The regalloc might have used the undef lane for something else.
+ // If the register consists of a single allocatable subreg, then
+ // we can assume the other (undef) lanes cannot be used.
+ //
// Example:
// %1 = ... ; R32: %1
// %2:high16 = ... ; R64: %2
@@ -816,7 +783,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// are actually never written by %2. After assignment the <kill>
// flag at the read instruction is invalid.
LaneBitmask DefinedLanesMask;
- if (LI.hasSubRanges() && AssumeOtherUnitsCanBeUsed) {
+ if (LI.hasSubRanges() && TRI->getNumAllocatableSubRegs(PhysReg) > 1) {
// Compute a mask of lanes that are defined.
DefinedLanesMask = LaneBitmask::getNone();
for (const LiveInterval::SubRange &SR : LI.subranges())
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index a6f87119aca5ba..7b9257fb610e15 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -93,6 +93,8 @@ class RegisterInfoEmitter {
void EmitRegUnitPressure(raw_ostream &OS, StringRef ClassName);
void emitComposeSubRegIndices(raw_ostream &OS, StringRef ClassName);
void emitComposeSubRegIndexLaneMask(raw_ostream &OS, StringRef ClassName);
+ void emitNumAllocatableSubRegs(raw_ostream &OS, StringRef ClassName,
+ llvm::BitVector &InAllocClass);
};
} // end anonymous namespace
@@ -677,6 +679,31 @@ static bool combine(const CodeGenSubRegIndex *Idx,
return true;
}
+void RegisterInfoEmitter::emitNumAllocatableSubRegs(
+ raw_ostream &OS, StringRef ClassName, llvm::BitVector &InAllocClass) {
+ OS << "unsigned " << ClassName
+ << "::getNumAllocatableSubRegsImpl(MCPhysReg R) const {\n";
+ OS << " static unsigned numAllocatableSubRegsMap[] = { \n";
+ OS << " 0, // NoRegister\n";
+ const auto &Regs = RegBank.getRegisters();
+ for (auto [I, R] : llvm::enumerate(Regs)) {
+ unsigned NumAllocatableSubRegs = 0;
+ for (unsigned U : R.getRegUnits()) {
+ for (const CodeGenRegister *UR : RegBank.getRegUnit(U).getRoots())
+ if (!UR->Artificial && InAllocClass[UR->EnumValue])
+ NumAllocatableSubRegs++;
+ }
+ OS << " " << NumAllocatableSubRegs;
+ if (I < Regs.size() - 1)
+ OS << ",";
+ OS << " // " << R.getName() << "\n";
+ }
+ OS << " };\n";
+ OS << " assert(R <= " << Regs.size() << " && \"Unexpected physreg\");\n";
+ OS << " return numAllocatableSubRegsMap[R];\n";
+ OS << "};\n";
+}
+
void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
StringRef ClassName) {
const auto &SubRegIndices = RegBank.getSubRegIndices();
@@ -1122,7 +1149,9 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) {
<< " const TargetRegisterClass *getSubClassWithSubReg"
<< "(const TargetRegisterClass *, unsigned) const override;\n"
<< " const TargetRegisterClass *getSubRegisterClass"
- << "(const TargetRegisterClass *, unsigned) const override;\n";
+ << "(const TargetRegisterClass *, unsigned) const override;\n"
+ << " unsigned getNumAllocatableSubRegsImpl(MCPhysReg) const "
+ "override;\n";
}
OS << " const RegClassWeight &getRegClassWeight("
<< "const TargetRegisterClass *RC) const override;\n"
@@ -1483,6 +1512,7 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
if (!SubRegIndices.empty()) {
emitComposeSubRegIndices(OS, ClassName);
emitComposeSubRegIndexLaneMask(OS, ClassName);
+ emitNumAllocatableSubRegs(OS, ClassName, InAllocClass);
}
if (!SubRegIndices.empty()) {
>From 19504257275303b44149b9bc353189b9c1aee935 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 4 Dec 2024 11:16:45 +0000
Subject: [PATCH 4/6] Use MCRegUnitMaskIterator instead
---
.../include/llvm/CodeGen/TargetRegisterInfo.h | 13 --------
llvm/lib/CodeGen/LiveIntervals.cpp | 22 +++++++++----
llvm/utils/TableGen/RegisterInfoEmitter.cpp | 32 +------------------
3 files changed, 17 insertions(+), 50 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 953a5ca648ef98..292fa3c94969be 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -745,15 +745,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
return reverseComposeSubRegIndexLaneMaskImpl(IdxA, LaneMask);
}
- /// Returns the number of allocatable sub registers for R, which is the
- /// number of register units that are not artificial and part of an
- /// allocatable register class. For a register like D0_D1, which consists of
- /// D0 and D1, this function would return '2'. For an architecture where
- /// D0=S0_S1 and D1=S2_S3, this would return '4' for S0, S1, S2, S3.
- unsigned getNumAllocatableSubRegs(MCPhysReg R) const {
- return getNumAllocatableSubRegsImpl(R);
- }
-
/// Debugging helper: dump register in human readable form to dbgs() stream.
static void dumpReg(Register Reg, unsigned SubRegIndex = 0,
const TargetRegisterInfo *TRI = nullptr);
@@ -783,10 +774,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
llvm_unreachable("Target has no sub-registers");
}
- virtual unsigned getNumAllocatableSubRegsImpl(MCPhysReg) const {
- llvm_unreachable("Target has no sub-registers");
- }
-
/// Return the register cost table index. This implementation is sufficient
/// for most architectures and can be overriden by targets in case there are
/// multiple cost values associated with each register.
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index d8acdb0c2c9e6d..1d78872d8a0a30 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -730,7 +730,16 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Find the regunit intervals for the assigned register. They may overlap
// the virtual register live range, cancelling any kills.
RU.clear();
- for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LaneBitmask ArtificialLanes = LaneBitmask::getNone();
+ for (MCRegUnitMaskIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ auto [Unit, Bitmask] = *UI;
+ // Record lane mask for all artificial RegUnits for this physreg.
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ if (TRI->isArtificial(*Root)) {
+ ArtificialLanes |= Bitmask;
+ break;
+ }
+ }
const LiveRange &RURange = getRegUnit(Unit);
if (RURange.empty())
continue;
@@ -770,9 +779,6 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
if (MRI->subRegLivenessEnabled()) {
// When reading a partial undefined value we must not add a kill flag.
// The regalloc might have used the undef lane for something else.
- // If the register consists of a single allocatable subreg, then
- // we can assume the other (undef) lanes cannot be used.
- //
// Example:
// %1 = ... ; R32: %1
// %2:high16 = ... ; R64: %2
@@ -783,9 +789,13 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// are actually never written by %2. After assignment the <kill>
// flag at the read instruction is invalid.
LaneBitmask DefinedLanesMask;
- if (LI.hasSubRanges() && TRI->getNumAllocatableSubRegs(PhysReg) > 1) {
+ if (LI.hasSubRanges()) {
// Compute a mask of lanes that are defined.
- DefinedLanesMask = LaneBitmask::getNone();
+ // Artificial regunits are not independently allocatable so the
+ // register allocator cannot have used them to represent any other
+ // values. That's why we mark them as 'defined' here, as this
+ // otherwise prevents kill flags from being added.
+ DefinedLanesMask = ArtificialLanes;
for (const LiveInterval::SubRange &SR : LI.subranges())
for (const LiveRange::Segment &Segment : SR.segments) {
if (Segment.start >= RI->end)
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 7b9257fb610e15..a6f87119aca5ba 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -93,8 +93,6 @@ class RegisterInfoEmitter {
void EmitRegUnitPressure(raw_ostream &OS, StringRef ClassName);
void emitComposeSubRegIndices(raw_ostream &OS, StringRef ClassName);
void emitComposeSubRegIndexLaneMask(raw_ostream &OS, StringRef ClassName);
- void emitNumAllocatableSubRegs(raw_ostream &OS, StringRef ClassName,
- llvm::BitVector &InAllocClass);
};
} // end anonymous namespace
@@ -679,31 +677,6 @@ static bool combine(const CodeGenSubRegIndex *Idx,
return true;
}
-void RegisterInfoEmitter::emitNumAllocatableSubRegs(
- raw_ostream &OS, StringRef ClassName, llvm::BitVector &InAllocClass) {
- OS << "unsigned " << ClassName
- << "::getNumAllocatableSubRegsImpl(MCPhysReg R) const {\n";
- OS << " static unsigned numAllocatableSubRegsMap[] = { \n";
- OS << " 0, // NoRegister\n";
- const auto &Regs = RegBank.getRegisters();
- for (auto [I, R] : llvm::enumerate(Regs)) {
- unsigned NumAllocatableSubRegs = 0;
- for (unsigned U : R.getRegUnits()) {
- for (const CodeGenRegister *UR : RegBank.getRegUnit(U).getRoots())
- if (!UR->Artificial && InAllocClass[UR->EnumValue])
- NumAllocatableSubRegs++;
- }
- OS << " " << NumAllocatableSubRegs;
- if (I < Regs.size() - 1)
- OS << ",";
- OS << " // " << R.getName() << "\n";
- }
- OS << " };\n";
- OS << " assert(R <= " << Regs.size() << " && \"Unexpected physreg\");\n";
- OS << " return numAllocatableSubRegsMap[R];\n";
- OS << "};\n";
-}
-
void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
StringRef ClassName) {
const auto &SubRegIndices = RegBank.getSubRegIndices();
@@ -1149,9 +1122,7 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) {
<< " const TargetRegisterClass *getSubClassWithSubReg"
<< "(const TargetRegisterClass *, unsigned) const override;\n"
<< " const TargetRegisterClass *getSubRegisterClass"
- << "(const TargetRegisterClass *, unsigned) const override;\n"
- << " unsigned getNumAllocatableSubRegsImpl(MCPhysReg) const "
- "override;\n";
+ << "(const TargetRegisterClass *, unsigned) const override;\n";
}
OS << " const RegClassWeight &getRegClassWeight("
<< "const TargetRegisterClass *RC) const override;\n"
@@ -1512,7 +1483,6 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
if (!SubRegIndices.empty()) {
emitComposeSubRegIndices(OS, ClassName);
emitComposeSubRegIndexLaneMask(OS, ClassName);
- emitNumAllocatableSubRegs(OS, ClassName, InAllocClass);
}
if (!SubRegIndices.empty()) {
>From 5e5c20cfdb653ac7da2b17b46eab9dc694d558b7 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 4 Dec 2024 15:17:39 +0000
Subject: [PATCH 5/6] NFC: Move check for artificial regunit to
MachineRegisterInfo
---
llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 6 ++++++
llvm/lib/CodeGen/LiveIntervals.cpp | 8 ++------
llvm/lib/CodeGen/MachineRegisterInfo.cpp | 8 ++++++++
3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 5dc51aaed81c7b..9c13c253b97e72 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -980,6 +980,12 @@ class MachineRegisterInfo {
/// expected.
bool isReservedRegUnit(unsigned Unit) const;
+ /// Returns true when the given register unit is considered artificial.
+ ///
+ /// Register units are considered artificial when at least one of the
+ /// root registers is artificial.
+ bool isArtificialRegUnit(unsigned Unit) const;
+
/// isAllocatable - Returns true when PhysReg belongs to an allocatable
/// register class and it hasn't been reserved.
///
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 1d78872d8a0a30..2f4759cdbc3205 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -734,12 +734,8 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
for (MCRegUnitMaskIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
auto [Unit, Bitmask] = *UI;
// Record lane mask for all artificial RegUnits for this physreg.
- for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
- if (TRI->isArtificial(*Root)) {
- ArtificialLanes |= Bitmask;
- break;
- }
- }
+ if (MRI->isArtificialRegUnit(Unit))
+ ArtificialLanes |= Bitmask;
const LiveRange &RURange = getRegUnit(Unit);
if (RURange.empty())
continue;
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index fcedb302d228c4..c6c53c69c50372 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -659,3 +659,11 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
}
return false;
}
+
+bool MachineRegisterInfo::isArtificialRegUnit(unsigned Unit) const {
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root)
+ if (TRI->isArtificial(*Root))
+ return true;
+ return false;
+}
>From 8c3441b5a844d53896d0ddae02cf1b9b52871607 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 4 Dec 2024 17:01:11 +0000
Subject: [PATCH 6/6] Move isArtificialRegUnit to MCRegisterInfo
---
llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 6 ------
llvm/include/llvm/MC/MCRegisterInfo.h | 5 +++++
llvm/lib/CodeGen/LiveIntervals.cpp | 2 +-
llvm/lib/CodeGen/MachineRegisterInfo.cpp | 8 --------
llvm/lib/MC/MCRegisterInfo.cpp | 7 +++++++
5 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 9c13c253b97e72..5dc51aaed81c7b 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -980,12 +980,6 @@ class MachineRegisterInfo {
/// expected.
bool isReservedRegUnit(unsigned Unit) const;
- /// Returns true when the given register unit is considered artificial.
- ///
- /// Register units are considered artificial when at least one of the
- /// root registers is artificial.
- bool isArtificialRegUnit(unsigned Unit) const;
-
/// isAllocatable - Returns true when PhysReg belongs to an allocatable
/// register class and it hasn't been reserved.
///
diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index 73f29d0f521edf..164ef1ef44bbba 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -404,6 +404,11 @@ class MCRegisterInfo {
/// be modelled, such as the top 16-bits of a 32-bit GPR.
bool isArtificial(MCRegister RegNo) const { return get(RegNo).IsArtificial; }
+ /// Returns true when the given register unit is considered artificial.
+ /// Register units are considered artificial when at least one of the
+ /// root registers is artificial.
+ bool isArtificialRegUnit(MCRegUnit Unit) const;
+
/// Return the number of registers this target has (useful for
/// sizing arrays holding per register information)
unsigned getNumRegs() const {
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 2f4759cdbc3205..f69c71cd021034 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -734,7 +734,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
for (MCRegUnitMaskIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
auto [Unit, Bitmask] = *UI;
// Record lane mask for all artificial RegUnits for this physreg.
- if (MRI->isArtificialRegUnit(Unit))
+ if (TRI->isArtificialRegUnit(Unit))
ArtificialLanes |= Bitmask;
const LiveRange &RURange = getRegUnit(Unit);
if (RURange.empty())
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index c6c53c69c50372..fcedb302d228c4 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -659,11 +659,3 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
}
return false;
}
-
-bool MachineRegisterInfo::isArtificialRegUnit(unsigned Unit) const {
- const TargetRegisterInfo *TRI = getTargetRegisterInfo();
- for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root)
- if (TRI->isArtificial(*Root))
- return true;
- return false;
-}
diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp
index 178b1d21e5200a..4a9bacdbc8fc47 100644
--- a/llvm/lib/MC/MCRegisterInfo.cpp
+++ b/llvm/lib/MC/MCRegisterInfo.cpp
@@ -220,3 +220,10 @@ bool MCRegisterInfo::regsOverlap(MCRegister RegA, MCRegister RegB) const {
} while (*IA < *IB ? ++IA != EA : ++IB != EB);
return false;
}
+
+bool MCRegisterInfo::isArtificialRegUnit(MCRegUnit Unit) const {
+  for (MCRegUnitRootIterator Root(Unit, this); Root.isValid(); ++Root)
+    if (isArtificial(*Root))
+      return true;
+  return false;
+}
More information about the llvm-commits
mailing list