[llvm] [LiveIntervals] Ignore artificial regs when adding kill flags (PR #116963)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 07:59:24 PST 2024


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/116963

>From 7dbc99897a25ca370e222097342bf1dc312676f3 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 20 Nov 2024 12:07:45 +0000
Subject: [PATCH 1/3] Precommit tests

---
 llvm/test/CodeGen/AArch64/arm64-addrmode.ll   | 102 +++++++++++-------
 .../CodeGen/AArch64/nested-iv-regalloc.mir    |  10 +-
 .../AArch64/preserve_nonecc_varargs_darwin.ll |   2 +-
 3 files changed, 67 insertions(+), 47 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index bfef61abd8c129..cb00272a7d022c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm64-eabi < %s | FileCheck %s
+; RUN: llc -aarch64-enable-subreg-liveness-tracking -mtriple=arm64-eabi < %s | FileCheck %s
 ; rdar://10232252
 
 @object = external hidden global i64, section "__DATA, __objc_ivar", align 8
@@ -214,8 +214,9 @@ define void @t17(i64 %a) {
 define i8 @LdOffset_i8(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT:    ldrb w0, [x8, #3704]
+; CHECK-NEXT:    mov w8, #56952 // =0xde78
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrb w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -226,8 +227,9 @@ define i8 @LdOffset_i8(ptr %a)  {
 define i32 @LdOffset_i8_zext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_zext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT:    ldrb w0, [x8, #3704]
+; CHECK-NEXT:    mov w8, #56952 // =0xde78
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrb w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -239,8 +241,9 @@ define i32 @LdOffset_i8_zext32(ptr %a)  {
 define i32 @LdOffset_i8_sext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT:    ldrsb w0, [x8, #3704]
+; CHECK-NEXT:    mov w8, #56952 // =0xde78
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrsb w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -252,8 +255,9 @@ define i32 @LdOffset_i8_sext32(ptr %a)  {
 define i64 @LdOffset_i8_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT:    ldrb w0, [x8, #3704]
+; CHECK-NEXT:    mov w8, #56952 // =0xde78
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrb w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -265,8 +269,9 @@ define i64 @LdOffset_i8_zext64(ptr %a)  {
 define i64 @LdOffset_i8_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT:    ldrsb x0, [x8, #3704]
+; CHECK-NEXT:    mov w8, #56952 // =0xde78
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrsb x0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -278,8 +283,9 @@ define i64 @LdOffset_i8_sext64(ptr %a)  {
 define i16 @LdOffset_i16(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT:    ldrh w0, [x8, #7408]
+; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
+; CHECK-NEXT:    movk w8, #31, lsl #16
+; CHECK-NEXT:    ldrh w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -290,8 +296,9 @@ define i16 @LdOffset_i16(ptr %a)  {
 define i32 @LdOffset_i16_zext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_zext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT:    ldrh w0, [x8, #7408]
+; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
+; CHECK-NEXT:    movk w8, #31, lsl #16
+; CHECK-NEXT:    ldrh w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -303,8 +310,9 @@ define i32 @LdOffset_i16_zext32(ptr %a)  {
 define i32 @LdOffset_i16_sext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_sext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT:    ldrsh w0, [x8, #7408]
+; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
+; CHECK-NEXT:    movk w8, #31, lsl #16
+; CHECK-NEXT:    ldrsh w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -316,8 +324,9 @@ define i32 @LdOffset_i16_sext32(ptr %a)  {
 define i64 @LdOffset_i16_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT:    ldrh w0, [x8, #7408]
+; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
+; CHECK-NEXT:    movk w8, #31, lsl #16
+; CHECK-NEXT:    ldrh w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -329,8 +338,9 @@ define i64 @LdOffset_i16_zext64(ptr %a)  {
 define i64 @LdOffset_i16_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT:    ldrsh x0, [x8, #7408]
+; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
+; CHECK-NEXT:    movk w8, #31, lsl #16
+; CHECK-NEXT:    ldrsh x0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -342,8 +352,9 @@ define i64 @LdOffset_i16_sext64(ptr %a)  {
 define i32 @LdOffset_i32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT:    ldr w0, [x8, #14816]
+; CHECK-NEXT:    mov w8, #31200 // =0x79e0
+; CHECK-NEXT:    movk w8, #63, lsl #16
+; CHECK-NEXT:    ldr w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 4
@@ -354,8 +365,9 @@ define i32 @LdOffset_i32(ptr %a)  {
 define i64 @LdOffset_i32_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT:    ldr w0, [x8, #14816]
+; CHECK-NEXT:    mov w8, #31200 // =0x79e0
+; CHECK-NEXT:    movk w8, #63, lsl #16
+; CHECK-NEXT:    ldr w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 2
@@ -367,8 +379,9 @@ define i64 @LdOffset_i32_zext64(ptr %a)  {
 define i64 @LdOffset_i32_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT:    ldrsw x0, [x8, #14816]
+; CHECK-NEXT:    mov w8, #31200 // =0x79e0
+; CHECK-NEXT:    movk w8, #63, lsl #16
+; CHECK-NEXT:    ldrsw x0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 2
@@ -380,8 +393,9 @@ define i64 @LdOffset_i32_sext64(ptr %a)  {
 define i64 @LdOffset_i64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
-; CHECK-NEXT:    ldr x0, [x8, #29632]
+; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
+; CHECK-NEXT:    movk w8, #126, lsl #16
+; CHECK-NEXT:    ldr x0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
   %val = load i64, ptr %arrayidx, align 4
@@ -392,8 +406,9 @@ define i64 @LdOffset_i64(ptr %a)  {
 define <2 x i32> @LdOffset_v2i32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
-; CHECK-NEXT:    ldr d0, [x8, #29632]
+; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
+; CHECK-NEXT:    movk w8, #126, lsl #16
+; CHECK-NEXT:    ldr d0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
   %val = load <2 x i32>, ptr %arrayidx, align 4
@@ -404,8 +419,9 @@ define <2 x i32> @LdOffset_v2i32(ptr %a)  {
 define <2 x i64> @LdOffset_v2i64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #4048, lsl #12 // =16580608
-; CHECK-NEXT:    ldr q0, [x8, #59264]
+; CHECK-NEXT:    mov w8, #59264 // =0xe780
+; CHECK-NEXT:    movk w8, #253, lsl #16
+; CHECK-NEXT:    ldr q0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
   %val = load <2 x i64>, ptr %arrayidx, align 4
@@ -416,8 +432,9 @@ define <2 x i64> @LdOffset_v2i64(ptr %a)  {
 define double @LdOffset_i8_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
-; CHECK-NEXT:    ldrsb w8, [x8, #3704]
+; CHECK-NEXT:    mov w8, #56952 // =0xde78
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrsb w8, [x0, x8]
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
@@ -430,8 +447,9 @@ define double @LdOffset_i8_f64(ptr %a)  {
 define double @LdOffset_i16_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
-; CHECK-NEXT:    ldrsh w8, [x8, #7408]
+; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
+; CHECK-NEXT:    movk w8, #31, lsl #16
+; CHECK-NEXT:    ldrsh w8, [x0, x8]
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
@@ -444,8 +462,9 @@ define double @LdOffset_i16_f64(ptr %a)  {
 define double @LdOffset_i32_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
-; CHECK-NEXT:    ldr s0, [x8, #14816]
+; CHECK-NEXT:    mov w8, #31200 // =0x79e0
+; CHECK-NEXT:    movk w8, #63, lsl #16
+; CHECK-NEXT:    ldr s0, [x0, x8]
 ; CHECK-NEXT:    ucvtf d0, d0
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
@@ -458,8 +477,9 @@ define double @LdOffset_i32_f64(ptr %a)  {
 define double @LdOffset_i64_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i64_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
-; CHECK-NEXT:    ldr d0, [x8, #29632]
+; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
+; CHECK-NEXT:    movk w8, #126, lsl #16
+; CHECK-NEXT:    ldr d0, [x0, x8]
 ; CHECK-NEXT:    scvtf d0, d0
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
diff --git a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
index 3bd8f83d27c2da..56405a2675f7ab 100644
--- a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
+++ b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 --run-pass=greedy,virtregrewriter -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -aarch64-enable-subreg-liveness-tracking --run-pass=greedy,virtregrewriter -verify-machineinstrs %s -o - | FileCheck %s
 
 # We should ideally not spill around any of the SUBSWri in the loop exit blocks (if.end and if.end27).
 
@@ -219,8 +219,8 @@ body:             |
   ; CHECK-NEXT:   liveins: $w10, $w11, $x2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   STRXui renamable $x8, %stack.1, 0 :: (store (s64) into %stack.1)
-  ; CHECK-NEXT:   renamable $w9 = MOVi32imm 36, implicit-def $x9
-  ; CHECK-NEXT:   renamable $x8 = MADDXrrr killed renamable $x8, killed renamable $x9, $xzr
+  ; CHECK-NEXT:   renamable $w9 = MOVi32imm 36
+  ; CHECK-NEXT:   renamable $x8 = MADDXrrr killed renamable $x8, renamable $x9, $xzr
   ; CHECK-NEXT:   renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
   ; CHECK-NEXT:   renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx9)
   ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
@@ -244,8 +244,8 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.5(0x50000000), %bb.8(0x30000000)
   ; CHECK-NEXT:   liveins: $w10, $w11, $x2, $x12
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = MOVi32imm 36, implicit-def $x8
-  ; CHECK-NEXT:   renamable $x8 = MADDXrrr renamable $x12, killed renamable $x8, $xzr
+  ; CHECK-NEXT:   renamable $w8 = MOVi32imm 36
+  ; CHECK-NEXT:   renamable $x8 = MADDXrrr renamable $x12, renamable $x8, $xzr
   ; CHECK-NEXT:   renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
   ; CHECK-NEXT:   renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx14)
   ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index e227f14542cc11..4206c0bc269915 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-enable-subreg-liveness-tracking < %s | FileCheck %s
 
 define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
 ; CHECK-LABEL: callee:

>From 34e12da10133cd818c5d9273059e30f8dda8a5e3 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 20 Nov 2024 12:23:19 +0000
Subject: [PATCH 2/3] [LiveIntervals] Ignore artificial regs when adding kill
 flags

If parts of a physical register for a given liverange, as assigned by the
register allocator, can be used to store other values not represented by
this liverange, then `LiveIntervals::addKillFlags` normally avoids adding a
kill flag on the use of this register when the value's liverange ends.

However, if all the other regunits are artificial, then we can still safely
add the kill flag, since those parts of the register can never be accessed
independently.
---
 llvm/lib/CodeGen/LiveIntervals.cpp            |  38 ++++++-
 llvm/test/CodeGen/AArch64/arm64-addrmode.ll   | 100 +++++++-----------
 .../CodeGen/AArch64/nested-iv-regalloc.mir    |   4 +-
 .../AArch64/preserve_nonecc_varargs_darwin.ll |  10 +-
 4 files changed, 84 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index a0b6bf445fa8af..18059a1d384580 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -710,6 +710,30 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
 // Register allocator hooks.
 //
 
+/// Returns true if the physreg has multiple regunits that can be accessed
+/// as independent registers.
+///
+/// Returns 'true' for e.g.:
+///   gpr64_0_gpr64_1
+///               => two independently accessible registers gpr64_0 and gpr64_1.
+///
+/// Returns 'false' for e.g.:
+///   gpr64_0:   => accessible register, reads/writes 64bits
+///   gpr32_0:   => accessible sub-register of gpr64_0, reads/writes 32bits
+///   gpr32_0_hi => top 32bits of gpr64_0, not independently accessible.
+static bool hasMultipleAddressableRegUnits(const TargetRegisterInfo *TRI,
+                                           MCPhysReg PhysReg) {
+  unsigned NumAddressableRegUnits = 0;
+  for (MCRegUnit U : TRI->regunits(PhysReg)) {
+    for (MCRegUnitRootIterator RI(U, TRI); RI.isValid(); ++RI)
+      if (!TRI->isArtificial(*RI) && TRI->isInAllocatableClass(*RI))
+        NumAddressableRegUnits++;
+    if (NumAddressableRegUnits > 1)
+      return true;
+  }
+  return false;
+}
+
 void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
   // Keep track of regunit ranges.
   SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
@@ -736,6 +760,18 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
         continue;
       RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
     }
+
+    // If parts of a physical register for a given liverange, as assigned by the
+    // register allocator, can be used to store other values not represented by
+    // this liverange, then `LiveIntervals::addKillFlags` normally avoids adding
+    // a kill flag on the use of this register when the value's liverange ends.
+    //
+    // However, if all the other regunits are artificial, then we can still
+    // safely add the kill flag, since those parts of the register can never be
+    // accessed independently.
+    bool AssumeOtherUnitsCanBeUsed =
+        hasMultipleAddressableRegUnits(TRI, PhysReg);
+
     // Every instruction that kills Reg corresponds to a segment range end
     // point.
     for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
@@ -780,7 +816,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
         // are actually never written by %2. After assignment the <kill>
         // flag at the read instruction is invalid.
         LaneBitmask DefinedLanesMask;
-        if (LI.hasSubRanges()) {
+        if (LI.hasSubRanges() && AssumeOtherUnitsCanBeUsed) {
           // Compute a mask of lanes that are defined.
           DefinedLanesMask = LaneBitmask::getNone();
           for (const LiveInterval::SubRange &SR : LI.subranges())
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index cb00272a7d022c..f8695b62619c09 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -214,9 +214,8 @@ define void @t17(i64 %a) {
 define i8 @LdOffset_i8(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -227,9 +226,8 @@ define i8 @LdOffset_i8(ptr %a)  {
 define i32 @LdOffset_i8_zext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_zext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -241,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a)  {
 define i32 @LdOffset_i8_sext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    ldrsb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -255,9 +252,8 @@ define i32 @LdOffset_i8_sext32(ptr %a)  {
 define i64 @LdOffset_i8_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -269,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a)  {
 define i64 @LdOffset_i8_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    ldrsb x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb x0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -283,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a)  {
 define i16 @LdOffset_i16(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    ldrh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -296,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a)  {
 define i32 @LdOffset_i16_zext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_zext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    ldrh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -310,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a)  {
 define i32 @LdOffset_i16_sext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_sext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    ldrsh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -324,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a)  {
 define i64 @LdOffset_i16_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    ldrh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -338,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a)  {
 define i64 @LdOffset_i16_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    ldrsh x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh x0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -352,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a)  {
 define i32 @LdOffset_i32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    ldr w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr w0, [x8, #14816]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 4
@@ -365,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a)  {
 define i64 @LdOffset_i32_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    ldr w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr w0, [x8, #14816]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 2
@@ -379,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a)  {
 define i64 @LdOffset_i32_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    ldrsw x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldrsw x0, [x8, #14816]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 2
@@ -393,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a)  {
 define i64 @LdOffset_i64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
-; CHECK-NEXT:    movk w8, #126, lsl #16
-; CHECK-NEXT:    ldr x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr x0, [x8, #29632]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
   %val = load i64, ptr %arrayidx, align 4
@@ -406,9 +392,8 @@ define i64 @LdOffset_i64(ptr %a)  {
 define <2 x i32> @LdOffset_v2i32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
-; CHECK-NEXT:    movk w8, #126, lsl #16
-; CHECK-NEXT:    ldr d0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr d0, [x8, #29632]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
   %val = load <2 x i32>, ptr %arrayidx, align 4
@@ -419,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a)  {
 define <2 x i64> @LdOffset_v2i64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #59264 // =0xe780
-; CHECK-NEXT:    movk w8, #253, lsl #16
-; CHECK-NEXT:    ldr q0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #4048, lsl #12 // =16580608
+; CHECK-NEXT:    ldr q0, [x8, #59264]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
   %val = load <2 x i64>, ptr %arrayidx, align 4
@@ -432,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a)  {
 define double @LdOffset_i8_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    ldrsb w8, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb w8, [x8, #3704]
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
@@ -447,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a)  {
 define double @LdOffset_i16_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    ldrsh w8, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh w8, [x8, #7408]
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
@@ -462,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a)  {
 define double @LdOffset_i32_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    ldr s0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr s0, [x8, #14816]
 ; CHECK-NEXT:    ucvtf d0, d0
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
@@ -477,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a)  {
 define double @LdOffset_i64_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i64_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
-; CHECK-NEXT:    movk w8, #126, lsl #16
-; CHECK-NEXT:    ldr d0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr d0, [x8, #29632]
 ; CHECK-NEXT:    scvtf d0, d0
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
diff --git a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
index 56405a2675f7ab..ff29c78b5a0ce5 100644
--- a/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
+++ b/llvm/test/CodeGen/AArch64/nested-iv-regalloc.mir
@@ -220,7 +220,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   STRXui renamable $x8, %stack.1, 0 :: (store (s64) into %stack.1)
   ; CHECK-NEXT:   renamable $w9 = MOVi32imm 36
-  ; CHECK-NEXT:   renamable $x8 = MADDXrrr killed renamable $x8, renamable $x9, $xzr
+  ; CHECK-NEXT:   renamable $x8 = MADDXrrr killed renamable $x8, killed renamable $x9, $xzr
   ; CHECK-NEXT:   renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
   ; CHECK-NEXT:   renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx9)
   ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
@@ -245,7 +245,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $w10, $w11, $x2, $x12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = MOVi32imm 36
-  ; CHECK-NEXT:   renamable $x8 = MADDXrrr renamable $x12, renamable $x8, $xzr
+  ; CHECK-NEXT:   renamable $x8 = MADDXrrr renamable $x12, killed renamable $x8, $xzr
   ; CHECK-NEXT:   renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
   ; CHECK-NEXT:   renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx14)
   ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index 4206c0bc269915..2a77d4dd33fe53 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -27,12 +27,11 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    sub sp, sp, #208
 ; CHECK-NEXT:    mov w8, #10 ; =0xa
 ; CHECK-NEXT:    mov w9, #9 ; =0x9
-; CHECK-NEXT:    mov w0, #1 ; =0x1
+; CHECK-NEXT:    mov w10, #8 ; =0x8
 ; CHECK-NEXT:    stp x9, x8, [sp, #24]
-; CHECK-NEXT:    mov w8, #8 ; =0x8
-; CHECK-NEXT:    mov w9, #6 ; =0x6
-; CHECK-NEXT:    str x8, [sp, #16]
 ; CHECK-NEXT:    mov w8, #7 ; =0x7
+; CHECK-NEXT:    mov w9, #6 ; =0x6
+; CHECK-NEXT:    mov w0, #1 ; =0x1
 ; CHECK-NEXT:    mov w1, #2 ; =0x2
 ; CHECK-NEXT:    mov w2, #3 ; =0x3
 ; CHECK-NEXT:    mov w3, #4 ; =0x4
@@ -47,7 +46,8 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    stp x22, x21, [sp, #160] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #176] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #192] ; 16-byte Folded Spill
-; CHECK-NEXT:    stp x9, x8, [sp]
+; CHECK-NEXT:    stp x8, x10, [sp, #8]
+; CHECK-NEXT:    str x9, [sp]
 ; CHECK-NEXT:    bl _callee
 ; CHECK-NEXT:    ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x20, x19, [sp, #176] ; 16-byte Folded Reload

>From fd023ac802c57f0fadb4031bf09fab9b0601af71 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 3 Dec 2024 15:49:26 +0000
Subject: [PATCH 3/3] Precompute subreg info with TableGen

---
 .../include/llvm/CodeGen/TargetRegisterInfo.h | 13 ++++++
 llvm/lib/CodeGen/LiveIntervals.cpp            | 41 ++-----------------
 llvm/utils/TableGen/RegisterInfoEmitter.cpp   | 33 ++++++++++++++-
 3 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 292fa3c94969be..953a5ca648ef98 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -745,6 +745,15 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return reverseComposeSubRegIndexLaneMaskImpl(IdxA, LaneMask);
   }
 
+  /// Returns the number of allocatable sub-registers of \p R: the number of
+  /// roots of R's register units that are not artificial and belong to an
+  /// allocatable register class. For a register like D0_D1, which consists of
+  /// D0 and D1, this function would return '2'. For an architecture where
+  /// D0=S0_S1 and D1=S2_S3, it would instead return '4' (S0, S1, S2, S3).
+  unsigned getNumAllocatableSubRegs(MCPhysReg R) const {
+    return getNumAllocatableSubRegsImpl(R);
+  }
+
   /// Debugging helper: dump register in human readable form to dbgs() stream.
   static void dumpReg(Register Reg, unsigned SubRegIndex = 0,
                       const TargetRegisterInfo *TRI = nullptr);
@@ -774,6 +783,10 @@ class TargetRegisterInfo : public MCRegisterInfo {
     llvm_unreachable("Target has no sub-registers");
   }
 
+  virtual unsigned getNumAllocatableSubRegsImpl(MCPhysReg) const {
+    llvm_unreachable("Target has no sub-registers");
+  }
+
   /// Return the register cost table index. This implementation is sufficient
   /// for most architectures and can be overriden by targets in case there are
   /// multiple cost values associated with each register.
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 18059a1d384580..d8acdb0c2c9e6d 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -710,30 +710,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
 // Register allocator hooks.
 //
 
-/// Returns true if the physreg has multiple regunits that can be accessed
-/// as independent registers.
-///
-/// Returns 'true' for e.g.:
-///   gpr64_0_gpr64_1
-//               => two independently accessible registers gpr64_0 and gpr64_1.
-///
-/// Returns 'false' for e.g.:
-///   gpr64_0:   => accessible register, reads/writes 64bits
-///   gpr32_0:   => accessible sub-regsiter of gpr64_0, reads/writes 32bits
-//    gpr32_0_hi => top 32bits of gpr64_0, not independently accessible.
-static bool hasMultipleAddressableRegUnits(const TargetRegisterInfo *TRI,
-                                           MCPhysReg PhysReg) {
-  unsigned NumAddressableRegUnits = 0;
-  for (MCRegUnit U : TRI->regunits(PhysReg)) {
-    for (MCRegUnitRootIterator RI(U, TRI); RI.isValid(); ++RI)
-      if (!TRI->isArtificial(*RI) && TRI->isInAllocatableClass(*RI))
-        NumAddressableRegUnits++;
-    if (NumAddressableRegUnits > 1)
-      return true;
-  }
-  return false;
-}
-
 void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
   // Keep track of regunit ranges.
   SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
@@ -760,18 +736,6 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
         continue;
       RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
     }
-
-    // If parts of a physical register for a given liverange, as assigned by the
-    // register allocator, can be used to store other values not represented by
-    // this liverange, then `LiveIntervals::addKillFlags` normally avoids adding
-    // a kill flag on the use of this register when the value's liverange ends.
-    //
-    // However, if all the other regunits are artificial, then we can still
-    // safely add the kill flag, since those parts of the register can never be
-    // accessed independently.
-    bool AssumeOtherUnitsCanBeUsed =
-        hasMultipleAddressableRegUnits(TRI, PhysReg);
-
     // Every instruction that kills Reg corresponds to a segment range end
     // point.
     for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
@@ -806,6 +770,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
       if (MRI->subRegLivenessEnabled()) {
         // When reading a partial undefined value we must not add a kill flag.
         // The regalloc might have used the undef lane for something else.
+        // If the register consists of a single allocatable subreg, then
+        // we can assume the other (undef) lanes cannot be used.
+        //
         // Example:
         //     %1 = ...                  ; R32: %1
         //     %2:high16 = ...           ; R64: %2
@@ -816,7 +783,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
         // are actually never written by %2. After assignment the <kill>
         // flag at the read instruction is invalid.
         LaneBitmask DefinedLanesMask;
-        if (LI.hasSubRanges() && AssumeOtherUnitsCanBeUsed) {
+        if (LI.hasSubRanges() && TRI->getNumAllocatableSubRegs(PhysReg) > 1) {
           // Compute a mask of lanes that are defined.
           DefinedLanesMask = LaneBitmask::getNone();
           for (const LiveInterval::SubRange &SR : LI.subranges())
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index a6f87119aca5ba..f855fe852c76c9 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -93,6 +93,8 @@ class RegisterInfoEmitter {
   void EmitRegUnitPressure(raw_ostream &OS, StringRef ClassName);
   void emitComposeSubRegIndices(raw_ostream &OS, StringRef ClassName);
   void emitComposeSubRegIndexLaneMask(raw_ostream &OS, StringRef ClassName);
+  void emitNumAllocatableSubRegs(raw_ostream &OS, StringRef ClassName,
+                                 llvm::BitVector &InAllocClass);
 };
 
 } // end anonymous namespace
@@ -677,6 +679,32 @@ static bool combine(const CodeGenSubRegIndex *Idx,
   return true;
 }
 
+/// Emit getNumAllocatableSubRegsImpl(): a per-register lookup table counting
+/// the non-artificial, allocatable roots of each register's regunits.
+void RegisterInfoEmitter::emitNumAllocatableSubRegs(
+    raw_ostream &OS, StringRef ClassName, llvm::BitVector &InAllocClass) {
+  OS << "unsigned " << ClassName
+     << "::getNumAllocatableSubRegsImpl(MCPhysReg R) const {\n"
+     << "  static const unsigned numAllocatableSubRegsMap[] = {\n"
+     << "    0, // NoRegister\n";
+  const auto &Regs = RegBank.getRegisters();
+  for (auto [I, R] : llvm::enumerate(Regs)) {
+    unsigned NumAllocatableSubRegs = 0;
+    for (unsigned U : R.getRegUnits())
+      for (const CodeGenRegister *UR : RegBank.getRegUnit(U).getRoots())
+        if (!UR->Artificial && InAllocClass[UR->EnumValue])
+          ++NumAllocatableSubRegs;
+    // No trailing comma after the last initializer.
+    OS << "    " << NumAllocatableSubRegs << (I < Regs.size() - 1 ? "," : "")
+       << " // " << R.getName() << "\n";
+  }
+  // The table has one entry per register plus the leading NoRegister slot.
+  OS << "  };\n"
+     << "  assert(R <= " << Regs.size() << " && \"Unexpected physreg\");\n"
+     << "  return numAllocatableSubRegsMap[R];\n"
+     << "}\n";
+}
+
 void RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
                                                    StringRef ClassName) {
   const auto &SubRegIndices = RegBank.getSubRegIndices();
@@ -1122,7 +1150,9 @@ void RegisterInfoEmitter::runTargetHeader(raw_ostream &OS) {
        << "  const TargetRegisterClass *getSubClassWithSubReg"
        << "(const TargetRegisterClass *, unsigned) const override;\n"
        << "  const TargetRegisterClass *getSubRegisterClass"
-       << "(const TargetRegisterClass *, unsigned) const override;\n";
+       << "(const TargetRegisterClass *, unsigned) const override;\n"
+       << "  unsigned getNumAllocatableSubRegsImpl(MCPhysReg) const "
+          "override;\n";
   }
   OS << "  const RegClassWeight &getRegClassWeight("
      << "const TargetRegisterClass *RC) const override;\n"
@@ -1483,6 +1513,7 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
   if (!SubRegIndices.empty()) {
     emitComposeSubRegIndices(OS, ClassName);
     emitComposeSubRegIndexLaneMask(OS, ClassName);
+    emitNumAllocatableSubRegs(OS, ClassName, InAllocClass);
   }
 
   if (!SubRegIndices.empty()) {



More information about the llvm-commits mailing list