[llvm] [AArch64][GlobalISel] Fix creation of incorrect COPY gpr32, gpr32.sub_32 (PR #171100)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 8 01:48:56 PST 2025


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/171100

When selecting a G_STORE with an i8 memory type and an i16 value type, we would try to generate an incorrect %6:gpr32 = COPY %3.sub_32:gpr32all instruction. Make sure we only add the subreg if the Ty is larger than 32 bits and the subreg is needed.

>From 8838d7fbd624e6af9848bd782a6d0a72a8089cd4 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 8 Dec 2025 09:48:04 +0000
Subject: [PATCH] [AArch64][GlobalISel] Fix creation of incorrect COPY gpr32,
 gpr32.sub_32

When selecting a G_STORE with an i8 memory type and an i16 value type, we would
try to generate an incorrect %6:gpr32 = COPY %3.sub_32:gpr32all instruction.
Make sure we only add the subreg if the Ty is larger than 32 bits and the
subreg is needed.
---
 .../GISel/AArch64InstructionSelector.cpp      |   2 +
 .../CodeGen/AArch64/load-store-forwarding.ll  | 103 ++++++++++++++----
 2 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index f9db39e5f8622..88f1778680efe 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3015,6 +3015,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
         return false;
 
       // Generate a subreg copy.
+      if (RB.getID() == AArch64::GPRRegBankID && ValTy.getSizeInBits() < 32)
+        SubReg = 0;
       auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
                       .addReg(ValReg, 0, SubReg)
                       .getReg(0);
diff --git a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
index 02efbe9b409de..f1635239b146b 100644
--- a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
+++ b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
@@ -1,8 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefix CHECK-BE
-; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefix CHECK-LE
+; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-SD
+; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefixes CHECK-BE
+; RUN: llc -mtriple=aarch64 -global-isel -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-GI
 
 define i8 @test1(i32 %a, ptr %pa) {
+; CHECK-SD-LABEL: test1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    str w0, [x1]
+; CHECK-SD-NEXT:    ret
+;
 ; CHECK-BE-LABEL: test1:
 ; CHECK-BE:       // %bb.0:
 ; CHECK-BE-NEXT:    mov w8, w0
@@ -10,27 +16,28 @@ define i8 @test1(i32 %a, ptr %pa) {
 ; CHECK-BE-NEXT:    str w8, [x1]
 ; CHECK-BE-NEXT:    ret
 ;
-; CHECK-LE-LABEL: test1:
-; CHECK-LE:       // %bb.0:
-; CHECK-LE-NEXT:    str w0, [x1]
-; CHECK-LE-NEXT:    ret
+; CHECK-GI-LABEL: test1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str w0, [x1]
+; CHECK-GI-NEXT:    and w0, w0, #0xff
+; CHECK-GI-NEXT:    ret
   store i32 %a, ptr %pa
   %res = load i8, ptr %pa
   ret i8 %res
 }
 
 define i8 @test2(i32 %a, ptr %pa) {
-; CHECK-BE-LABEL: test2:
-; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    str w0, [x1]
-; CHECK-BE-NEXT:    ldrb w0, [x1, #1]
-; CHECK-BE-NEXT:    ret
-;
 ; CHECK-LE-LABEL: test2:
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    str w0, [x1]
 ; CHECK-LE-NEXT:    ubfx w0, w0, #8, #8
 ; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test2:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str w0, [x1]
+; CHECK-BE-NEXT:    ldrb w0, [x1, #1]
+; CHECK-BE-NEXT:    ret
   %p8 = getelementptr i8, ptr %pa, i32 1
   store i32 %a, ptr %pa
   %res = load i8, ptr %p8
@@ -38,17 +45,17 @@ define i8 @test2(i32 %a, ptr %pa) {
 }
 
 define i8 @test3(i32 %a, ptr %pa) {
-; CHECK-BE-LABEL: test3:
-; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    str w0, [x1]
-; CHECK-BE-NEXT:    ldrb w0, [x1, #2]
-; CHECK-BE-NEXT:    ret
-;
 ; CHECK-LE-LABEL: test3:
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    str w0, [x1]
 ; CHECK-LE-NEXT:    ubfx w0, w0, #16, #8
 ; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test3:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str w0, [x1]
+; CHECK-BE-NEXT:    ldrb w0, [x1, #2]
+; CHECK-BE-NEXT:    ret
   %p8 = getelementptr i8, ptr %pa, i32 2
   store i32 %a, ptr %pa
   %res = load i8, ptr %p8
@@ -56,18 +63,68 @@ define i8 @test3(i32 %a, ptr %pa) {
 }
 
 define i8 @test4(i32 %a, ptr %pa) {
-; CHECK-BE-LABEL: test4:
-; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    str w0, [x1]
-; CHECK-BE-NEXT:    ret
-;
 ; CHECK-LE-LABEL: test4:
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    str w0, [x1]
 ; CHECK-LE-NEXT:    lsr w0, w0, #24
 ; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test4:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str w0, [x1]
+; CHECK-BE-NEXT:    ret
   %p8 = getelementptr i8, ptr %pa, i32 3
   store i32 %a, ptr %pa
   %res = load i8, ptr %p8
   ret i8 %res
 }
+
+define i32 @load_i16_store_i8(ptr %p, ptr %q) {
+; CHECK-SD-LABEL: load_i16_store_i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    mov w0, wzr
+; CHECK-SD-NEXT:    strb w8, [x1]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-BE-LABEL: load_i16_store_i8:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    ldrb w8, [x0, #1]
+; CHECK-BE-NEXT:    mov w0, wzr
+; CHECK-BE-NEXT:    strb w8, [x1]
+; CHECK-BE-NEXT:    ret
+;
+; CHECK-GI-LABEL: load_i16_store_i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldrh w8, [x0]
+; CHECK-GI-NEXT:    mov w0, wzr
+; CHECK-GI-NEXT:    strb w8, [x1]
+; CHECK-GI-NEXT:    ret
+entry:
+  %l = load i16, ptr %p, align 4
+  %tr = trunc i16 %l to i8
+  store i8 %tr, ptr %q, align 1
+  ret i32 0
+}
+
+define i32 @load_i16_store_i8_freeze(ptr %p, ptr %q) {
+; CHECK-LE-LABEL: load_i16_store_i8_freeze:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    ldrh w8, [x0]
+; CHECK-LE-NEXT:    mov w0, wzr
+; CHECK-LE-NEXT:    strb w8, [x1]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: load_i16_store_i8_freeze:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    ldrh w8, [x0]
+; CHECK-BE-NEXT:    mov w0, wzr
+; CHECK-BE-NEXT:    strb w8, [x1]
+; CHECK-BE-NEXT:    ret
+entry:
+  %l = load i16, ptr %p, align 4
+  %fr = freeze i16 %l
+  %tr = trunc i16 %fr to i8
+  store i8 %tr, ptr %q, align 1
+  ret i32 0
+}



More information about the llvm-commits mailing list