[llvm] [AArch64][GlobalISel] Extend smaller than i32 gpr loads/stores in RegBankSelect. (PR #175810)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 16 23:53:36 PST 2026
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/175810
>From dad09d5a37780313d7a17bb29507c63600d77d3a Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sat, 17 Jan 2026 07:53:21 +0000
Subject: [PATCH] [AArch64][GlobalISel] Extend smaller than i32 gpr
loads/stores in RegBankSelect.
An i8 / i16 load or store is only legal for FPR registers. This patch extends
the types of i8/i16 G_LOAD and G_STORE instructions to i32 using anyext / trunc, so
that selection can be simpler and does not need to handle illegal operations.
This can leave behind some anyext(trunc) operation pairs that are not removed yet,
but it should be possible to optimize them away.
---
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 51 ++++++++++++--
.../Atomics/aarch64-atomic-load-rcpc_immo.ll | 24 +++----
.../AArch64/GlobalISel/arm64-atomic.ll | 42 +++++-------
.../AArch64/GlobalISel/arm64-pcsections.ll | 10 +--
.../GlobalISel/sink-and-fold-illegal-shift.ll | 4 +-
llvm/test/CodeGen/AArch64/aarch64-mops.ll | 68 +++++++++++++------
llvm/test/CodeGen/AArch64/cpa-globalisel.ll | 11 ++-
.../CodeGen/AArch64/load-store-forwarding.ll | 22 ++++++
8 files changed, 152 insertions(+), 80 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index c5885b53b513b..f8b5739d1d13a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -364,9 +364,33 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MachineRegisterInfo &MRI = OpdMapper.getMRI();
switch (MI.getOpcode()) {
+ case TargetOpcode::G_STORE: {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
+ Ty.getSizeInBits() < 32) {
+ Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+ auto Ext = Builder.buildAnyExt(LLT::scalar(32), Dst);
+ MI.getOperand(0).setReg(Ext.getReg(0));
+ MRI.setRegBank(Ext.getReg(0), AArch64::GPRRegBank);
+ }
+ return applyDefaultMapping(OpdMapper);
+ }
+ case TargetOpcode::G_LOAD: {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
+ Ty.getSizeInBits() < 32) {
+ Builder.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
+ Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ Builder.buildTrunc(Dst, ExtReg);
+ MI.getOperand(0).setReg(ExtReg);
+ MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
+ }
+ [[fallthrough]];
+ }
case TargetOpcode::G_OR:
case TargetOpcode::G_BITCAST:
- case TargetOpcode::G_LOAD:
// Those ID must match getInstrAlternativeMappings.
assert((OpdMapper.getInstrMapping().getID() >= 1 &&
OpdMapper.getInstrMapping().getID() <= 4) &&
@@ -934,6 +958,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
if (cast<GLoad>(MI).isAtomic()) {
// Atomics always use GPR destinations. Don't refine any further.
OpRegBankIdx[0] = PMI_FirstGPR;
+ if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() < 32)
+ MappingID = CustomMappingID;
break;
}
@@ -964,18 +990,29 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
prefersFPUse(UseMI, MRI, TRI);
}))
OpRegBankIdx[0] = PMI_FirstFPR;
+
+ // On GPR, extend any load < 32bits to 32bit.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isScalar() && Ty.getSizeInBits() < 32)
+ MappingID = CustomMappingID;
break;
}
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
if (OpRegBankIdx[0] == PMI_FirstGPR) {
Register VReg = MI.getOperand(0).getReg();
- if (!VReg)
- break;
- MachineInstr *DefMI = MRI.getVRegDef(VReg);
- if (onlyDefinesFP(*DefMI, MRI, TRI))
- OpRegBankIdx[0] = PMI_FirstFPR;
- break;
+ if (VReg) {
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ if (onlyDefinesFP(*DefMI, MRI, TRI)) {
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ break;
+ }
+ }
+
+ // On GPR, extend any store < 32bits to 32bit.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isScalar() && Ty.getSizeInBits() < 32)
+ MappingID = CustomMappingID;
}
break;
case TargetOpcode::G_INDEXED_STORE:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
index a93d27f9b06cb..cbc87106e02f8 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
@@ -45,8 +45,7 @@ define i8 @load_atomic_i8_aligned_monotonic_const(ptr readonly %ptr) {
define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
; GISEL-LABEL: load_atomic_i8_aligned_acquire:
-; GISEL: add x8, x0, #4
-; GISEL: ldaprb w0, [x8]
+; GISEL: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
@@ -61,8 +60,7 @@ define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
define i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) {
; GISEL-LABEL: load_atomic_i8_aligned_acquire_const:
-; GISEL: add x8, x0, #4
-; GISEL: ldaprb w0, [x8]
+; GISEL: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
@@ -127,8 +125,7 @@ define i16 @load_atomic_i16_aligned_monotonic_const(ptr readonly %ptr) {
define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
; GISEL-LABEL: load_atomic_i16_aligned_acquire:
-; GISEL: add x8, x0, #8
-; GISEL: ldaprh w0, [x8]
+; GISEL: ldapurh w0, [x0, #8]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #8
@@ -143,8 +140,7 @@ define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
define i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) {
; GISEL-LABEL: load_atomic_i16_aligned_acquire_const:
-; GISEL: add x8, x0, #8
-; GISEL: ldaprh w0, [x8]
+; GISEL: ldapurh w0, [x0, #8]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #8
@@ -437,8 +433,7 @@ define i8 @load_atomic_i8_unaligned_monotonic_const(ptr readonly %ptr) {
define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
; GISEL-LABEL: load_atomic_i8_unaligned_acquire:
-; GISEL: add x8, x0, #4
-; GISEL: ldaprb w0, [x8]
+; GISEL: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
@@ -453,8 +448,7 @@ define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
define i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) {
; GISEL-LABEL: load_atomic_i8_unaligned_acquire_const:
-; GISEL: add x8, x0, #4
-; GISEL: ldaprb w0, [x8]
+; GISEL: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
@@ -904,8 +898,7 @@ define i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %ptr) {
define i8 @load_atomic_i8_from_gep() {
; GISEL-LABEL: load_atomic_i8_from_gep:
; GISEL: bl init
-; GISEL: add x8, x8, #1
-; GISEL: ldaprb w0, [x8]
+; GISEL: ldapurb w0, [x8, #1]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_from_gep:
; SDAG-AVOIDLDAPUR: bl init
@@ -925,8 +918,7 @@ define i8 @load_atomic_i8_from_gep() {
define i16 @load_atomic_i16_from_gep() {
; GISEL-LABEL: load_atomic_i16_from_gep:
; GISEL: bl init
-; GISEL: add x8, x8, #2
-; GISEL: ldaprh w0, [x8]
+; GISEL: ldapurh w0, [x8, #2]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_from_gep:
; SDAG-AVOIDLDAPUR: bl init
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 3f51ec747182a..123df841402fc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1016,11 +1016,9 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldrb w9, [x0, #4095]
-; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw
-; CHECK-NOLSE-O0-NEXT: ldrb w8, [x8]
+; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0, w1, sxtw]
; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxtb
-; CHECK-NOLSE-O0-NEXT: subs x9, x0, #256
-; CHECK-NOLSE-O0-NEXT: ldrb w9, [x9]
+; CHECK-NOLSE-O0-NEXT: ldurb w9, [x0, #-256]
; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT: ldrb w9, [x9]
@@ -1030,11 +1028,9 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_8:
; CHECK-OUTLINE-O0: ; %bb.0:
; CHECK-OUTLINE-O0-NEXT: ldrb w9, [x0, #4095]
-; CHECK-OUTLINE-O0-NEXT: add x8, x0, w1, sxtw
-; CHECK-OUTLINE-O0-NEXT: ldrb w8, [x8]
+; CHECK-OUTLINE-O0-NEXT: ldrb w8, [x0, w1, sxtw]
; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxtb
-; CHECK-OUTLINE-O0-NEXT: subs x9, x0, #256
-; CHECK-OUTLINE-O0-NEXT: ldrb w9, [x9]
+; CHECK-OUTLINE-O0-NEXT: ldurb w9, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT: ldrb w9, [x9]
@@ -1056,11 +1052,9 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-LSE-O0-LABEL: atomic_load_relaxed_8:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: ldrb w9, [x0, #4095]
-; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw
-; CHECK-LSE-O0-NEXT: ldrb w8, [x8]
+; CHECK-LSE-O0-NEXT: ldrb w8, [x0, w1, sxtw]
; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxtb
-; CHECK-LSE-O0-NEXT: subs x9, x0, #256
-; CHECK-LSE-O0-NEXT: ldrb w9, [x9]
+; CHECK-LSE-O0-NEXT: ldurb w9, [x0, #-256]
; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxtb
; CHECK-LSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT: ldrb w9, [x9]
@@ -1112,11 +1106,9 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldrh w9, [x0, #8190]
-; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw #1
-; CHECK-NOLSE-O0-NEXT: ldrh w8, [x8]
+; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0, w1, sxtw #1]
; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxth
-; CHECK-NOLSE-O0-NEXT: subs x9, x0, #256
-; CHECK-NOLSE-O0-NEXT: ldrh w9, [x9]
+; CHECK-NOLSE-O0-NEXT: ldurh w9, [x0, #-256]
; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT: ldrh w9, [x9]
@@ -1126,11 +1118,9 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_16:
; CHECK-OUTLINE-O0: ; %bb.0:
; CHECK-OUTLINE-O0-NEXT: ldrh w9, [x0, #8190]
-; CHECK-OUTLINE-O0-NEXT: add x8, x0, w1, sxtw #1
-; CHECK-OUTLINE-O0-NEXT: ldrh w8, [x8]
+; CHECK-OUTLINE-O0-NEXT: ldrh w8, [x0, w1, sxtw #1]
; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxth
-; CHECK-OUTLINE-O0-NEXT: subs x9, x0, #256
-; CHECK-OUTLINE-O0-NEXT: ldrh w9, [x9]
+; CHECK-OUTLINE-O0-NEXT: ldurh w9, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT: add w8, w8, w9, uxth
; CHECK-OUTLINE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT: ldrh w9, [x9]
@@ -1152,11 +1142,9 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-LSE-O0-LABEL: atomic_load_relaxed_16:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: ldrh w9, [x0, #8190]
-; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw #1
-; CHECK-LSE-O0-NEXT: ldrh w8, [x8]
+; CHECK-LSE-O0-NEXT: ldrh w8, [x0, w1, sxtw #1]
; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxth
-; CHECK-LSE-O0-NEXT: subs x9, x0, #256
-; CHECK-LSE-O0-NEXT: ldrh w9, [x9]
+; CHECK-LSE-O0-NEXT: ldurh w9, [x0, #-256]
; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxth
; CHECK-LSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT: ldrh w9, [x9]
@@ -1670,6 +1658,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: ldarb w8, [x0]
; CHECK-NOLSE-O1-NEXT: ldrh w9, [x1]
+; CHECK-NOLSE-O1-NEXT: uxth w9, w9
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8, uxtb
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -1677,6 +1666,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-OUTLINE-O1: ; %bb.0:
; CHECK-OUTLINE-O1-NEXT: ldarb w8, [x0]
; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x1]
+; CHECK-OUTLINE-O1-NEXT: uxth w9, w9
; CHECK-OUTLINE-O1-NEXT: add w0, w9, w8, uxtb
; CHECK-OUTLINE-O1-NEXT: ret
;
@@ -1684,6 +1674,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldarb w9, [x0]
; CHECK-NOLSE-O0-NEXT: ldrh w8, [x1]
+; CHECK-NOLSE-O0-NEXT: uxth w8, w8
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT: ret
;
@@ -1691,6 +1682,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-OUTLINE-O0: ; %bb.0:
; CHECK-OUTLINE-O0-NEXT: ldarb w9, [x0]
; CHECK-OUTLINE-O0-NEXT: ldrh w8, [x1]
+; CHECK-OUTLINE-O0-NEXT: uxth w8, w8
; CHECK-OUTLINE-O0-NEXT: add w0, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT: ret
;
@@ -1698,6 +1690,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldaprb w8, [x0]
; CHECK-LSE-O1-NEXT: ldrh w9, [x1]
+; CHECK-LSE-O1-NEXT: uxth w9, w9
; CHECK-LSE-O1-NEXT: add w0, w9, w8, uxtb
; CHECK-LSE-O1-NEXT: ret
;
@@ -1705,6 +1698,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: ldaprb w9, [x0]
; CHECK-LSE-O0-NEXT: ldrh w8, [x1]
+; CHECK-LSE-O0-NEXT: uxth w8, w8
; CHECK-LSE-O0-NEXT: add w0, w8, w9, uxtb
; CHECK-LSE-O0-NEXT: ret
%val1.8 = load atomic i8, ptr %p8 acquire, align 1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index fadd8c38f414d..98840bb4b68e2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -377,8 +377,8 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x0, 4095, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unsigned)
- ; CHECK-NEXT: renamable $w9 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0 :: (load unordered (s8) from %ir.ptr_regoff)
- ; CHECK-NEXT: renamable $w10 = LDURBBi renamable $x0, -256 :: (load monotonic (s8) from %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $w9 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_regoff)
+ ; CHECK-NEXT: renamable $w10 = LDURBBi renamable $x0, -256, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unscaled)
; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w9, killed renamable $w8, 0, pcsections !0
; CHECK-NEXT: renamable $x9 = ADDXri killed renamable $x0, 291, 12
; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w8, killed renamable $w10, 0, pcsections !0
@@ -409,8 +409,8 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDRHHui renamable $x0, 4095, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unsigned)
- ; CHECK-NEXT: renamable $w9 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1 :: (load unordered (s16) from %ir.ptr_regoff)
- ; CHECK-NEXT: renamable $w10 = LDURHHi renamable $x0, -256 :: (load monotonic (s16) from %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $w9 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s16) from %ir.ptr_regoff)
+ ; CHECK-NEXT: renamable $w10 = LDURHHi renamable $x0, -256, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unscaled)
; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w9, killed renamable $w8, 8, pcsections !0
; CHECK-NEXT: renamable $x9 = ADDXri killed renamable $x0, 291, 12
; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w8, killed renamable $w10, 8, pcsections !0
@@ -623,6 +623,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-NEXT: {{ $}}
; CHECK-NOLSE-NEXT: renamable $w8 = LDARB killed renamable $x0, pcsections !0 :: (load acquire (s8) from %ir.p8)
; CHECK-NOLSE-NEXT: renamable $w9 = LDRHHui killed renamable $x1, 0, pcsections !0 :: (load unordered (s16) from %ir.p16)
+ ; CHECK-NOLSE-NEXT: renamable $w9 = UBFMWri killed renamable $w9, 0, 15
; CHECK-NOLSE-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 0, pcsections !0
; CHECK-NOLSE-NEXT: RET undef $lr, implicit $w0
;
@@ -632,6 +633,7 @@ define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-LDAPR-NEXT: {{ $}}
; CHECK-LDAPR-NEXT: renamable $w8 = LDAPRB killed renamable $x0, pcsections !0 :: (load acquire (s8) from %ir.p8)
; CHECK-LDAPR-NEXT: renamable $w9 = LDRHHui killed renamable $x1, 0, pcsections !0 :: (load unordered (s16) from %ir.p16)
+ ; CHECK-LDAPR-NEXT: renamable $w9 = UBFMWri killed renamable $w9, 0, 15
; CHECK-LDAPR-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 0, pcsections !0
; CHECK-LDAPR-NEXT: RET undef $lr, implicit $w0
%val1.8 = load atomic i8, ptr %p8 acquire, align 1, !pcsections !0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll
index b9892fc31bedb..2160ff973425d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/sink-and-fold-illegal-shift.ll
@@ -7,8 +7,8 @@ target triple = "aarch64-linux"
define void @f(ptr %p, i64 %i) optsize {
; CHECK-LABEL: f:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, x1, asr #32
-; CHECK-NEXT: strb wzr, [x8]
+; CHECK-NEXT: asr x8, x1, #32
+; CHECK-NEXT: strb wzr, [x0, x8]
; CHECK-NEXT: ret
%d = ashr i64 %i, 32
%a = getelementptr i8, ptr %p, i64 %d
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index 9e3a0fc30d8fa..3a6d2e045cd70 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -60,17 +60,31 @@ entry:
}
define void @memset_10_zeroval(ptr %dst) {
-; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval:
-; GISel-WITHOUT-MOPS: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-NEXT: str xzr, [x0]
-; GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8]
-; GISel-WITHOUT-MOPS-NEXT: ret
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval:
+; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, wzr
+; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT: ret
;
-; GISel-MOPS-LABEL: memset_10_zeroval:
-; GISel-MOPS: // %bb.0: // %entry
-; GISel-MOPS-NEXT: str xzr, [x0]
-; GISel-MOPS-NEXT: strh wzr, [x0, #8]
-; GISel-MOPS-NEXT: ret
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval:
+; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT: strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT: ret
+;
+; GISel-MOPS-O0-LABEL: memset_10_zeroval:
+; GISel-MOPS-O0: // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT: str xzr, [x0]
+; GISel-MOPS-O0-NEXT: mov w8, wzr
+; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
+; GISel-MOPS-O0-NEXT: ret
+;
+; GISel-MOPS-O3-LABEL: memset_10_zeroval:
+; GISel-MOPS-O3: // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT: str xzr, [x0]
+; GISel-MOPS-O3-NEXT: strh wzr, [x0, #8]
+; GISel-MOPS-O3-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
@@ -89,17 +103,31 @@ entry:
}
define void @memset_10_zeroval_volatile(ptr %dst) {
-; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-NEXT: str xzr, [x0]
-; GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8]
-; GISel-WITHOUT-MOPS-NEXT: ret
+; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O0-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, wzr
+; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT: ret
;
-; GISel-MOPS-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS: // %bb.0: // %entry
-; GISel-MOPS-NEXT: str xzr, [x0]
-; GISel-MOPS-NEXT: strh wzr, [x0, #8]
-; GISel-MOPS-NEXT: ret
+; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT: strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT: ret
+;
+; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS-O0: // %bb.0: // %entry
+; GISel-MOPS-O0-NEXT: str xzr, [x0]
+; GISel-MOPS-O0-NEXT: mov w8, wzr
+; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
+; GISel-MOPS-O0-NEXT: ret
+;
+; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS-O3: // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT: str xzr, [x0]
+; GISel-MOPS-O3-NEXT: strh wzr, [x0, #8]
+; GISel-MOPS-O3-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/AArch64/cpa-globalisel.ll b/llvm/test/CodeGen/AArch64/cpa-globalisel.ll
index c9b48b9685df3..5cad4c0db02db 100644
--- a/llvm/test/CodeGen/AArch64/cpa-globalisel.ll
+++ b/llvm/test/CodeGen/AArch64/cpa-globalisel.ll
@@ -652,17 +652,14 @@ define hidden void @multidim() {
; CHECK-CPA-O0-NEXT: mov w10, w9
; CHECK-CPA-O0-NEXT: ldrh w8, [x8, :lo12:b]
; CHECK-CPA-O0-NEXT: add w9, w8, #1
-; CHECK-CPA-O0-NEXT: // implicit-def: $x8
-; CHECK-CPA-O0-NEXT: mov w8, w9
-; CHECK-CPA-O0-NEXT: sxtw x9, w8
; CHECK-CPA-O0-NEXT: mov w8, #2 // =0x2
; CHECK-CPA-O0-NEXT: mov w11, w8
; CHECK-CPA-O0-NEXT: adrp x8, a
; CHECK-CPA-O0-NEXT: add x8, x8, :lo12:a
; CHECK-CPA-O0-NEXT: addpt x8, x8, x11
; CHECK-CPA-O0-NEXT: addpt x8, x8, x10, lsl #1
-; CHECK-CPA-O0-NEXT: addpt x8, x8, x9
-; CHECK-CPA-O0-NEXT: ldrb w8, [x8]
+; CHECK-CPA-O0-NEXT: ldrb w8, [x8, w9, sxtw]
+; CHECK-CPA-O0-NEXT: uxtb w8, w8
; CHECK-CPA-O0-NEXT: cbz w8, .LBB14_2
; CHECK-CPA-O0-NEXT: b .LBB14_1
; CHECK-CPA-O0-NEXT: .LBB14_1:
@@ -710,8 +707,8 @@ define hidden void @multidim() {
; CHECK-NOCPA-O0-NEXT: add x8, x8, :lo12:a
; CHECK-NOCPA-O0-NEXT: add x8, x8, #2
; CHECK-NOCPA-O0-NEXT: add x8, x8, x10, lsl #1
-; CHECK-NOCPA-O0-NEXT: add x8, x8, w9, sxtw
-; CHECK-NOCPA-O0-NEXT: ldrb w8, [x8]
+; CHECK-NOCPA-O0-NEXT: ldrb w8, [x8, w9, sxtw]
+; CHECK-NOCPA-O0-NEXT: uxtb w8, w8
; CHECK-NOCPA-O0-NEXT: cbz w8, .LBB14_2
; CHECK-NOCPA-O0-NEXT: b .LBB14_1
; CHECK-NOCPA-O0-NEXT: .LBB14_1:
diff --git a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
index 5be4b4d554360..c58d4b81ec693 100644
--- a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
+++ b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
@@ -106,3 +106,25 @@ entry:
store i8 %tr, ptr %q, align 1
ret i32 0
}
+
+define i32 @load_i16_store_i8_freeze(ptr %p, ptr %q) {
+; CHECK-LE-LABEL: load_i16_store_i8_freeze:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: ldrh w8, [x0]
+; CHECK-LE-NEXT: mov w0, wzr
+; CHECK-LE-NEXT: strb w8, [x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: load_i16_store_i8_freeze:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: ldrh w8, [x0]
+; CHECK-BE-NEXT: mov w0, wzr
+; CHECK-BE-NEXT: strb w8, [x1]
+; CHECK-BE-NEXT: ret
+entry:
+ %l = load i16, ptr %p, align 4
+ %fr = freeze i16 %l
+ %tr = trunc i16 %fr to i8
+ store i8 %tr, ptr %q, align 1
+ ret i32 0
+}
More information about the llvm-commits
mailing list