[llvm] [AArch64][GISel] Always fold G_SHL into addressing mode where possible, unless the subtarget has addr-lsl-slow-14 (PR #96603)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 03:32:04 PDT 2024
https://github.com/Him188 updated https://github.com/llvm/llvm-project/pull/96603
>From e813b6d4d8ae435056440c5f2dd5d7b891013bcb Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 24 Jun 2024 11:43:56 +0100
Subject: [PATCH 1/2] [AArch64][GISel] Add test cases for folding shifts into
load/store addressing modes (NFC)
---
.../GlobalISel/load-addressing-modes.mir | 325 ++++++++++++--
.../GlobalISel/store-addressing-modes.mir | 206 +++++++--
.../CodeGen/AArch64/aarch64-fold-lslfast.ll | 406 +++++++++++++++---
3 files changed, 794 insertions(+), 143 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
index 7921de6ce2362..3af2aaf57eed8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -1,22 +1,30 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FAST --allow-unused-prefixes
+# RUN: llc -mtriple=aarch64-unknown-unknown -mattr=+addr-lsl-slow-14 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW --allow-unused-prefixes
--- |
define void @ldrxrox_breg_oreg(ptr %addr) { ret void }
define void @ldrdrox_breg_oreg(ptr %addr) { ret void }
define void @more_than_one_use(ptr %addr) { ret void }
+ define void @ldrhrox_shl(ptr %addr) { ret void }
+ define void @ldrwrox_shl(ptr %addr) { ret void }
define void @ldrxrox_shl(ptr %addr) { ret void }
define void @ldrdrox_shl(ptr %addr) { ret void }
+ define void @ldrqrox_shl(ptr %addr) { ret void }
define void @ldrxrox_mul_rhs(ptr %addr) { ret void }
define void @ldrdrox_mul_rhs(ptr %addr) { ret void }
define void @ldrxrox_mul_lhs(ptr %addr) { ret void }
define void @ldrdrox_mul_lhs(ptr %addr) { ret void }
define void @mul_not_pow_2(ptr %addr) { ret void }
define void @mul_wrong_pow_2(ptr %addr) { ret void }
- define void @more_than_one_use_shl_1(ptr %addr) { ret void }
- define void @more_than_one_use_shl_2(ptr %addr) { ret void }
- define void @more_than_one_use_shl_lsl_fast(ptr %addr) { ret void }
- define void @more_than_one_use_shl_lsl_slow(ptr %addr) { ret void }
+ define void @more_than_one_use_shl_fallback(ptr %addr) { ret void }
+ define void @ldrxrox_more_than_one_mem_use_shl(ptr %addr) { ret void }
+ define void @ldrxrox_more_than_one_use_shl(ptr %addr) { ret void }
+ define void @ldrhrox_more_than_one_mem_use_shl(ptr %addr) { ret void }
+ define void @ldrhrox_more_than_one_use_shl(ptr %addr) { ret void }
+ define void @ldrwrox_more_than_one_use_shl(ptr %addr) { ret void }
+ define void @ldrqrox_more_than_one_use_shl(ptr %addr) { ret void }
+ define void @more_than_one_use_shl_lsl(ptr %addr) { ret void }
define void @more_than_one_use_shl_minsize(ptr %addr) #0 { ret void }
define void @ldrwrox(ptr %addr) { ret void }
define void @ldrsrox(ptr %addr) { ret void }
@@ -113,6 +121,67 @@ body: |
...
---
+name: ldrhrox_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ liveins: $w1, $x0
+
+ ; CHECK-LABEL: name: ldrhrox_shl
+ ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+ ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-NEXT: RET_ReallyLR implicit [[LDRHHroX]]
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s32) = COPY $w1
+ %15:gpr(s64) = G_CONSTANT i64 9
+ %3:gpr(s32) = G_LSHR %1, %15(s64)
+ %4:gpr(s64) = G_ZEXT %3(s32)
+ %5:gpr(s64) = G_CONSTANT i64 255
+ %6:gpr(s64) = G_AND %4, %5
+ %13:gpr(s64) = G_CONSTANT i64 1
+ %8:gpr(s64) = G_SHL %6, %13(s64)
+ %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+ %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+ RET_ReallyLR implicit %12
+...
+---
+name: ldrwrox_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: ldrwrox_shl
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY1]], [[COPY]], 0, 1 :: (load (s32) from %ir.addr)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[LDRWroX]]
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 2
+ %2:gpr(s64) = G_SHL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_PTR_ADD %3, %2
+ %5:gpr(s32) = G_LOAD %4(p0) :: (load (s32) from %ir.addr)
+ RET_ReallyLR implicit %5
+...
+---
name: ldrxrox_shl
alignment: 4
legalized: true
@@ -167,6 +236,32 @@ body: |
$d2 = COPY %5(s64)
RET_ReallyLR implicit $d2
+...
+---
+name: ldrqrox_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $d2
+ ; CHECK-LABEL: name: ldrqrox_shl
+ ; CHECK: liveins: $x0, $x1, $d2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY1]], [[COPY]], 0, 1 :: (load (s128) from %ir.addr)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[LDRQroX]]
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:gpr(s64) = G_SHL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_PTR_ADD %3, %2
+ %5:fpr(s128) = G_LOAD %4(p0) :: (load (s128) from %ir.addr)
+ RET_ReallyLR implicit %5
+
...
---
name: ldrxrox_mul_rhs
@@ -352,7 +447,7 @@ body: |
# Show that we can still fall back to the register-register addressing
# mode when we fail to pull in the shift.
-name: more_than_one_use_shl_1
+name: more_than_one_use_shl_fallback
alignment: 4
legalized: true
regBankSelected: true
@@ -361,19 +456,19 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; CHECK-LABEL: name: more_than_one_use_shl_1
+ ; CHECK-LABEL: name: more_than_one_use_shl_fallback
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 62, 61
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[UBFMXri]], 0, 0 :: (load (s64) from %ir.addr)
- ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
+ ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 2, 0
; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]]
; CHECK-NEXT: $x2 = COPY [[ADDXrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
- %1:gpr(s64) = G_CONSTANT i64 3
+ %1:gpr(s64) = G_CONSTANT i64 2
%2:gpr(s64) = G_SHL %0, %1(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_PTR_ADD %3, %2
@@ -385,10 +480,48 @@ body: |
...
---
-# Show that when the GEP is used outside a memory op, we don't do any
-# folding at all.
+name: ldrxrox_more_than_one_mem_use_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: ldrxrox_more_than_one_mem_use_shl
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+ ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s64))
+ ; CHECK-NEXT: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s64))
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit [[ADDXrr]]
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s32) = COPY $w1
+ %15:gpr(s64) = G_CONSTANT i64 9
+ %3:gpr(s32) = G_LSHR %1, %15(s64)
+ %4:gpr(s64) = G_ZEXT %3(s32)
+ %5:gpr(s64) = G_CONSTANT i64 255
+ %6:gpr(s64) = G_AND %4, %5
+ %13:gpr(s64) = G_CONSTANT i64 3
+ %8:gpr(s64) = G_SHL %6, %13(s64)
+ %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+ %12:gpr(s64) = G_LOAD %9(p0) :: (load (s64))
+ %17:gpr(s64) = G_LOAD %9(p0) :: (load (s64))
+ %18:gpr(s64) = G_ADD %12, %17
+ RET_ReallyLR implicit %18
-name: more_than_one_use_shl_2
+...
+---
+# Show that when the GEP is used both inside and outside a memory op, we only fold the memory op.
+
+name: ldrxrox_more_than_one_use_shl
alignment: 4
legalized: true
regBankSelected: true
@@ -397,7 +530,7 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; CHECK-LABEL: name: more_than_one_use_shl_2
+ ; CHECK-LABEL: name: ldrxrox_more_than_one_use_shl
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
@@ -426,10 +559,90 @@ body: |
...
---
-# Show that when we have a fastpath for shift-left, we perform the folding
-# if it has more than one use.
+# Fold SHL into LSL for mem ops. Do not fold if the target has LSLSLOW14.
+name: ldrhrox_more_than_one_mem_use_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ liveins: $w1, $x0
+
+ ; CHECK-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+ ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+ ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s32) = COPY $w1
+ %15:gpr(s64) = G_CONSTANT i64 9
+ %3:gpr(s32) = G_LSHR %1, %15(s64)
+ %4:gpr(s64) = G_ZEXT %3(s32)
+ %5:gpr(s64) = G_CONSTANT i64 255
+ %6:gpr(s64) = G_AND %4, %5
+ %13:gpr(s64) = G_CONSTANT i64 1
+ %8:gpr(s64) = G_SHL %6, %13(s64)
+ %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+ %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+ %17:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+ %18:gpr(s32) = G_ADD %12, %17
+ RET_ReallyLR implicit %18
+...
+---
+# Fold SHL into LSL for memory ops. Do not fold if the target has LSLSLOW14.
+name: ldrhrox_more_than_one_use_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ liveins: $w1, $x0
-name: more_than_one_use_shl_lsl_fast
+ ; CHECK-LABEL: name: ldrhrox_more_than_one_use_shl
+ ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+ ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s32) = COPY $w1
+ %15:gpr(s64) = G_CONSTANT i64 9
+ %3:gpr(s32) = G_LSHR %1, %15(s64)
+ %4:gpr(s64) = G_ZEXT %3(s32)
+ %5:gpr(s64) = G_CONSTANT i64 255
+ %6:gpr(s64) = G_AND %4, %5
+ %13:gpr(s64) = G_CONSTANT i64 1
+ %8:gpr(s64) = G_SHL %6, %13(s64)
+ %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+ %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+ %17:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+ %18:gpr(s32) = G_ADD %12, %17
+ RET_ReallyLR implicit %18
+...
+---
+# Fold SHL into LSL for memory ops.
+name: ldrwrox_more_than_one_use_shl
alignment: 4
legalized: true
regBankSelected: true
@@ -438,33 +651,81 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; CHECK-LABEL: name: more_than_one_use_shl_lsl_fast
+ ; CHECK-LABEL: name: ldrwrox_more_than_one_use_shl
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
- ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
- ; CHECK-NEXT: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
- ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]]
- ; CHECK-NEXT: $x2 = COPY [[ADDXrr]]
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 62, 61
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+ ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[ADDXrr]], 0 :: (load (s32) from %ir.addr)
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWui]], 0
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 2, 0
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[SUBREG_TO_REG]], [[ADDXri]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+ ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
; CHECK-NEXT: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
- %1:gpr(s64) = G_CONSTANT i64 3
+ %1:gpr(s64) = G_CONSTANT i64 2
%2:gpr(s64) = G_SHL %0, %1(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_PTR_ADD %3, %2
- %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr)
- %6:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr)
+ %20:gpr(s32) = G_LOAD %4(p0) :: (load (s32) from %ir.addr)
+ %5:gpr(s64) = G_ZEXT %20
+ %6:gpr(s64) = G_ADD %2, %1
%7:gpr(s64) = G_ADD %5, %6
- $x2 = COPY %7(s64)
+ %8:gpr(s64) = G_PTRTOINT %4
+ %9:gpr(s64) = G_ADD %8, %7
+ $x2 = COPY %9(s64)
RET_ReallyLR implicit $x2
-
...
---
-# Show that we don't fold into multiple memory ops when we don't have a
-# fastpath for shift-left.
+# Fold SHL into LSL for memory ops. Do not fold if the target has LSLSLOW14.
+name: ldrqrox_more_than_one_use_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: ldrqrox_more_than_one_use_shl
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+ ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
+ ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:gpr(s64) = G_SHL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %4:gpr(p0) = G_PTR_ADD %3, %2
+ %20:fpr(s128) = G_LOAD %4(p0) :: (load (s128) from %ir.addr)
+ %6:gpr(s64) = G_ADD %2, %1
+ %200:fpr(s64) = G_TRUNC %20
+ %2000:gpr(s64) = COPY %200
+ %7:gpr(s64) = G_ADD %2000, %6
+ %8:gpr(s64) = G_PTRTOINT %4
+ %9:gpr(s64) = G_ADD %8, %7
+ RET_ReallyLR implicit %9
+...
+---
+# Show that when we have a fastpath for shift-left, we perform the folding
+# if it has more than one use.
-name: more_than_one_use_shl_lsl_slow
+name: more_than_one_use_shl_lsl
alignment: 4
legalized: true
regBankSelected: true
@@ -473,7 +734,7 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; CHECK-LABEL: name: more_than_one_use_shl_lsl_slow
+ ; CHECK-LABEL: name: more_than_one_use_shl_lsl
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir
index 8214b632e5f33..10f611cf8bb1c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
+# RUN: llc -mtriple=aarch64-unknown-unknown -mattr=+addr-lsl-slow-14 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SLOW
--- |
define void @strxrox(ptr %addr) { ret void }
@@ -9,7 +10,11 @@
define void @strsrox(ptr %addr) { ret void }
define void @strhrox(ptr %addr) { ret void }
define void @strqrox(ptr %addr) { ret void }
- define void @shl(ptr %addr) { ret void }
+ define void @shl_fast_3(ptr %addr) { ret void }
+ define void @shl_slow_1(ptr %addr) { ret void }
+ define void @shl_slow_1_more_than_one_use(ptr %addr) { ret void }
+ define void @shl_slow_4(ptr %addr) { ret void }
+ define void @shl_slow_4_more_than_one_use(ptr %addr) { ret void }
define void @shl_p0(ptr %addr) { ret void }
...
@@ -25,10 +30,11 @@ body: |
liveins: $x0, $x1, $x2
; CHECK-LABEL: name: strxrox
; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
- ; CHECK: STRXroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s64) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: STRXroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s64) into %ir.addr)
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%ptr:gpr(p0) = G_PTR_ADD %0, %1
@@ -47,11 +53,12 @@ body: |
liveins: $x0, $x1, $x2
; CHECK-LABEL: name: strxrox_p0
; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:gpr64all = COPY $x2
- ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
- ; CHECK: STRXroX [[COPY3]], [[COPY]], [[COPY1]], 0, 0 :: (store (p0) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: STRXroX [[COPY3]], [[COPY]], [[COPY1]], 0, 0 :: (store (p0) into %ir.addr)
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%ptr:gpr(p0) = G_PTR_ADD %0, %1
@@ -70,10 +77,11 @@ body: |
liveins: $x0, $x1, $d2
; CHECK-LABEL: name: strdrox
; CHECK: liveins: $x0, $x1, $d2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d2
- ; CHECK: STRDroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s64) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d2
+ ; CHECK-NEXT: STRDroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s64) into %ir.addr)
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%ptr:gpr(p0) = G_PTR_ADD %0, %1
@@ -92,10 +100,11 @@ body: |
liveins: $x0, $x1, $w2
; CHECK-LABEL: name: strwrox
; CHECK: liveins: $x0, $x1, $w2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
- ; CHECK: STRWroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s32) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+ ; CHECK-NEXT: STRWroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s32) into %ir.addr)
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%ptr:gpr(p0) = G_PTR_ADD %0, %1
@@ -114,10 +123,11 @@ body: |
liveins: $x0, $x1, $s2
; CHECK-LABEL: name: strsrox
; CHECK: liveins: $x0, $x1, $s2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s2
- ; CHECK: STRSroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s32) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s2
+ ; CHECK-NEXT: STRSroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s32) into %ir.addr)
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%ptr:gpr(p0) = G_PTR_ADD %0, %1
@@ -136,10 +146,11 @@ body: |
liveins: $x0, $x1, $h0
; CHECK-LABEL: name: strhrox
; CHECK: liveins: $x0, $x1, $h0
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK: STRHroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s16) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
+ ; CHECK-NEXT: STRHroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s16) into %ir.addr)
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%ptr:gpr(p0) = G_PTR_ADD %0, %1
@@ -158,10 +169,11 @@ body: |
liveins: $x0, $x1, $q2
; CHECK-LABEL: name: strqrox
; CHECK: liveins: $x0, $x1, $q2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q2
- ; CHECK: STRQroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (<2 x s64>) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q2
+ ; CHECK-NEXT: STRQroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (<2 x s64>) into %ir.addr)
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%ptr:gpr(p0) = G_PTR_ADD %0, %1
@@ -169,7 +181,7 @@ body: |
G_STORE %2, %ptr :: (store (<2 x s64>) into %ir.addr)
...
---
-name: shl
+name: shl_fast_3
alignment: 4
legalized: true
regBankSelected: true
@@ -178,12 +190,13 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; CHECK-LABEL: name: shl
+ ; CHECK-LABEL: name: shl_fast_3
; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
- ; CHECK: STRXroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s64) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: STRXroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s64) into %ir.addr)
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 3
%2:gpr(s64) = G_SHL %0, %1(s64)
@@ -193,6 +206,114 @@ body: |
G_STORE %4, %ptr :: (store (s64) into %ir.addr)
...
---
+name: shl_slow_1
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: shl_slow_1
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
+ ; CHECK-NEXT: STRHHroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store (s16) into %ir.addr)
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 1
+ %2:gpr(s64) = G_SHL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %ptr:gpr(p0) = G_PTR_ADD %3, %2
+ %4:gpr(s64) = COPY $x2
+ G_STORE %4, %ptr :: (store (s16) into %ir.addr)
+...
+---
+name: shl_slow_1_more_than_one_use
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: shl_slow_1_more_than_one_use
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
+ ; CHECK-NEXT: STRHHroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store (s16) into %ir.addr)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
+ ; CHECK-NEXT: STRHHroX [[COPY4]], [[COPY1]], [[COPY]], 0, 1 :: (store (s16) into %ir.addr)
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 1
+ %2:gpr(s64) = G_SHL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %ptr:gpr(p0) = G_PTR_ADD %3, %2
+ %4:gpr(s64) = COPY $x2
+ %5:gpr(s16) = G_TRUNC %4
+ G_STORE %4, %ptr :: (store (s16) into %ir.addr)
+ G_STORE %4, %ptr :: (store (s16) into %ir.addr)
+...
+---
+name: shl_slow_4
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $q0
+ ; CHECK-LABEL: name: shl_slow_4
+ ; CHECK: liveins: $x0, $x1, $x2, $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: STRQroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s128) into %ir.addr)
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:gpr(s64) = G_SHL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %ptr:gpr(p0) = G_PTR_ADD %3, %2
+ %5:fpr(s128) = COPY $q0
+ G_STORE %5, %ptr :: (store (s128) into %ir.addr)
+...
+---
+name: shl_slow_4_more_than_one_use
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $q0
+ ; CHECK-LABEL: name: shl_slow_4_more_than_one_use
+ ; CHECK: liveins: $x0, $x1, $x2, $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: STRQroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s128) into %ir.addr)
+ ; CHECK-NEXT: STRQroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s128) into %ir.addr)
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:gpr(s64) = G_SHL %0, %1(s64)
+ %3:gpr(p0) = COPY $x1
+ %ptr:gpr(p0) = G_PTR_ADD %3, %2
+ %5:fpr(s128) = COPY $q0
+ G_STORE %5, %ptr :: (store (s128) into %ir.addr)
+ G_STORE %5, %ptr :: (store (s128) into %ir.addr)
+...
+---
name: shl_p0
alignment: 4
legalized: true
@@ -204,11 +325,12 @@ body: |
liveins: $x0, $x1, $x2
; CHECK-LABEL: name: shl_p0
; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:gpr64all = COPY $x2
- ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
- ; CHECK: STRXroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store (p0) into %ir.addr)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: STRXroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store (p0) into %ir.addr)
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 3
%2:gpr(s64) = G_SHL %0, %1(s64)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 022aaea9ef0cc..3689aa3850e73 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-slow-14 | FileCheck %s --check-prefixes=CHECK,CHECK0
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-slow-14 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=CHECK,CHECK0,CHECK0-GISEL
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-slow-14 | FileCheck %s --check-prefixes=CHECK,CHECK0,CHECK0-SDAG
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=CHECK,CHECK3,CHECK3-GISEL
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK3,CHECK3-SDAG
%struct.a = type [256 x i16]
%struct.b = type [256 x i32]
@@ -8,36 +10,66 @@
declare void @foo()
define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
-; CHECK0-LABEL: halfword:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-NEXT: ubfx x8, x1, #9, #8
-; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK0-NEXT: mov x19, x0
-; CHECK0-NEXT: lsl x21, x8, #1
-; CHECK0-NEXT: ldrh w20, [x0, x21]
-; CHECK0-NEXT: bl foo
-; CHECK0-NEXT: mov w0, w20
-; CHECK0-NEXT: strh w20, [x19, x21]
-; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK0-NEXT: ret
+; CHECK0-GISEL-LABEL: halfword:
+; CHECK0-GISEL: // %bb.0:
+; CHECK0-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: lsr w8, w1, #9
+; CHECK0-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: mov x19, x0
+; CHECK0-GISEL-NEXT: and x21, x8, #0xff
+; CHECK0-GISEL-NEXT: ldrh w20, [x0, x21, lsl #1]
+; CHECK0-GISEL-NEXT: bl foo
+; CHECK0-GISEL-NEXT: mov w0, w20
+; CHECK0-GISEL-NEXT: strh w20, [x19, x21, lsl #1]
+; CHECK0-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ret
;
-; CHECK3-LABEL: halfword:
-; CHECK3: // %bb.0:
-; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK3-NEXT: ubfx x21, x1, #9, #8
-; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK3-NEXT: mov x19, x0
-; CHECK3-NEXT: ldrh w20, [x0, x21, lsl #1]
-; CHECK3-NEXT: bl foo
-; CHECK3-NEXT: mov w0, w20
-; CHECK3-NEXT: strh w20, [x19, x21, lsl #1]
-; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK3-NEXT: ret
+; CHECK0-SDAG-LABEL: halfword:
+; CHECK0-SDAG: // %bb.0:
+; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
+; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: mov x19, x0
+; CHECK0-SDAG-NEXT: lsl x21, x8, #1
+; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
+; CHECK0-SDAG-NEXT: bl foo
+; CHECK0-SDAG-NEXT: mov w0, w20
+; CHECK0-SDAG-NEXT: strh w20, [x19, x21]
+; CHECK0-SDAG-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ret
+;
+; CHECK3-GISEL-LABEL: halfword:
+; CHECK3-GISEL: // %bb.0:
+; CHECK3-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: lsr w8, w1, #9
+; CHECK3-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: mov x19, x0
+; CHECK3-GISEL-NEXT: and x21, x8, #0xff
+; CHECK3-GISEL-NEXT: ldrh w20, [x0, x21, lsl #1]
+; CHECK3-GISEL-NEXT: bl foo
+; CHECK3-GISEL-NEXT: mov w0, w20
+; CHECK3-GISEL-NEXT: strh w20, [x19, x21, lsl #1]
+; CHECK3-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ret
+;
+; CHECK3-SDAG-LABEL: halfword:
+; CHECK3-SDAG: // %bb.0:
+; CHECK3-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK3-SDAG-NEXT: ubfx x21, x1, #9, #8
+; CHECK3-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: mov x19, x0
+; CHECK3-SDAG-NEXT: ldrh w20, [x0, x21, lsl #1]
+; CHECK3-SDAG-NEXT: bl foo
+; CHECK3-SDAG-NEXT: mov w0, w20
+; CHECK3-SDAG-NEXT: strh w20, [x19, x21, lsl #1]
+; CHECK3-SDAG-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ret
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
@@ -49,20 +81,65 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
}
define i32 @word(ptr %ctx, i32 %xor72) nounwind {
-; CHECK-LABEL: word:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: ubfx x21, x1, #9, #8
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x19, x0
-; CHECK-NEXT: ldr w20, [x0, x21, lsl #2]
-; CHECK-NEXT: bl foo
-; CHECK-NEXT: mov w0, w20
-; CHECK-NEXT: str w20, [x19, x21, lsl #2]
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK0-GISEL-LABEL: word:
+; CHECK0-GISEL: // %bb.0:
+; CHECK0-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: lsr w8, w1, #9
+; CHECK0-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: mov x19, x0
+; CHECK0-GISEL-NEXT: and x21, x8, #0xff
+; CHECK0-GISEL-NEXT: ldr w20, [x0, x21, lsl #2]
+; CHECK0-GISEL-NEXT: bl foo
+; CHECK0-GISEL-NEXT: mov w0, w20
+; CHECK0-GISEL-NEXT: str w20, [x19, x21, lsl #2]
+; CHECK0-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ret
+;
+; CHECK0-SDAG-LABEL: word:
+; CHECK0-SDAG: // %bb.0:
+; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x21, x1, #9, #8
+; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: mov x19, x0
+; CHECK0-SDAG-NEXT: ldr w20, [x0, x21, lsl #2]
+; CHECK0-SDAG-NEXT: bl foo
+; CHECK0-SDAG-NEXT: mov w0, w20
+; CHECK0-SDAG-NEXT: str w20, [x19, x21, lsl #2]
+; CHECK0-SDAG-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ret
+;
+; CHECK3-GISEL-LABEL: word:
+; CHECK3-GISEL: // %bb.0:
+; CHECK3-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: lsr w8, w1, #9
+; CHECK3-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: mov x19, x0
+; CHECK3-GISEL-NEXT: and x21, x8, #0xff
+; CHECK3-GISEL-NEXT: ldr w20, [x0, x21, lsl #2]
+; CHECK3-GISEL-NEXT: bl foo
+; CHECK3-GISEL-NEXT: mov w0, w20
+; CHECK3-GISEL-NEXT: str w20, [x19, x21, lsl #2]
+; CHECK3-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ret
+;
+; CHECK3-SDAG-LABEL: word:
+; CHECK3-SDAG: // %bb.0:
+; CHECK3-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK3-SDAG-NEXT: ubfx x21, x1, #9, #8
+; CHECK3-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: mov x19, x0
+; CHECK3-SDAG-NEXT: ldr w20, [x0, x21, lsl #2]
+; CHECK3-SDAG-NEXT: bl foo
+; CHECK3-SDAG-NEXT: mov w0, w20
+; CHECK3-SDAG-NEXT: str w20, [x19, x21, lsl #2]
+; CHECK3-SDAG-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ret
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
@@ -74,20 +151,65 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind {
}
define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
-; CHECK-LABEL: doubleword:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: ubfx x21, x1, #9, #8
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x19, x0
-; CHECK-NEXT: ldr x20, [x0, x21, lsl #3]
-; CHECK-NEXT: bl foo
-; CHECK-NEXT: mov x0, x20
-; CHECK-NEXT: str x20, [x19, x21, lsl #3]
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK0-GISEL-LABEL: doubleword:
+; CHECK0-GISEL: // %bb.0:
+; CHECK0-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: lsr w8, w1, #9
+; CHECK0-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: mov x19, x0
+; CHECK0-GISEL-NEXT: and x21, x8, #0xff
+; CHECK0-GISEL-NEXT: ldr x20, [x0, x21, lsl #3]
+; CHECK0-GISEL-NEXT: bl foo
+; CHECK0-GISEL-NEXT: mov x0, x20
+; CHECK0-GISEL-NEXT: str x20, [x19, x21, lsl #3]
+; CHECK0-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ret
+;
+; CHECK0-SDAG-LABEL: doubleword:
+; CHECK0-SDAG: // %bb.0:
+; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x21, x1, #9, #8
+; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: mov x19, x0
+; CHECK0-SDAG-NEXT: ldr x20, [x0, x21, lsl #3]
+; CHECK0-SDAG-NEXT: bl foo
+; CHECK0-SDAG-NEXT: mov x0, x20
+; CHECK0-SDAG-NEXT: str x20, [x19, x21, lsl #3]
+; CHECK0-SDAG-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ret
+;
+; CHECK3-GISEL-LABEL: doubleword:
+; CHECK3-GISEL: // %bb.0:
+; CHECK3-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: lsr w8, w1, #9
+; CHECK3-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: mov x19, x0
+; CHECK3-GISEL-NEXT: and x21, x8, #0xff
+; CHECK3-GISEL-NEXT: ldr x20, [x0, x21, lsl #3]
+; CHECK3-GISEL-NEXT: bl foo
+; CHECK3-GISEL-NEXT: mov x0, x20
+; CHECK3-GISEL-NEXT: str x20, [x19, x21, lsl #3]
+; CHECK3-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ret
+;
+; CHECK3-SDAG-LABEL: doubleword:
+; CHECK3-SDAG: // %bb.0:
+; CHECK3-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK3-SDAG-NEXT: ubfx x21, x1, #9, #8
+; CHECK3-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: mov x19, x0
+; CHECK3-SDAG-NEXT: ldr x20, [x0, x21, lsl #3]
+; CHECK3-SDAG-NEXT: bl foo
+; CHECK3-SDAG-NEXT: mov x0, x20
+; CHECK3-SDAG-NEXT: str x20, [x19, x21, lsl #3]
+; CHECK3-SDAG-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ret
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
@@ -98,17 +220,129 @@ define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
ret i64 %result
}
-define i64 @multi_use_non_memory(i64 %a, i64 %b) {
-; CHECK-LABEL: multi_use_non_memory:
+define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
+; CHECK0-GISEL-LABEL: multi_use_half_word:
+; CHECK0-GISEL: // %bb.0: // %entry
+; CHECK0-GISEL-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; CHECK0-GISEL-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: .cfi_def_cfa_offset 48
+; CHECK0-GISEL-NEXT: .cfi_offset w19, -8
+; CHECK0-GISEL-NEXT: .cfi_offset w20, -16
+; CHECK0-GISEL-NEXT: .cfi_offset w21, -24
+; CHECK0-GISEL-NEXT: .cfi_offset w22, -32
+; CHECK0-GISEL-NEXT: .cfi_offset w30, -48
+; CHECK0-GISEL-NEXT: lsr w8, w1, #9
+; CHECK0-GISEL-NEXT: mov x19, x0
+; CHECK0-GISEL-NEXT: and x21, x8, #0xff
+; CHECK0-GISEL-NEXT: ldrh w20, [x0, x21, lsl #1]
+; CHECK0-GISEL-NEXT: add w22, w20, #1
+; CHECK0-GISEL-NEXT: bl foo
+; CHECK0-GISEL-NEXT: strh w20, [x19, x21, lsl #1]
+; CHECK0-GISEL-NEXT: mov w0, w20
+; CHECK0-GISEL-NEXT: strh w22, [x19, x21, lsl #1]
+; CHECK0-GISEL-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK0-GISEL-NEXT: ret
+;
+; CHECK0-SDAG-LABEL: multi_use_half_word:
+; CHECK0-SDAG: // %bb.0: // %entry
+; CHECK0-SDAG-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; CHECK0-SDAG-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT: .cfi_def_cfa_offset 48
+; CHECK0-SDAG-NEXT: .cfi_offset w19, -8
+; CHECK0-SDAG-NEXT: .cfi_offset w20, -16
+; CHECK0-SDAG-NEXT: .cfi_offset w21, -24
+; CHECK0-SDAG-NEXT: .cfi_offset w22, -32
+; CHECK0-SDAG-NEXT: .cfi_offset w30, -48
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
+; CHECK0-SDAG-NEXT: mov x19, x0
+; CHECK0-SDAG-NEXT: lsl x21, x8, #1
+; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
+; CHECK0-SDAG-NEXT: add w22, w20, #1
+; CHECK0-SDAG-NEXT: bl foo
+; CHECK0-SDAG-NEXT: mov w0, w20
+; CHECK0-SDAG-NEXT: strh w22, [x19, x21]
+; CHECK0-SDAG-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK0-SDAG-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK0-SDAG-NEXT: ret
+;
+; CHECK3-GISEL-LABEL: multi_use_half_word:
+; CHECK3-GISEL: // %bb.0: // %entry
+; CHECK3-GISEL-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; CHECK3-GISEL-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK3-GISEL-NEXT: .cfi_def_cfa_offset 48
+; CHECK3-GISEL-NEXT: .cfi_offset w19, -8
+; CHECK3-GISEL-NEXT: .cfi_offset w20, -16
+; CHECK3-GISEL-NEXT: .cfi_offset w21, -24
+; CHECK3-GISEL-NEXT: .cfi_offset w22, -32
+; CHECK3-GISEL-NEXT: .cfi_offset w30, -48
+; CHECK3-GISEL-NEXT: lsr w8, w1, #9
+; CHECK3-GISEL-NEXT: mov x19, x0
+; CHECK3-GISEL-NEXT: and x21, x8, #0xff
+; CHECK3-GISEL-NEXT: ldrh w20, [x0, x21, lsl #1]
+; CHECK3-GISEL-NEXT: add w22, w20, #1
+; CHECK3-GISEL-NEXT: bl foo
+; CHECK3-GISEL-NEXT: strh w20, [x19, x21, lsl #1]
+; CHECK3-GISEL-NEXT: mov w0, w20
+; CHECK3-GISEL-NEXT: strh w22, [x19, x21, lsl #1]
+; CHECK3-GISEL-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK3-GISEL-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK3-GISEL-NEXT: ret
+;
+; CHECK3-SDAG-LABEL: multi_use_half_word:
+; CHECK3-SDAG: // %bb.0: // %entry
+; CHECK3-SDAG-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; CHECK3-SDAG-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT: .cfi_def_cfa_offset 48
+; CHECK3-SDAG-NEXT: .cfi_offset w19, -8
+; CHECK3-SDAG-NEXT: .cfi_offset w20, -16
+; CHECK3-SDAG-NEXT: .cfi_offset w21, -24
+; CHECK3-SDAG-NEXT: .cfi_offset w22, -32
+; CHECK3-SDAG-NEXT: .cfi_offset w30, -48
+; CHECK3-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK3-SDAG-NEXT: ubfx x21, x1, #9, #8
+; CHECK3-SDAG-NEXT: mov x19, x0
+; CHECK3-SDAG-NEXT: ldrh w20, [x0, x21, lsl #1]
+; CHECK3-SDAG-NEXT: add w22, w20, #1
+; CHECK3-SDAG-NEXT: bl foo
+; CHECK3-SDAG-NEXT: mov w0, w20
+; CHECK3-SDAG-NEXT: strh w22, [x19, x21, lsl #1]
+; CHECK3-SDAG-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK3-SDAG-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK3-SDAG-NEXT: ret
+entry:
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %idxprom83 = and i64 %conv82, 255
+ %arrayidx86 = getelementptr inbounds %struct.a, ptr %ctx, i64 0, i64 %idxprom83
+ %result = load i16, ptr %arrayidx86, align 2
+ %result2 = add i16 %result, 1
+ call void @foo()
+ store i16 %result, ptr %arrayidx86, align 2
+ store i16 %result2, ptr %arrayidx86, align 2
+ ret i16 %result
+}
+
+define i64 @multi_use_non_memory_call(i64 %a, i64 %b) {
+; CHECK-LABEL: multi_use_non_memory_call:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: lsl x8, x0, #3
; CHECK-NEXT: lsl x9, x1, #3
; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.lt .LBB3_2
+; CHECK-NEXT: b.lt .LBB4_2
; CHECK-NEXT: // %bb.1: // %falsebb
; CHECK-NEXT: csel x0, x8, x9, gt
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB3_2: // %truebb
+; CHECK-NEXT: .LBB4_2: // %truebb
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
@@ -144,12 +378,43 @@ define i64 @gep3(ptr %p, i64 %b) {
}
define i128 @gep4(ptr %p, i128 %a, i64 %b) {
-; CHECK-LABEL: gep4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, x4, lsl #4
-; CHECK-NEXT: ldp x0, x1, [x8]
-; CHECK-NEXT: stp x2, x3, [x8]
-; CHECK-NEXT: ret
+; CHECK0-GISEL-LABEL: gep4:
+; CHECK0-GISEL: // %bb.0:
+; CHECK0-GISEL-NEXT: ldr q1, [x0, x4, lsl #4]
+; CHECK0-GISEL-NEXT: mov v0.d[0], x2
+; CHECK0-GISEL-NEXT: mov x8, x0
+; CHECK0-GISEL-NEXT: mov d2, v1.d[1]
+; CHECK0-GISEL-NEXT: fmov x0, d1
+; CHECK0-GISEL-NEXT: mov v0.d[1], x3
+; CHECK0-GISEL-NEXT: fmov x1, d2
+; CHECK0-GISEL-NEXT: str q0, [x8, x4, lsl #4]
+; CHECK0-GISEL-NEXT: ret
+;
+; CHECK0-SDAG-LABEL: gep4:
+; CHECK0-SDAG: // %bb.0:
+; CHECK0-SDAG-NEXT: add x8, x0, x4, lsl #4
+; CHECK0-SDAG-NEXT: ldp x0, x1, [x8]
+; CHECK0-SDAG-NEXT: stp x2, x3, [x8]
+; CHECK0-SDAG-NEXT: ret
+;
+; CHECK3-GISEL-LABEL: gep4:
+; CHECK3-GISEL: // %bb.0:
+; CHECK3-GISEL-NEXT: ldr q1, [x0, x4, lsl #4]
+; CHECK3-GISEL-NEXT: mov v0.d[0], x2
+; CHECK3-GISEL-NEXT: mov x8, x0
+; CHECK3-GISEL-NEXT: mov d2, v1.d[1]
+; CHECK3-GISEL-NEXT: fmov x0, d1
+; CHECK3-GISEL-NEXT: mov v0.d[1], x3
+; CHECK3-GISEL-NEXT: fmov x1, d2
+; CHECK3-GISEL-NEXT: str q0, [x8, x4, lsl #4]
+; CHECK3-GISEL-NEXT: ret
+;
+; CHECK3-SDAG-LABEL: gep4:
+; CHECK3-SDAG: // %bb.0:
+; CHECK3-SDAG-NEXT: add x8, x0, x4, lsl #4
+; CHECK3-SDAG-NEXT: ldp x0, x1, [x8]
+; CHECK3-SDAG-NEXT: stp x2, x3, [x8]
+; CHECK3-SDAG-NEXT: ret
%g = getelementptr inbounds i128, ptr %p, i64 %b
%l = load i128, ptr %g
store i128 %a, ptr %g
@@ -185,3 +450,6 @@ define i64 @addlsl4(i64 %a, i64 %b) {
%r = xor i64 %y, %z
ret i64 %r
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK0: {{.*}}
+; CHECK3: {{.*}}
>From 93edeea8cb187e17e8491f6ef6b71f84a262774c Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 1 Jul 2024 10:52:25 +0100
Subject: [PATCH 2/2] [AArch64][GISel] Always fold G_SHL into addressing mode
where possible, unless the subtarget has addr-lsl-slow-14
---
.../GISel/AArch64InstructionSelector.cpp | 76 +++++++--
.../GlobalISel/load-addressing-modes.mir | 156 ++++++++++++------
.../GlobalISel/store-addressing-modes.mir | 58 +++++--
.../CodeGen/AArch64/aarch64-fold-lslfast.ll | 49 +++---
4 files changed, 231 insertions(+), 108 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 0357a7206c478..bc7a16ef56d5e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -412,8 +412,13 @@ class AArch64InstructionSelector : public InstructionSelector {
return selectAddrModeIndexed(Root, Width / 8);
}
+ std::optional<bool>
+ isWorthFoldingIntoAddrMode(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const;
+
bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
- const MachineRegisterInfo &MRI) const;
+ const MachineRegisterInfo &MRI,
+ bool IsAddrOperand) const;
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
@@ -6717,19 +6722,70 @@ AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
return select12BitValueWithLeftShift(Immed);
}
+/// Checks whether folding MI into a load/store addressing mode is known to be
+/// beneficial or harmful.
+///
+/// Returns:
+/// - true if folding MI would be beneficial.
+/// - false if folding MI would be bad.
+/// - std::nullopt if it cannot be determined whether folding MI is beneficial.
+///
+/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
+///
+/// %13:gpr(s64) = G_CONSTANT i64 1
+/// %8:gpr(s64) = G_SHL %6, %13(s64)
+/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
+/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
+std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
+ MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+ if (MI.getOpcode() == AArch64::G_SHL) {
+ // Address operands with shifts are free, except when running on subtargets
+ // with AddrLSLSlow14.
+ if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
+ MI.getOperand(2).getReg(), MRI)) {
+ const APInt ShiftVal = ValAndVeg->Value;
+
+ // Don't fold if we know this will be slow.
+ return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
+ }
+ }
+ return std::nullopt;
+}
+
/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
+/// \p IsAddrOperand is true when the def of MI is used as an address operand
+/// (e.g. feeding into an LDR/STR).
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
- MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+ MachineInstr &MI, const MachineRegisterInfo &MRI,
+ bool IsAddrOperand) const {
+
// Always fold if there is one use, or if we're optimizing for size.
Register DefReg = MI.getOperand(0).getReg();
if (MRI.hasOneNonDBGUse(DefReg) ||
MI.getParent()->getParent()->getFunction().hasOptSize())
return true;
- // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
- // appropriate.
+ if (IsAddrOperand) {
+ // If we are already sure that folding MI is good or bad, return the result.
+ if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
+ return *Worth;
+
+ // Fold G_PTR_ADD if its offset operand can be folded
+ if (MI.getOpcode() == AArch64::G_PTR_ADD) {
+ MachineInstr *OffsetInst =
+ getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+
+ // Note, we already know G_PTR_ADD is used by at least two instructions.
+ // If we are also sure about whether folding is beneficial or not,
+ // return the result.
+ if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
+ return *Worth;
+ }
+ }
+
+ // FIXME: Consider checking HasALULSLFast as appropriate.
// We have a fastpath, so folding a shift in and potentially computing it
// many times may be beneficial. Check if this is only used in memory ops.
@@ -6777,7 +6833,7 @@ AArch64InstructionSelector::selectExtendedSHL(
int64_t LegalShiftVal = Log2_32(SizeInBytes);
if (LegalShiftVal == 0)
return std::nullopt;
- if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+ if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
return std::nullopt;
// Now, try to find the specific G_CONSTANT. Start by assuming that the
@@ -6884,7 +6940,7 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
// Check if we can find the G_PTR_ADD.
MachineInstr *PtrAdd =
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
return std::nullopt;
// Now, try to match an opcode which will match our specific offset.
@@ -7018,7 +7074,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
MachineInstr *PtrAdd =
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
return std::nullopt;
MachineOperand &LHS = PtrAdd->getOperand(1);
@@ -7049,7 +7105,7 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
//
// e.g.
// ldr something, [base_reg, ext_reg, sxtw]
- if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+ if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
return std::nullopt;
// Check if this is an extend. We'll get an extend type if it is.
@@ -7244,7 +7300,7 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
return std::nullopt;
if (ShType == AArch64_AM::ROR && !AllowROR)
return std::nullopt;
- if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
+ if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
return std::nullopt;
// Need an immediate on the RHS.
@@ -7358,7 +7414,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
if (!RootDef)
return std::nullopt;
- if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
+ if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
return std::nullopt;
// Check if we can fold a shift and an extend.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
index 3af2aaf57eed8..dc2e1c5dc28d4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -535,13 +535,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
- ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+ ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0
- ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
- ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]]
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
; CHECK-NEXT: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
@@ -571,19 +571,36 @@ body: |
liveins: $x0, $x1, $x2
liveins: $w1, $x0
- ; CHECK-LABEL: name: ldrhrox_more_than_one_mem_use_shl
- ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
- ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
- ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
- ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
- ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
- ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+ ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+ ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+ ; CHECK-FAST-NEXT: {{ $}}
+ ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+ ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+ ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+ ;
+ ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_mem_use_shl
+ ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+ ; CHECK-SLOW-NEXT: {{ $}}
+ ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+ ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+ ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+ ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+ ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+ ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+ ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+ ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
%0:gpr(p0) = COPY $x0
%1:gpr(s32) = COPY $w1
%15:gpr(s64) = G_CONSTANT i64 9
@@ -612,19 +629,36 @@ body: |
liveins: $x0, $x1, $x2
liveins: $w1, $x0
- ; CHECK-LABEL: name: ldrhrox_more_than_one_use_shl
- ; CHECK: liveins: $x0, $x1, $x2, $w1, $x0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
- ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
- ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
- ; CHECK-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
- ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
- ; CHECK-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+ ; CHECK-FAST-LABEL: name: ldrhrox_more_than_one_use_shl
+ ; CHECK-FAST: liveins: $x0, $x1, $x2, $w1, $x0
+ ; CHECK-FAST-NEXT: {{ $}}
+ ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-FAST-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-FAST-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-FAST-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-FAST-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[SUBREG_TO_REG]], 4103
+ ; CHECK-FAST-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-FAST-NEXT: [[LDRHHroX1:%[0-9]+]]:gpr32 = LDRHHroX [[COPY]], [[ANDXri]], 0, 1 :: (load (s16))
+ ; CHECK-FAST-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHroX]], [[LDRHHroX1]]
+ ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDWrr]]
+ ;
+ ; CHECK-SLOW-LABEL: name: ldrhrox_more_than_one_use_shl
+ ; CHECK-SLOW: liveins: $x0, $x1, $x2, $w1, $x0
+ ; CHECK-SLOW-NEXT: {{ $}}
+ ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-SLOW-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 9, 31
+ ; CHECK-SLOW-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[UBFMWri]], 0
+ ; CHECK-SLOW-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]].sub_32
+ ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+ ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY [[COPY]]
+ ; CHECK-SLOW-NEXT: [[ADDXrx:%[0-9]+]]:gpr64sp = ADDXrx [[COPY4]], [[COPY3]], 1
+ ; CHECK-SLOW-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+ ; CHECK-SLOW-NEXT: [[LDRHHui1:%[0-9]+]]:gpr32 = LDRHHui [[ADDXrx]], 0 :: (load (s16))
+ ; CHECK-SLOW-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRHHui]], [[LDRHHui1]]
+ ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDWrr]]
%0:gpr(p0) = COPY $x0
%1:gpr(s32) = COPY $w1
%15:gpr(s64) = G_CONSTANT i64 9
@@ -656,15 +690,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 62, 61
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
- ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[ADDXrr]], 0 :: (load (s32) from %ir.addr)
- ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWui]], 0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY1]], [[COPY]], 0, 1 :: (load (s32) from %ir.addr)
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[LDRWroX]], 0
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 2, 0
; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[SUBREG_TO_REG]], [[ADDXri]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
- ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]]
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
; CHECK-NEXT: $x2 = COPY [[ADDXrr2]]
; CHECK-NEXT: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
@@ -692,21 +726,37 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; CHECK-LABEL: name: ldrqrox_more_than_one_use_shl
- ; CHECK: liveins: $x0, $x1, $x2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
- ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
- ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
- ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
- ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
- ; CHECK-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+ ; CHECK-FAST-LABEL: name: ldrqrox_more_than_one_use_shl
+ ; CHECK-FAST: liveins: $x0, $x1, $x2
+ ; CHECK-FAST-NEXT: {{ $}}
+ ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-FAST-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+ ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-FAST-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+ ; CHECK-FAST-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[UBFMXri]]
+ ; CHECK-FAST-NEXT: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY1]], [[COPY]], 0, 1 :: (load (s128) from %ir.addr)
+ ; CHECK-FAST-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+ ; CHECK-FAST-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[LDRQroX]].dsub
+ ; CHECK-FAST-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[COPY3]]
+ ; CHECK-FAST-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXri]]
+ ; CHECK-FAST-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[ADDXrr1]]
+ ; CHECK-FAST-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
+ ;
+ ; CHECK-SLOW-LABEL: name: ldrqrox_more_than_one_use_shl
+ ; CHECK-SLOW: liveins: $x0, $x1, $x2
+ ; CHECK-SLOW-NEXT: {{ $}}
+ ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-SLOW-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 60, 59
+ ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-SLOW-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]]
+ ; CHECK-SLOW-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADDXrr]], 0 :: (load (s128) from %ir.addr)
+ ; CHECK-SLOW-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 4, 0
+ ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
+ ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]]
+ ; CHECK-SLOW-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY3]], [[ADDXri]]
+ ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+ ; CHECK-SLOW-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[ADDXrr1]]
+ ; CHECK-SLOW-NEXT: RET_ReallyLR implicit [[ADDXrr2]]
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 4
%2:gpr(s64) = G_SHL %0, %1(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir
index 10f611cf8bb1c..64ac2e9bcba6c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir
@@ -241,16 +241,28 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; CHECK-LABEL: name: shl_slow_1_more_than_one_use
- ; CHECK: liveins: $x0, $x1, $x2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
- ; CHECK-NEXT: STRHHroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store (s16) into %ir.addr)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
- ; CHECK-NEXT: STRHHroX [[COPY4]], [[COPY1]], [[COPY]], 0, 1 :: (store (s16) into %ir.addr)
+ ; CHECK-FAST-LABEL: name: shl_slow_1_more_than_one_use
+ ; CHECK-FAST: liveins: $x0, $x1, $x2
+ ; CHECK-FAST-NEXT: {{ $}}
+ ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-FAST-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-FAST-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
+ ; CHECK-FAST-NEXT: STRHHroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store (s16) into %ir.addr)
+ ; CHECK-FAST-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
+ ; CHECK-FAST-NEXT: STRHHroX [[COPY4]], [[COPY1]], [[COPY]], 0, 1 :: (store (s16) into %ir.addr)
+ ;
+ ; CHECK-SLOW-LABEL: name: shl_slow_1_more_than_one_use
+ ; CHECK-SLOW: liveins: $x0, $x1, $x2
+ ; CHECK-SLOW-NEXT: {{ $}}
+ ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-SLOW-NEXT: %ptr:gpr64common = ADDXrs [[COPY1]], [[COPY]], 1
+ ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-SLOW-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
+ ; CHECK-SLOW-NEXT: STRHHui [[COPY3]], %ptr, 0 :: (store (s16) into %ir.addr)
+ ; CHECK-SLOW-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY2]].sub_32
+ ; CHECK-SLOW-NEXT: STRHHui [[COPY4]], %ptr, 0 :: (store (s16) into %ir.addr)
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 1
%2:gpr(s64) = G_SHL %0, %1(s64)
@@ -296,14 +308,24 @@ machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2, $q0
- ; CHECK-LABEL: name: shl_slow_4_more_than_one_use
- ; CHECK: liveins: $x0, $x1, $x2, $q0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK-NEXT: STRQroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s128) into %ir.addr)
- ; CHECK-NEXT: STRQroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s128) into %ir.addr)
+ ; CHECK-FAST-LABEL: name: shl_slow_4_more_than_one_use
+ ; CHECK-FAST: liveins: $x0, $x1, $x2, $q0
+ ; CHECK-FAST-NEXT: {{ $}}
+ ; CHECK-FAST-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-FAST-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-FAST-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-FAST-NEXT: STRQroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s128) into %ir.addr)
+ ; CHECK-FAST-NEXT: STRQroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s128) into %ir.addr)
+ ;
+ ; CHECK-SLOW-LABEL: name: shl_slow_4_more_than_one_use
+ ; CHECK-SLOW: liveins: $x0, $x1, $x2, $q0
+ ; CHECK-SLOW-NEXT: {{ $}}
+ ; CHECK-SLOW-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-SLOW-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-SLOW-NEXT: %ptr:gpr64common = ADDXrs [[COPY1]], [[COPY]], 4
+ ; CHECK-SLOW-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-SLOW-NEXT: STRQui [[COPY2]], %ptr, 0 :: (store (s128) into %ir.addr)
+ ; CHECK-SLOW-NEXT: STRQui [[COPY2]], %ptr, 0 :: (store (s128) into %ir.addr)
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 4
%2:gpr(s64) = G_SHL %0, %1(s64)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 3689aa3850e73..4ecbebd989ad3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -12,17 +12,16 @@ declare void @foo()
define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
; CHECK0-GISEL-LABEL: halfword:
; CHECK0-GISEL: // %bb.0:
-; CHECK0-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK0-GISEL-NEXT: lsr w8, w1, #9
; CHECK0-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK0-GISEL-NEXT: mov x19, x0
-; CHECK0-GISEL-NEXT: and x21, x8, #0xff
-; CHECK0-GISEL-NEXT: ldrh w20, [x0, x21, lsl #1]
+; CHECK0-GISEL-NEXT: add x20, x0, w8, uxtb #1
+; CHECK0-GISEL-NEXT: ldrh w19, [x20]
; CHECK0-GISEL-NEXT: bl foo
-; CHECK0-GISEL-NEXT: mov w0, w20
-; CHECK0-GISEL-NEXT: strh w20, [x19, x21, lsl #1]
+; CHECK0-GISEL-NEXT: mov w0, w19
+; CHECK0-GISEL-NEXT: strh w19, [x20]
; CHECK0-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK0-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK0-GISEL-NEXT: ret
;
; CHECK0-SDAG-LABEL: halfword:
@@ -223,27 +222,23 @@ define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
; CHECK0-GISEL-LABEL: multi_use_half_word:
; CHECK0-GISEL: // %bb.0: // %entry
-; CHECK0-GISEL-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
-; CHECK0-GISEL-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; CHECK0-GISEL-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK0-GISEL-NEXT: .cfi_def_cfa_offset 48
+; CHECK0-GISEL-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-GISEL-NEXT: .cfi_def_cfa_offset 32
; CHECK0-GISEL-NEXT: .cfi_offset w19, -8
; CHECK0-GISEL-NEXT: .cfi_offset w20, -16
; CHECK0-GISEL-NEXT: .cfi_offset w21, -24
-; CHECK0-GISEL-NEXT: .cfi_offset w22, -32
-; CHECK0-GISEL-NEXT: .cfi_offset w30, -48
+; CHECK0-GISEL-NEXT: .cfi_offset w30, -32
; CHECK0-GISEL-NEXT: lsr w8, w1, #9
-; CHECK0-GISEL-NEXT: mov x19, x0
-; CHECK0-GISEL-NEXT: and x21, x8, #0xff
-; CHECK0-GISEL-NEXT: ldrh w20, [x0, x21, lsl #1]
-; CHECK0-GISEL-NEXT: add w22, w20, #1
+; CHECK0-GISEL-NEXT: add x20, x0, w8, uxtb #1
+; CHECK0-GISEL-NEXT: ldrh w19, [x20]
+; CHECK0-GISEL-NEXT: add w21, w19, #1
; CHECK0-GISEL-NEXT: bl foo
-; CHECK0-GISEL-NEXT: strh w20, [x19, x21, lsl #1]
-; CHECK0-GISEL-NEXT: mov w0, w20
-; CHECK0-GISEL-NEXT: strh w22, [x19, x21, lsl #1]
-; CHECK0-GISEL-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK0-GISEL-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK0-GISEL-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK0-GISEL-NEXT: strh w19, [x20]
+; CHECK0-GISEL-NEXT: mov w0, w19
+; CHECK0-GISEL-NEXT: strh w21, [x20]
+; CHECK0-GISEL-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-GISEL-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; CHECK0-GISEL-NEXT: ret
;
; CHECK0-SDAG-LABEL: multi_use_half_word:
@@ -380,14 +375,14 @@ define i64 @gep3(ptr %p, i64 %b) {
define i128 @gep4(ptr %p, i128 %a, i64 %b) {
; CHECK0-GISEL-LABEL: gep4:
; CHECK0-GISEL: // %bb.0:
-; CHECK0-GISEL-NEXT: ldr q1, [x0, x4, lsl #4]
+; CHECK0-GISEL-NEXT: add x8, x0, x4, lsl #4
; CHECK0-GISEL-NEXT: mov v0.d[0], x2
-; CHECK0-GISEL-NEXT: mov x8, x0
+; CHECK0-GISEL-NEXT: ldr q1, [x8]
; CHECK0-GISEL-NEXT: mov d2, v1.d[1]
-; CHECK0-GISEL-NEXT: fmov x0, d1
; CHECK0-GISEL-NEXT: mov v0.d[1], x3
+; CHECK0-GISEL-NEXT: fmov x0, d1
; CHECK0-GISEL-NEXT: fmov x1, d2
-; CHECK0-GISEL-NEXT: str q0, [x8, x4, lsl #4]
+; CHECK0-GISEL-NEXT: str q0, [x8]
; CHECK0-GISEL-NEXT: ret
;
; CHECK0-SDAG-LABEL: gep4:
More information about the llvm-commits mailing list