[llvm] [AArch64] Preprocess bitcast(load) (PR #160035)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 21 23:22:26 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/160035
None
>From 78cf616629af0edde9cb932dbe9a2670d124620f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 16 Sep 2025 19:18:46 +0100
Subject: [PATCH 1/2] [AArch64] Add patterns for extending loads bitcast to a
vector.
Similar to <>, we can convert an extending load + bitcast to vector to a b/h/s
register load, that also zeros the top parts of the register.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29 +++-
llvm/test/CodeGen/AArch64/aarch64-addv.ll | 9 +-
llvm/test/CodeGen/AArch64/bitcast-extend.ll | 144 +++++++++++---------
llvm/test/CodeGen/AArch64/dp1.ll | 6 +-
4 files changed, 108 insertions(+), 80 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6cea453f271be..c1570ba5be7d8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4026,6 +4026,27 @@ def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s
def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
(SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+// Similar to the patterns above we can turn a bitcast zextload to a vector type into a FPR load.
+multiclass BitcastLoad<ValueType VT> {
+ def : Pat <(VT (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+ def : Pat <(VT (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+ def : Pat <(VT (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+}
+let Predicates = [IsLE] in {
+ defm : BitcastLoad<v8i8>;
+ defm : BitcastLoad<v4i16>;
+ defm : BitcastLoad<v2i32>;
+ defm : BitcastLoad<v1i64>;
+ defm : BitcastLoad<v4f16>;
+ defm : BitcastLoad<v4bf16>;
+ defm : BitcastLoad<v2f32>;
+ defm : BitcastLoad<v1f64>;
+}
+
+
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
@@ -4172,13 +4193,13 @@ def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
//---
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index d9180a28bd40b..4c168e3306500 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -501,8 +501,7 @@ define i16 @addv_zero_lanes_v4i16(ptr %arr) {
;
; CHECK-GI-LABEL: addv_zero_lanes_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ldr b0, [x0]
; CHECK-GI-NEXT: addv h0, v0.4h
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -521,8 +520,7 @@ define i8 @addv_zero_lanes_v8i8(ptr %arr) {
;
; CHECK-GI-LABEL: addv_zero_lanes_v8i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldrb w8, [x0]
-; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ldr b0, [x0]
; CHECK-GI-NEXT: addv b0, v0.8b
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -633,8 +631,7 @@ define i32 @addv_zero_lanes_v2i32(ptr %arr) {
;
; CHECK-GI-LABEL: addv_zero_lanes_v2i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ldr s0, [x0]
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v0.2s
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 741dcf3ad4c2f..1ecc945eb107c 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -291,8 +291,7 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) {
define <8 x i8> @load_zext_i8_v8i8(ptr %p) {
; CHECK-LABEL: load_zext_i8_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
@@ -303,8 +302,7 @@ define <8 x i8> @load_zext_i8_v8i8(ptr %p) {
define <8 x i8> @load_zext_i16_v8i8(ptr %p) {
; CHECK-LABEL: load_zext_i16_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
@@ -315,8 +313,7 @@ define <8 x i8> @load_zext_i16_v8i8(ptr %p) {
define <8 x i8> @load_zext_i32_v8i8(ptr %p) {
; CHECK-LABEL: load_zext_i32_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
@@ -360,8 +357,7 @@ define <16 x i8> @load_zext_v16i8(ptr %p) {
define <4 x i16> @load_zext_i8_v4i16(ptr %p) {
; CHECK-LABEL: load_zext_i8_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
@@ -372,8 +368,7 @@ define <4 x i16> @load_zext_i8_v4i16(ptr %p) {
define <4 x i16> @load_zext_i16_v4i16(ptr %p) {
; CHECK-LABEL: load_zext_i16_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
@@ -384,8 +379,7 @@ define <4 x i16> @load_zext_i16_v4i16(ptr %p) {
define <4 x i16> @load_zext_i32_v4i16(ptr %p) {
; CHECK-LABEL: load_zext_i32_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
@@ -396,8 +390,7 @@ define <4 x i16> @load_zext_i32_v4i16(ptr %p) {
define <2 x i32> @load_zext_i8_v2i32(ptr %p) {
; CHECK-LABEL: load_zext_i8_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
@@ -408,8 +401,7 @@ define <2 x i32> @load_zext_i8_v2i32(ptr %p) {
define <2 x i32> @load_zext_i16_v2i32(ptr %p) {
; CHECK-LABEL: load_zext_i16_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
@@ -420,8 +412,7 @@ define <2 x i32> @load_zext_i16_v2i32(ptr %p) {
define <2 x i32> @load_zext_i32_v2i32(ptr %p) {
; CHECK-LABEL: load_zext_i32_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
@@ -430,11 +421,16 @@ define <2 x i32> @load_zext_i32_v2i32(ptr %p) {
}
define <1 x i64> @load_zext_i8_v1i64(ptr %p) {
-; CHECK-LABEL: load_zext_i8_v1i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i8_v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i8_v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
%b = bitcast i64 %z to <1 x i64>
@@ -442,11 +438,16 @@ define <1 x i64> @load_zext_i8_v1i64(ptr %p) {
}
define <1 x i64> @load_zext_i16_v1i64(ptr %p) {
-; CHECK-LABEL: load_zext_i16_v1i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i16_v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr h0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i16_v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
%b = bitcast i64 %z to <1 x i64>
@@ -454,11 +455,16 @@ define <1 x i64> @load_zext_i16_v1i64(ptr %p) {
}
define <1 x i64> @load_zext_i32_v1i64(ptr %p) {
-; CHECK-LABEL: load_zext_i32_v1i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i32_v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i32_v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
%b = bitcast i64 %z to <1 x i64>
@@ -469,8 +475,7 @@ define <1 x i64> @load_zext_i32_v1i64(ptr %p) {
define <4 x half> @load_zext_i8_v4f16(ptr %p) {
; CHECK-LABEL: load_zext_i8_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
@@ -481,8 +486,7 @@ define <4 x half> @load_zext_i8_v4f16(ptr %p) {
define <4 x half> @load_zext_i16_v4f16(ptr %p) {
; CHECK-LABEL: load_zext_i16_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
@@ -493,8 +497,7 @@ define <4 x half> @load_zext_i16_v4f16(ptr %p) {
define <4 x half> @load_zext_i32_v4f16(ptr %p) {
; CHECK-LABEL: load_zext_i32_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
@@ -505,8 +508,7 @@ define <4 x half> @load_zext_i32_v4f16(ptr %p) {
define <4 x bfloat> @load_zext_i8_v4bf16(ptr %p) {
; CHECK-LABEL: load_zext_i8_v4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
@@ -517,8 +519,7 @@ define <4 x bfloat> @load_zext_i8_v4bf16(ptr %p) {
define <4 x bfloat> @load_zext_i16_v4bf16(ptr %p) {
; CHECK-LABEL: load_zext_i16_v4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
@@ -529,8 +530,7 @@ define <4 x bfloat> @load_zext_i16_v4bf16(ptr %p) {
define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) {
; CHECK-LABEL: load_zext_i32_v4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
@@ -541,8 +541,7 @@ define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) {
define <2 x float> @load_zext_i8_v2f32(ptr %p) {
; CHECK-LABEL: load_zext_i8_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
@@ -553,8 +552,7 @@ define <2 x float> @load_zext_i8_v2f32(ptr %p) {
define <2 x float> @load_zext_i16_v2f32(ptr %p) {
; CHECK-LABEL: load_zext_i16_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
@@ -565,8 +563,7 @@ define <2 x float> @load_zext_i16_v2f32(ptr %p) {
define <2 x float> @load_zext_i32_v2f32(ptr %p) {
; CHECK-LABEL: load_zext_i32_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
@@ -575,11 +572,16 @@ define <2 x float> @load_zext_i32_v2f32(ptr %p) {
}
define <1 x double> @load_zext_i8_v1f64(ptr %p) {
-; CHECK-LABEL: load_zext_i8_v1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i8_v1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i8_v1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
%b = bitcast i64 %z to <1 x double>
@@ -587,11 +589,16 @@ define <1 x double> @load_zext_i8_v1f64(ptr %p) {
}
define <1 x double> @load_zext_i16_v1f64(ptr %p) {
-; CHECK-LABEL: load_zext_i16_v1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i16_v1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr h0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i16_v1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
%b = bitcast i64 %z to <1 x double>
@@ -599,11 +606,16 @@ define <1 x double> @load_zext_i16_v1f64(ptr %p) {
}
define <1 x double> @load_zext_i32_v1f64(ptr %p) {
-; CHECK-LABEL: load_zext_i32_v1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i32_v1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i32_v1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
%b = bitcast i64 %z to <1 x double>
diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll
index e904f4b6d247a..c761e0ec578e9 100644
--- a/llvm/test/CodeGen/AArch64/dp1.ll
+++ b/llvm/test/CodeGen/AArch64/dp1.ll
@@ -201,8 +201,7 @@ define void @ctpop_i32() {
; CHECK-SDAG: // %bb.0:
; CHECK-SDAG-NEXT: adrp x8, :got:var32
; CHECK-SDAG-NEXT: ldr x8, [x8, :got_lo12:var32]
-; CHECK-SDAG-NEXT: ldr w9, [x8]
-; CHECK-SDAG-NEXT: fmov d0, x9
+; CHECK-SDAG-NEXT: ldr s0, [x8]
; CHECK-SDAG-NEXT: cnt v0.8b, v0.8b
; CHECK-SDAG-NEXT: addv b0, v0.8b
; CHECK-SDAG-NEXT: str s0, [x8]
@@ -212,8 +211,7 @@ define void @ctpop_i32() {
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: adrp x8, :got:var32
; CHECK-GISEL-NEXT: ldr x8, [x8, :got_lo12:var32]
-; CHECK-GISEL-NEXT: ldr w9, [x8]
-; CHECK-GISEL-NEXT: fmov d0, x9
+; CHECK-GISEL-NEXT: ldr s0, [x8]
; CHECK-GISEL-NEXT: cnt v0.8b, v0.8b
; CHECK-GISEL-NEXT: uaddlv h0, v0.8b
; CHECK-GISEL-NEXT: str s0, [x8]
>From 962663268aa2cb98e766326bd60da8f9b3e7b2f8 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 22 Sep 2025 07:21:14 +0100
Subject: [PATCH 2/2] Preprocess bitcast(load)
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 68 ++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29 +--
llvm/test/CodeGen/AArch64/aarch64-addv.ll | 9 +-
llvm/test/CodeGen/AArch64/bitcast-extend.ll | 210 +++++++++++++-----
llvm/test/CodeGen/AArch64/dp1.ll | 3 +-
5 files changed, 230 insertions(+), 89 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 54bdb8750f709..b2483043136a4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -60,6 +60,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return SelectionDAGISel::runOnMachineFunction(MF);
}
+ void PreprocessISelDAG() override;
+
void Select(SDNode *Node) override;
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -529,6 +531,72 @@ char AArch64DAGToDAGISelLegacy::ID = 0;
INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
+void AArch64DAGToDAGISel::PreprocessISelDAG() {
+ bool MadeChange = false;
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end();
+ I != E;) {
+ SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+
+ switch (N->getOpcode()) {
+ case ISD::BITCAST: {
+ // Canonicalize bitcast(extload) or bitcast(zextload) into
+ // scalar_to_vector(load) or insert(zero, load), to help generate the
+ // canonical patterns that tablegen expects. This helps generate extending
+ // loads that zero the top data implicitly.
+ EVT VT = N->getValueType(0);
+ if (Subtarget->isLittleEndian() &&
+ (/*VT == MVT::f32 || VT == MVT::f64 ||*/ VT.isVector())) {
+ auto *Ld = dyn_cast<LoadSDNode>(N->getOperand(0));
+ if (Ld && Ld->isSimple() && !Ld->isIndexed() &&
+ (Ld->getExtensionType() == ISD::EXTLOAD ||
+ Ld->getExtensionType() == ISD::ZEXTLOAD)) {
+ LLVM_DEBUG({
+ dbgs() << "Found an extending load ";
+ Ld->dump();
+ });
+
+ EVT MemVT = Ld->getMemoryVT();
+ assert(VT.is64BitVector() || VT.is128BitVector() || VT == MVT::f32 ||
+ VT == MVT::f64);
+ assert(
+ VT.getScalarSizeInBits() == 8 || VT.getScalarSizeInBits() == 16 ||
+ VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64);
+ assert(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32);
+ EVT ScalarVT = MemVT.getSizeInBits() < 32 ? MVT::i32 : MemVT;
+ EVT ExtVT =
+ EVT::getVectorVT(*CurDAG->getContext(), MemVT,
+ VT.getSizeInBits() / MemVT.getSizeInBits());
+
+ SDLoc DL(N);
+ SDValue NewLd =
+ CurDAG->getExtLoad(ISD::EXTLOAD, DL, ScalarVT, Ld->getChain(),
+ Ld->getBasePtr(), MemVT, Ld->getMemOperand());
+ SDValue Ext;
+ if (Ld->getExtensionType() == ISD::EXTLOAD)
+ Ext = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, NewLd);
+ else
+ Ext = CurDAG->getNode(ISD::INSERT_VECTOR_ELT, DL, ExtVT,
+ CurDAG->getConstant(0, DL, ExtVT), NewLd,
+ CurDAG->getConstant(0, DL, MVT::i64));
+ Ext = CurDAG->getBitcast(VT, Ext);
+
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Ext);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewLd.getValue(1));
+ ++I;
+ MadeChange = true;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
+}
+
/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c1570ba5be7d8..6cea453f271be 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4026,27 +4026,6 @@ def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s
def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
(SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-// Similar to the patterns above we can turn a bitcast zextload to a vector type into a FPR load.
-multiclass BitcastLoad<ValueType VT> {
- def : Pat <(VT (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
- def : Pat <(VT (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
- def : Pat <(VT (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-}
-let Predicates = [IsLE] in {
- defm : BitcastLoad<v8i8>;
- defm : BitcastLoad<v4i16>;
- defm : BitcastLoad<v2i32>;
- defm : BitcastLoad<v1i64>;
- defm : BitcastLoad<v4f16>;
- defm : BitcastLoad<v4bf16>;
- defm : BitcastLoad<v2f32>;
- defm : BitcastLoad<v1f64>;
-}
-
-
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
@@ -4193,13 +4172,13 @@ def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
- (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
//---
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index 4c168e3306500..d9180a28bd40b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -501,7 +501,8 @@ define i16 @addv_zero_lanes_v4i16(ptr %arr) {
;
; CHECK-GI-LABEL: addv_zero_lanes_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr b0, [x0]
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: addv h0, v0.4h
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -520,7 +521,8 @@ define i8 @addv_zero_lanes_v8i8(ptr %arr) {
;
; CHECK-GI-LABEL: addv_zero_lanes_v8i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr b0, [x0]
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: addv b0, v0.8b
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -631,7 +633,8 @@ define i32 @addv_zero_lanes_v2i32(ptr %arr) {
;
; CHECK-GI-LABEL: addv_zero_lanes_v2i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v0.2s
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 1ecc945eb107c..b6b1d75207123 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -289,10 +289,16 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) {
}
define <8 x i8> @load_zext_i8_v8i8(ptr %p) {
-; CHECK-LABEL: load_zext_i8_v8i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i8_v8i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i8_v8i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
%b = bitcast i64 %z to <8 x i8>
@@ -300,10 +306,16 @@ define <8 x i8> @load_zext_i8_v8i8(ptr %p) {
}
define <8 x i8> @load_zext_i16_v8i8(ptr %p) {
-; CHECK-LABEL: load_zext_i16_v8i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i16_v8i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr h0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i16_v8i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
%b = bitcast i64 %z to <8 x i8>
@@ -311,10 +323,16 @@ define <8 x i8> @load_zext_i16_v8i8(ptr %p) {
}
define <8 x i8> @load_zext_i32_v8i8(ptr %p) {
-; CHECK-LABEL: load_zext_i32_v8i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i32_v8i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i32_v8i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
%b = bitcast i64 %z to <8 x i8>
@@ -355,10 +373,16 @@ define <16 x i8> @load_zext_v16i8(ptr %p) {
define <4 x i16> @load_zext_i8_v4i16(ptr %p) {
-; CHECK-LABEL: load_zext_i8_v4i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i8_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i8_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
%b = bitcast i64 %z to <4 x i16>
@@ -366,10 +390,16 @@ define <4 x i16> @load_zext_i8_v4i16(ptr %p) {
}
define <4 x i16> @load_zext_i16_v4i16(ptr %p) {
-; CHECK-LABEL: load_zext_i16_v4i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i16_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr h0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i16_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
%b = bitcast i64 %z to <4 x i16>
@@ -377,10 +407,16 @@ define <4 x i16> @load_zext_i16_v4i16(ptr %p) {
}
define <4 x i16> @load_zext_i32_v4i16(ptr %p) {
-; CHECK-LABEL: load_zext_i32_v4i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i32_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i32_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
%b = bitcast i64 %z to <4 x i16>
@@ -388,10 +424,16 @@ define <4 x i16> @load_zext_i32_v4i16(ptr %p) {
}
define <2 x i32> @load_zext_i8_v2i32(ptr %p) {
-; CHECK-LABEL: load_zext_i8_v2i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i8_v2i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i8_v2i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
%b = bitcast i64 %z to <2 x i32>
@@ -399,10 +441,16 @@ define <2 x i32> @load_zext_i8_v2i32(ptr %p) {
}
define <2 x i32> @load_zext_i16_v2i32(ptr %p) {
-; CHECK-LABEL: load_zext_i16_v2i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i16_v2i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr h0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i16_v2i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
%b = bitcast i64 %z to <2 x i32>
@@ -410,10 +458,16 @@ define <2 x i32> @load_zext_i16_v2i32(ptr %p) {
}
define <2 x i32> @load_zext_i32_v2i32(ptr %p) {
-; CHECK-LABEL: load_zext_i32_v2i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i32_v2i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i32_v2i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
%b = bitcast i64 %z to <2 x i32>
@@ -473,10 +527,16 @@ define <1 x i64> @load_zext_i32_v1i64(ptr %p) {
define <4 x half> @load_zext_i8_v4f16(ptr %p) {
-; CHECK-LABEL: load_zext_i8_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i8_v4f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i8_v4f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
%b = bitcast i64 %z to <4 x half>
@@ -484,10 +544,16 @@ define <4 x half> @load_zext_i8_v4f16(ptr %p) {
}
define <4 x half> @load_zext_i16_v4f16(ptr %p) {
-; CHECK-LABEL: load_zext_i16_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i16_v4f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr h0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i16_v4f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
%b = bitcast i64 %z to <4 x half>
@@ -495,10 +561,16 @@ define <4 x half> @load_zext_i16_v4f16(ptr %p) {
}
define <4 x half> @load_zext_i32_v4f16(ptr %p) {
-; CHECK-LABEL: load_zext_i32_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i32_v4f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i32_v4f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
%b = bitcast i64 %z to <4 x half>
@@ -539,10 +611,16 @@ define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) {
}
define <2 x float> @load_zext_i8_v2f32(ptr %p) {
-; CHECK-LABEL: load_zext_i8_v2f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i8_v2f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i8_v2f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i8, ptr %p
%z = zext i8 %l to i64
%b = bitcast i64 %z to <2 x float>
@@ -550,10 +628,16 @@ define <2 x float> @load_zext_i8_v2f32(ptr %p) {
}
define <2 x float> @load_zext_i16_v2f32(ptr %p) {
-; CHECK-LABEL: load_zext_i16_v2f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i16_v2f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr h0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i16_v2f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i16, ptr %p
%z = zext i16 %l to i64
%b = bitcast i64 %z to <2 x float>
@@ -561,10 +645,16 @@ define <2 x float> @load_zext_i16_v2f32(ptr %p) {
}
define <2 x float> @load_zext_i32_v2f32(ptr %p) {
-; CHECK-LABEL: load_zext_i32_v2f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: load_zext_i32_v2f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_zext_i32_v2f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%l = load i32, ptr %p
%z = zext i32 %l to i64
%b = bitcast i64 %z to <2 x float>
diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll
index c761e0ec578e9..e52fec96d51e3 100644
--- a/llvm/test/CodeGen/AArch64/dp1.ll
+++ b/llvm/test/CodeGen/AArch64/dp1.ll
@@ -211,7 +211,8 @@ define void @ctpop_i32() {
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: adrp x8, :got:var32
; CHECK-GISEL-NEXT: ldr x8, [x8, :got_lo12:var32]
-; CHECK-GISEL-NEXT: ldr s0, [x8]
+; CHECK-GISEL-NEXT: ldr w9, [x8]
+; CHECK-GISEL-NEXT: fmov d0, x9
; CHECK-GISEL-NEXT: cnt v0.8b, v0.8b
; CHECK-GISEL-NEXT: uaddlv h0, v0.8b
; CHECK-GISEL-NEXT: str s0, [x8]
More information about the llvm-commits
mailing list