[llvm] 8a40bf6 - [AArch64][SVE] More unpredicated ld1/st1 patterns for reg+reg addressing modes
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Mon May 3 15:06:31 PDT 2021
Author: Eli Friedman
Date: 2021-05-03T15:06:20-07:00
New Revision: 8a40bf6d210fd2b5180841579d412826c381fb2b
URL: https://github.com/llvm/llvm-project/commit/8a40bf6d210fd2b5180841579d412826c381fb2b
DIFF: https://github.com/llvm/llvm-project/commit/8a40bf6d210fd2b5180841579d412826c381fb2b.diff
LOG: [AArch64][SVE] More unpredicated ld1/st1 patterns for reg+reg addressing modes
In some cases, we can improve the generated code by using a load or store
with the "wrong" element width: in particular, using ld1b/st1b when we see
a reg+reg address without a shift.
Differential Revision: https://reviews.llvm.org/D100527
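As an illustration, here is a minimal IR sketch (modeled on the new reg+reg
tests added below; the function name is hypothetical and SVE is assumed to be
enabled) of the kind of access that benefits: the address is a plain byte
offset with no shift, so the access can be selected as a single ld1b with
reg+reg addressing instead of a separate add followed by ld1h.

    ; Before this patch: roughly  add x8, x0, x1 ; ld1h { z0.h }, p0/z, [x8]
    ; After this patch:           ld1b { z0.b }, p0/z, [x0, x1]
    define <vscale x 8 x i16> @load_i16_reg_reg(i8* %base, i64 %off) {
      %addr = getelementptr inbounds i8, i8* %base, i64 %off
      %cast = bitcast i8* %addr to <vscale x 8 x i16>*
      %val = load <vscale x 8 x i16>, <vscale x 8 x i16>* %cast
      ret <vscale x 8 x i16> %val
    }

The byte-sized load reads the same memory either way; the element-width
"mismatch" is only safe when byte order within each element does not matter,
which is why the new patterns are guarded by the IsLE predicate.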
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4af8e65b8b35..b20f175f3fe7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1975,6 +1975,25 @@ let Predicates = [HasSVE] in {
defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
+ // Allow using the reg+reg form of ld1b/st1b for memory accesses with the
+ // same width as nxv16i8. This saves an add in cases where we would
+ // otherwise compute the address separately.
+ multiclass unpred_loadstore_bitcast<ValueType Ty> {
+ let Predicates = [IsLE] in {
+ def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
+ (LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
+ def : Pat<(store (Ty ZPR:$val), (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
+ (ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
+ }
+ }
+ defm : unpred_loadstore_bitcast<nxv8i16>;
+ defm : unpred_loadstore_bitcast<nxv8f16>;
+ defm : unpred_loadstore_bitcast<nxv8bf16>;
+ defm : unpred_loadstore_bitcast<nxv4f32>;
+ defm : unpred_loadstore_bitcast<nxv4i32>;
+ defm : unpred_loadstore_bitcast<nxv2i64>;
+ defm : unpred_loadstore_bitcast<nxv2f64>;
+
multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
(Store PPR:$val, GPR64sp:$base, simm9:$offset)>;
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
index 5e1b4b50e99d..f3295204b008 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -652,11 +652,12 @@ define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -671,11 +672,12 @@ define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #2 // =2
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-2
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -714,11 +716,12 @@ define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -733,11 +736,12 @@ define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #4 // =4
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-4
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -776,11 +780,12 @@ define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -795,11 +800,12 @@ define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #8 // =8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-8
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -838,11 +844,12 @@ define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -857,11 +864,12 @@ define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #2 // =2
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-2
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -900,11 +908,12 @@ define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -919,11 +928,12 @@ define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #4 // =4
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-4
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -962,11 +972,12 @@ define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -981,11 +992,12 @@ define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #8 // =8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-8
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -1027,11 +1039,12 @@ define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1>
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #8 // =8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-8
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: addvl sp, sp, #2
@@ -1051,11 +1064,12 @@ define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1>
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #4 // =4
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-4
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: addvl sp, sp, #2
@@ -1075,11 +1089,12 @@ define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1>
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #2 // =2
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-2
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: addvl sp, sp, #2
@@ -1121,11 +1136,12 @@ define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8>
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -1141,13 +1157,15 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: mov x9, #-32
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl]
; CHECK-NEXT: addvl x8, x8, #2
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: sub x8, x8, #32 // =32
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
index 76228f26bb20..407912600086 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
@@ -15,6 +15,42 @@ define <vscale x 16 x i8> @ld1_nxv16i8(i8* %addr, i64 %off) {
ret <vscale x 16 x i8> %val
}
+define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(i8* %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT: ret
+ %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+ %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
+ %val = load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %ptrcast
+ ret <vscale x 8 x i16> %val
+}
+
+define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(i8* %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT: ret
+ %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+ %ptrcast = bitcast i8* %ptr to <vscale x 4 x i32>*
+ %val = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %ptrcast
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(i8* %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT: ret
+ %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+ %ptrcast = bitcast i8* %ptr to <vscale x 2 x i64>*
+ %val = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %ptrcast
+ ret <vscale x 2 x i64> %val
+}
+
define <vscale x 8 x i16> @ld1_nxv8i16_zext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16_zext8:
; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
index 916eaf3e7b63..5dbc3366bd11 100644
--- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
@@ -15,6 +15,42 @@ define void @st1_nxv16i8(i8* %addr, i64 %off, <vscale x 16 x i8> %val) {
ret void
}
+define void @st1_nxv16i8_bitcast_from_i16(i8* %addr, i64 %off, <vscale x 8 x i16> %val) {
+; CHECK-LABEL: st1_nxv16i8_bitcast_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
+; CHECK-NEXT: ret
+ %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+ %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
+ store <vscale x 8 x i16> %val, <vscale x 8 x i16>* %ptrcast
+ ret void
+}
+
+define void @st1_nxv16i8_bitcast_from_i32(i8* %addr, i64 %off, <vscale x 4 x i32> %val) {
+; CHECK-LABEL: st1_nxv16i8_bitcast_from_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
+; CHECK-NEXT: ret
+ %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+ %ptrcast = bitcast i8* %ptr to <vscale x 4 x i32>*
+ store <vscale x 4 x i32> %val, <vscale x 4 x i32>* %ptrcast
+ ret void
+}
+
+define void @st1_nxv16i8_bitcast_from_i64(i8* %addr, i64 %off, <vscale x 2 x i64> %val) {
+; CHECK-LABEL: st1_nxv16i8_bitcast_from_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
+; CHECK-NEXT: ret
+ %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+ %ptrcast = bitcast i8* %ptr to <vscale x 2 x i64>*
+ store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptrcast
+ ret void
+}
+
define void @st1_nxv8i16_trunc8(i8* %addr, i64 %off, <vscale x 8 x i16> %val) {
; CHECK-LABEL: st1_nxv8i16_trunc8:
; CHECK: // %bb.0: