[llvm] 6a03631 - [SVE][CodeGen] Add more test cases for zero-extends of masked loads
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 17 01:06:22 PDT 2023
Author: David Sherwood
Date: 2023-07-17T08:06:15Z
New Revision: 6a036316b3ca3ce9c4c6e38d00a113a4f0f146e3
URL: https://github.com/llvm/llvm-project/commit/6a036316b3ca3ce9c4c6e38d00a113a4f0f146e3
DIFF: https://github.com/llvm/llvm-project/commit/6a036316b3ca3ce9c4c6e38d00a113a4f0f146e3.diff
LOG: [SVE][CodeGen] Add more test cases for zero-extends of masked loads
This patch adds test cases for extending masked loads of illegal
unpacked types into illegal wider types.
Pre-commit tests for D155281.
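
Concretely, the new tests exercise IR of roughly this shape (a simplified
restatement of one of the cases added below; the function name is
illustrative), where both the unpacked <vscale x 4 x i8> source and the
<vscale x 4 x i64> result are types that need legalisation:

  declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i8>)

  define <vscale x 4 x i64> @zext_example(ptr %a, <vscale x 4 x i1> %mask) {
    %load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
    %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i64>
    ret <vscale x 4 x i64> %res
  }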
Added:
Modified:
llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll
index 46675f85886c8e..55bd3833f611c0 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll
@@ -21,6 +21,18 @@ define <vscale x 16 x i32> @masked_ld1b_i8_sext_i32(<vscale x 16 x i8> *%base, <
ret <vscale x 16 x i32> %res
}
+define <vscale x 8 x i32> @masked_ld1b_nxv8i8_sext_i32(<vscale x 8 x i8> *%a, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_ld1b_nxv8i8_sext_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: sunpklo z0.s, z1.h
+; CHECK-NEXT: sunpkhi z1.s, z1.h
+; CHECK-NEXT: ret
+ %wide.masked.load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr %a, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> poison)
+ %res = sext <vscale x 8 x i8> %wide.masked.load to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %res
+}
+
define <vscale x 16 x i32> @masked_ld1b_i8_zext_i32(<vscale x 16 x i8> *%base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_i8_zext_i32:
; CHECK: // %bb.0:
@@ -37,6 +49,19 @@ define <vscale x 16 x i32> @masked_ld1b_i8_zext_i32(<vscale x 16 x i8> *%base, <
ret <vscale x 16 x i32> %res
}
+define <vscale x 8 x i32> @masked_ld1b_nxv8i8_zext_i32(<vscale x 8 x i8> *%a, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_ld1b_nxv8i8_zext_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: and z0.h, z0.h, #0xff
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ret
+ %wide.masked.load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr %a, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> poison)
+ %res = zext <vscale x 8 x i8> %wide.masked.load to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %res
+}
+
define <vscale x 16 x i64> @masked_ld1b_i8_sext(<vscale x 16 x i8> *%base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_i8_sext:
; CHECK: // %bb.0:
@@ -61,6 +86,18 @@ define <vscale x 16 x i64> @masked_ld1b_i8_sext(<vscale x 16 x i8> *%base, <vsca
ret <vscale x 16 x i64> %res
}
+define <vscale x 4 x i64> @masked_ld1b_nxv4i8_sext_i64(<vscale x 4 x i8> *%a, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_ld1b_nxv4i8_sext_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x0]
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %wide.masked.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
+ %res = sext <vscale x 4 x i8> %wide.masked.load to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %res
+}
+
define <vscale x 16 x i64> @masked_ld1b_i8_zext(<vscale x 16 x i8> *%base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_i8_zext:
; CHECK: // %bb.0:
@@ -85,6 +122,19 @@ define <vscale x 16 x i64> @masked_ld1b_i8_zext(<vscale x 16 x i8> *%base, <vsca
ret <vscale x 16 x i64> %res
}
+define <vscale x 4 x i64> @masked_ld1b_nxv4i8_zext_i64(<vscale x 4 x i8> *%a, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_ld1b_nxv4i8_zext_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: uunpkhi z1.d, z0.s
+; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %wide.masked.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
+ %res = zext <vscale x 4 x i8> %wide.masked.load to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %res
+}
+
;
; LD1H
;
@@ -105,6 +155,18 @@ define <vscale x 8 x i64> @masked_ld1h_i16_sext(<vscale x 8 x i16> *%base, <vsca
ret <vscale x 8 x i64> %res
}
+define <vscale x 4 x i64> @masked_ld1h_nxv4i16_sext(<vscale x 4 x i16> *%a, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_ld1h_nxv4i16_sext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %wide.masked.load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> poison)
+ %res = sext <vscale x 4 x i16> %wide.masked.load to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %res
+}
+
define <vscale x 8 x i64> @masked_ld1h_i16_zext(<vscale x 8 x i16> *%base, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: masked_ld1h_i16_zext:
; CHECK: // %bb.0:
@@ -121,6 +183,19 @@ define <vscale x 8 x i64> @masked_ld1h_i16_zext(<vscale x 8 x i16> *%base, <vsca
ret <vscale x 8 x i64> %res
}
+define <vscale x 4 x i64> @masked_ld1h_nxv4i16_zext(<vscale x 4 x i16> *%a, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_ld1h_nxv4i16_zext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: uunpkhi z1.d, z0.s
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: ret
+ %wide.masked.load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> poison)
+ %res = zext <vscale x 4 x i16> %wide.masked.load to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %res
+}
+
;
; LD1W
;
@@ -150,6 +225,8 @@ define <vscale x 4 x i64> @masked_ld1w_i32_zext(<vscale x 4 x i32> *%base, <vsca
}
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(<vscale x 8 x i8>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i8>)
+declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(<vscale x 4 x i8>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(<vscale x 8 x i16>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(<vscale x 4 x i16>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(<vscale x 4 x i32>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>)
-