[llvm] 64094e3 - [DAGCombiner] Pre-commit tests for D159191
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 5 03:46:24 PDT 2023
Author: David Sherwood
Date: 2023-09-05T10:41:21Z
New Revision: 64094e3e6df223cde2861b89c31bb529bb36f8f7
URL: https://github.com/llvm/llvm-project/commit/64094e3e6df223cde2861b89c31bb529bb36f8f7
DIFF: https://github.com/llvm/llvm-project/commit/64094e3e6df223cde2861b89c31bb529bb36f8f7.diff
LOG: [DAGCombiner] Pre-commit tests for D159191
I've added some missing tests for the following cases:
1. Zero- and sign-extends from unpacked vector types to wide,
illegal types. For example,
%aext = zext <vscale x 4 x i8> %a to <vscale x 4 x i64>
2. Normal loads combined with the extends from case 1
3. Masked loads combined with the extends from case 1
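For cases 2 and 3 the new tests combine a load or a masked load with such an
extend, along these lines (illustrative sketch only, taken from the patterns
in the diff below; %mask stands in for the predicate argument):
%aval = load <vscale x 4 x i8>, ptr %a
%aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
%mval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %mask, <vscale x 4 x i8> zeroinitializer)
%mext = sext <vscale x 4 x i8> %mval to <vscale x 4 x i64>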
Differential Revision: https://reviews.llvm.org/D159192
Added:
Modified:
llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
llvm/test/CodeGen/AArch64/sve-sext-zext.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
index d44bc171b7b6f2..6764a910edb13e 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
@@ -128,3 +128,106 @@ define <vscale x 4 x i64> @ld1w_i32_zext(<vscale x 4 x i32> *%base) {
%res = zext <vscale x 4 x i32> %wide.load to <vscale x 4 x i64>
ret <vscale x 4 x i64> %res
}
+
+
+; Extending loads from unpacked to wide illegal types
+
+define <vscale x 4 x i64> @zload_4i8_4i64(ptr %a) {
+; CHECK-LABEL: zload_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 4 x i8>, ptr %a
+ %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 4 x i64> @zload_4i16_4i64(ptr %a) {
+; CHECK-LABEL: zload_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 4 x i16>, ptr %a
+ %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 8 x i32> @zload_8i8_8i32(ptr %a) {
+; CHECK-LABEL: zload_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 8 x i8>, ptr %a
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %aext
+}
+
+define <vscale x 8 x i64> @zload_8i8_8i64(ptr %a) {
+; CHECK-LABEL: zload_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1b { z2.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1b { z3.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 8 x i8>, ptr %a
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %aext
+}
+
+define <vscale x 4 x i64> @sload_4i8_4i64(ptr %a) {
+; CHECK-LABEL: sload_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 4 x i8>, ptr %a
+ %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 4 x i64> @sload_4i16_4i64(ptr %a) {
+; CHECK-LABEL: sload_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 4 x i16>, ptr %a
+ %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 8 x i32> @sload_8i8_8i32(ptr %a) {
+; CHECK-LABEL: sload_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 8 x i8>, ptr %a
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %aext
+}
+
+define <vscale x 8 x i64> @sload_8i8_8i64(ptr %a) {
+; CHECK-LABEL: sload_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z2.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1sb { z3.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT: ret
+ %aval = load <vscale x 8 x i8>, ptr %a
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %aext
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
index 476401c7ebd805..f8587ca86f392a 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
@@ -112,6 +112,152 @@ define <vscale x 4 x double> @masked_sload_4i8_4f32(<vscale x 4 x i8>* noalias %
ret <vscale x 4 x double> %res
}
+
+; Extending loads from unpacked to wide illegal types
+
+define <vscale x 4 x i64> @masked_sload_4i8_4i64(ptr %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: masked_sload_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x0]
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %b, <vscale x 4 x i8> zeroinitializer)
+ %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 4 x i64> @masked_sload_4i16_4i64(ptr %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: masked_sload_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %b, <vscale x 4 x i16> zeroinitializer)
+ %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 8 x i32> @masked_sload_8i8_8i32(ptr %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: masked_sload_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
+; CHECK-NEXT: sunpklo z0.s, z1.h
+; CHECK-NEXT: sunpkhi z1.s, z1.h
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %b, <vscale x 8 x i8> zeroinitializer)
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %aext
+}
+
+define <vscale x 8 x i64> @masked_sload_8i8_8i64(ptr %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: masked_sload_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: sunpklo z1.s, z0.h
+; CHECK-NEXT: sunpkhi z3.s, z0.h
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: sunpklo z2.d, z3.s
+; CHECK-NEXT: sunpkhi z3.d, z3.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %b, <vscale x 8 x i8> zeroinitializer)
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %aext
+}
+
+define <vscale x 4 x i64> @masked_sload_x2_4i8_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_sload_x2_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x1]
+; CHECK-NEXT: sunpkhi z2.d, z0.s
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: sunpkhi z3.d, z1.s
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: add z1.d, z2.d, z3.d
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+ %bval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+ %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ %bext = sext <vscale x 4 x i8> %bval to <vscale x 4 x i64>
+ %res = add <vscale x 4 x i64> %aext, %bext
+ ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 4 x i64> @masked_sload_x2_4i16_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_sload_x2_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x1]
+; CHECK-NEXT: sunpkhi z2.d, z0.s
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: sunpkhi z3.d, z1.s
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: add z1.d, z2.d, z3.d
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+ %bval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+ %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ %bext = sext <vscale x 4 x i16> %bval to <vscale x 4 x i64>
+ %res = add <vscale x 4 x i64> %aext, %bext
+ ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 8 x i32> @masked_sload_x2_8i8_8i32(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_sload_x2_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x1]
+; CHECK-NEXT: sunpkhi z2.s, z0.h
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sunpkhi z3.s, z1.h
+; CHECK-NEXT: sunpklo z1.s, z1.h
+; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: add z1.s, z2.s, z3.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ %bext = sext <vscale x 8 x i8> %bval to <vscale x 8 x i32>
+ %res = add <vscale x 8 x i32> %aext, %bext
+ ret <vscale x 8 x i32> %res
+}
+
+define <vscale x 8 x i64> @masked_sload_x2_8i8_8i64(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_sload_x2_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x1]
+; CHECK-NEXT: sunpkhi z2.s, z0.h
+; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sunpklo z3.s, z1.h
+; CHECK-NEXT: sunpkhi z1.s, z1.h
+; CHECK-NEXT: sunpkhi z4.d, z2.s
+; CHECK-NEXT: sunpklo z2.d, z2.s
+; CHECK-NEXT: sunpkhi z5.d, z0.s
+; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: sunpklo z6.d, z3.s
+; CHECK-NEXT: sunpkhi z7.d, z1.s
+; CHECK-NEXT: sunpklo z24.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z3.s
+; CHECK-NEXT: add z0.d, z0.d, z6.d
+; CHECK-NEXT: add z3.d, z4.d, z7.d
+; CHECK-NEXT: add z1.d, z5.d, z1.d
+; CHECK-NEXT: add z2.d, z2.d, z24.d
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ %bext = sext <vscale x 8 x i8> %bval to <vscale x 8 x i64>
+ %res = add <vscale x 8 x i64> %aext, %bext
+ ret <vscale x 8 x i64> %res
+}
+
+
declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
index c4447f2f26ae04..a86dd18d7fd7c2 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -108,6 +108,151 @@ define <vscale x 2 x double> @masked_zload_2i16_2f64(<vscale x 2 x i16>* noalias
ret <vscale x 2 x double> %res
}
+; Extending loads from unpacked to wide illegal types
+
+define <vscale x 4 x i64> @masked_zload_4i8_4i64(ptr %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: masked_zload_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z1.s }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z0.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %b, <vscale x 4 x i8> zeroinitializer)
+ %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 4 x i64> @masked_zload_4i16_4i64(ptr %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: masked_zload_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z0.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %b, <vscale x 4 x i16> zeroinitializer)
+ %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 8 x i32> @masked_zload_8i8_8i32(ptr %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: masked_zload_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z0.s, z1.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %b, <vscale x 8 x i8> zeroinitializer)
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %aext
+}
+
+define <vscale x 8 x i64> @masked_zload_8i8_8i64(ptr %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: masked_zload_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: uunpkhi z3.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uunpklo z2.d, z3.s
+; CHECK-NEXT: uunpkhi z3.d, z3.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %b, <vscale x 8 x i8> zeroinitializer)
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %aext
+}
+
+define <vscale x 4 x i64> @masked_zload_x2_4i8_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_zload_x2_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1]
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: uunpkhi z3.d, z1.s
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: add z1.d, z2.d, z3.d
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+ %bval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+ %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ %bext = zext <vscale x 4 x i8> %bval to <vscale x 4 x i64>
+ %res = add <vscale x 4 x i64> %aext, %bext
+ ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 4 x i64> @masked_zload_x2_4i16_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_zload_x2_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z1.s }, p0/z, [x1]
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: uunpkhi z3.d, z1.s
+; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: add z1.d, z2.d, z3.d
+; CHECK-NEXT: ret
+ %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+ %bval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+ %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ %bext = zext <vscale x 4 x i16> %bval to <vscale x 4 x i64>
+ %res = add <vscale x 4 x i64> %aext, %bext
+ ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 8 x i32> @masked_zload_x2_8i8_8i32(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_zload_x2_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.h }, p0/z, [x1]
+; CHECK-NEXT: uunpkhi z2.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpkhi z3.s, z1.h
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: add z1.s, z2.s, z3.s
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ %bext = zext <vscale x 8 x i8> %bval to <vscale x 8 x i32>
+ %res = add <vscale x 8 x i32> %aext, %bext
+ ret <vscale x 8 x i32> %res
+}
+
+define <vscale x 8 x i64> @masked_zload_x2_8i8_8i64(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_zload_x2_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.h }, p0/z, [x1]
+; CHECK-NEXT: uunpkhi z2.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: uunpklo z3.s, z1.h
+; CHECK-NEXT: uunpkhi z1.s, z1.h
+; CHECK-NEXT: uunpkhi z4.d, z2.s
+; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: uunpkhi z5.d, z0.s
+; CHECK-NEXT: uunpklo z0.d, z0.s
+; CHECK-NEXT: uunpklo z6.d, z3.s
+; CHECK-NEXT: uunpkhi z7.d, z1.s
+; CHECK-NEXT: uunpklo z24.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z3.s
+; CHECK-NEXT: add z0.d, z0.d, z6.d
+; CHECK-NEXT: add z3.d, z4.d, z7.d
+; CHECK-NEXT: add z1.d, z5.d, z1.d
+; CHECK-NEXT: add z2.d, z2.d, z24.d
+; CHECK-NEXT: ret
+ %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ %bext = zext <vscale x 8 x i8> %bval to <vscale x 8 x i64>
+ %res = add <vscale x 8 x i64> %aext, %bext
+ ret <vscale x 8 x i64> %res
+}
+
+
declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
index 132bb48d89292a..88e13ea1e0fa4f 100644
--- a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
@@ -327,6 +327,115 @@ define <vscale x 16 x i64> @zext_b_to_d(<vscale x 16 x i8> %a) {
ret <vscale x 16 x i64> %ext
}
+; Extending unpacked data to wide, illegal types
+
+define <vscale x 4 x i64> @zext_4i8_4i64(<vscale x 4 x i8> %aval) {
+; CHECK-LABEL: zext_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpkhi z1.d, z0.s
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 4 x i64> @zext_4i16_4i64(<vscale x 4 x i16> %aval) {
+; CHECK-LABEL: zext_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpkhi z1.d, z0.s
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 8 x i32> @zext_8i8_8i32(<vscale x 8 x i8> %aval) {
+; CHECK-LABEL: zext_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.h, z0.h, #0xff
+; CHECK-NEXT: uunpklo z2.s, z0.h
+; CHECK-NEXT: uunpkhi z1.s, z0.h
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %aext
+}
+
+define <vscale x 8 x i64> @zext_8i8_8i64(<vscale x 8 x i8> %aval) {
+; CHECK-LABEL: zext_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.h, z0.h, #0xff
+; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: uunpkhi z3.s, z0.h
+; CHECK-NEXT: uunpklo z0.d, z1.s
+; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uunpklo z2.d, z3.s
+; CHECK-NEXT: uunpkhi z3.d, z3.s
+; CHECK-NEXT: ret
+ %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %aext
+}
+
+define <vscale x 4 x i64> @sext_4i8_4i64(<vscale x 4 x i8> %aval) {
+; CHECK-LABEL: sext_4i8_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: sxtb z1.s, p0/m, z0.s
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 4 x i64> @sext_4i16_4i64(<vscale x 4 x i16> %aval) {
+; CHECK-LABEL: sext_4i16_4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: sxth z1.s, p0/m, z0.s
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+ %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+ ret <vscale x 4 x i64> %aext
+}
+
+define <vscale x 8 x i32> @sext_8i8_8i32(<vscale x 8 x i8> %aval) {
+; CHECK-LABEL: sext_8i8_8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: sxtb z1.h, p0/m, z0.h
+; CHECK-NEXT: sunpklo z0.s, z1.h
+; CHECK-NEXT: sunpkhi z1.s, z1.h
+; CHECK-NEXT: ret
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %aext
+}
+
+define <vscale x 8 x i64> @sext_8i8_8i64(<vscale x 8 x i8> %aval) {
+; CHECK-LABEL: sext_8i8_8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: sunpklo z1.s, z0.h
+; CHECK-NEXT: sunpkhi z3.s, z0.h
+; CHECK-NEXT: sunpklo z0.d, z1.s
+; CHECK-NEXT: sunpkhi z1.d, z1.s
+; CHECK-NEXT: sunpklo z2.d, z3.s
+; CHECK-NEXT: sunpkhi z3.d, z3.s
+; CHECK-NEXT: ret
+ %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %aext
+}
+
+
; Extending non power-of-two types
define <vscale x 2 x i64> @sext_i18_i64(<vscale x 2 x i18> %a) {