[llvm] 5948d4d - [RISCV] Add test coverage for buildvectors with long vslidedown sequences

Philip Reames via llvm-commits <llvm-commits at lists.llvm.org>
Fri Feb 9 11:27:13 PST 2024


Author: Philip Reames
Date: 2024-02-09T11:10:35-08:00
New Revision: 5948d4de1d965d88c8ca05cc84bd94a28fa53ba4

URL: https://github.com/llvm/llvm-project/commit/5948d4de1d965d88c8ca05cc84bd94a28fa53ba4
DIFF: https://github.com/llvm/llvm-project/commit/5948d4de1d965d88c8ca05cc84bd94a28fa53ba4.diff

LOG: [RISCV] Add test coverage for buildvectors with long vslidedown sequences

In advance of an upcoming change.
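
For orientation, the new tests exercise buildvectors written as insertelement
chains; with the current lowering, each element after the first becomes one
vslide1down.vx (or vfslide1down.vf for FP), so an N-element buildvector expands
into an N-1 instruction slide sequence. A minimal four-element sketch of that
shape (illustrative only, not part of this commit; the committed tests below
use 8- and 16-element vectors):

  define <4 x float> @buildvec_v4f32_sketch(float %e0, float %e1, float %e2, float %e3) {
    ; Each insertelement appends one element; the RISC-V backend currently
    ; lowers this chain to a vfmv.v.f followed by three vfslide1down.vf.
    %v0 = insertelement <4 x float> poison, float %e0, i64 0
    %v1 = insertelement <4 x float> %v0, float %e1, i64 1
    %v2 = insertelement <4 x float> %v1, float %e2, i64 2
    %v3 = insertelement <4 x float> %v2, float %e3, i64 3
    ret <4 x float> %v3
  }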

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 57b219343c3e38..a2bd862e2ce14f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1394,3 +1394,77 @@ define <2 x double> @vid_step2_v2f64() {
 ; CHECK-NEXT:    ret
   ret <2 x double> <double 0.0, double 2.0>
 }
+
+
+define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_v8f32_zvl256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x float> poison, float %e0, i64 0
+  %v1 = insertelement <8 x float> %v0, float %e1, i64 1
+  %v2 = insertelement <8 x float> %v1, float %e2, i64 2
+  %v3 = insertelement <8 x float> %v2, float %e3, i64 3
+  %v4 = insertelement <8 x float> %v3, float %e4, i64 4
+  %v5 = insertelement <8 x float> %v4, float %e5, i64 5
+  %v6 = insertelement <8 x float> %v5, float %e6, i64 6
+  %v7 = insertelement <8 x float> %v6, float %e7, i64 7
+  ret <8 x float> %v7
+}
+
+
+define <8 x double> @buildvec_v8f64_zvl256(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_v8f64_zvl256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+  ret <8 x double> %v7
+}
+
+define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(8, 128) {
+; CHECK-LABEL: buildvec_v8f64_zvl512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+  ret <8 x double> %v7
+}

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index dfafbfb97284cd..e691e635811544 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1178,3 +1178,512 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
   %v4 = insertelement <8 x i64> %v3, i64 %d, i32 7
   ret <8 x i64> %v4
 }
+
+
+define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
+; RV32-LABEL: buildvec_v16i8_loads_contigous:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    lbu a1, 1(a0)
+; RV32-NEXT:    lbu a2, 2(a0)
+; RV32-NEXT:    lbu a3, 3(a0)
+; RV32-NEXT:    lbu a4, 4(a0)
+; RV32-NEXT:    lbu a5, 5(a0)
+; RV32-NEXT:    lbu a6, 6(a0)
+; RV32-NEXT:    lbu a7, 7(a0)
+; RV32-NEXT:    lbu t0, 8(a0)
+; RV32-NEXT:    lbu t1, 9(a0)
+; RV32-NEXT:    lbu t2, 10(a0)
+; RV32-NEXT:    lbu t3, 11(a0)
+; RV32-NEXT:    lbu t4, 12(a0)
+; RV32-NEXT:    lbu t5, 13(a0)
+; RV32-NEXT:    lbu t6, 14(a0)
+; RV32-NEXT:    lbu s0, 15(a0)
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vlse8.v v8, (a0), zero
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1down.vx v8, v8, a4
+; RV32-NEXT:    vslide1down.vx v8, v8, a5
+; RV32-NEXT:    vslide1down.vx v8, v8, a6
+; RV32-NEXT:    vslide1down.vx v8, v8, a7
+; RV32-NEXT:    vslide1down.vx v8, v8, t0
+; RV32-NEXT:    vslide1down.vx v8, v8, t1
+; RV32-NEXT:    vslide1down.vx v8, v8, t2
+; RV32-NEXT:    vslide1down.vx v8, v8, t3
+; RV32-NEXT:    vslide1down.vx v8, v8, t4
+; RV32-NEXT:    vslide1down.vx v8, v8, t5
+; RV32-NEXT:    vslide1down.vx v8, v8, t6
+; RV32-NEXT:    vslide1down.vx v8, v8, s0
+; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_v16i8_loads_contigous:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset s0, -8
+; RV64-NEXT:    lbu a1, 1(a0)
+; RV64-NEXT:    lbu a2, 2(a0)
+; RV64-NEXT:    lbu a3, 3(a0)
+; RV64-NEXT:    lbu a4, 4(a0)
+; RV64-NEXT:    lbu a5, 5(a0)
+; RV64-NEXT:    lbu a6, 6(a0)
+; RV64-NEXT:    lbu a7, 7(a0)
+; RV64-NEXT:    lbu t0, 8(a0)
+; RV64-NEXT:    lbu t1, 9(a0)
+; RV64-NEXT:    lbu t2, 10(a0)
+; RV64-NEXT:    lbu t3, 11(a0)
+; RV64-NEXT:    lbu t4, 12(a0)
+; RV64-NEXT:    lbu t5, 13(a0)
+; RV64-NEXT:    lbu t6, 14(a0)
+; RV64-NEXT:    lbu s0, 15(a0)
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vlse8.v v8, (a0), zero
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    vslide1down.vx v8, v8, a4
+; RV64-NEXT:    vslide1down.vx v8, v8, a5
+; RV64-NEXT:    vslide1down.vx v8, v8, a6
+; RV64-NEXT:    vslide1down.vx v8, v8, a7
+; RV64-NEXT:    vslide1down.vx v8, v8, t0
+; RV64-NEXT:    vslide1down.vx v8, v8, t1
+; RV64-NEXT:    vslide1down.vx v8, v8, t2
+; RV64-NEXT:    vslide1down.vx v8, v8, t3
+; RV64-NEXT:    vslide1down.vx v8, v8, t4
+; RV64-NEXT:    vslide1down.vx v8, v8, t5
+; RV64-NEXT:    vslide1down.vx v8, v8, t6
+; RV64-NEXT:    vslide1down.vx v8, v8, s0
+; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 2
+  %p4 = getelementptr i8, ptr %p, i32 3
+  %p5 = getelementptr i8, ptr %p, i32 4
+  %p6 = getelementptr i8, ptr %p, i32 5
+  %p7 = getelementptr i8, ptr %p, i32 6
+  %p8 = getelementptr i8, ptr %p, i32 7
+  %p9 = getelementptr i8, ptr %p, i32 8
+  %p10 = getelementptr i8, ptr %p, i32 9
+  %p11 = getelementptr i8, ptr %p, i32 10
+  %p12 = getelementptr i8, ptr %p, i32 11
+  %p13 = getelementptr i8, ptr %p, i32 12
+  %p14 = getelementptr i8, ptr %p, i32 13
+  %p15 = getelementptr i8, ptr %p, i32 14
+  %p16 = getelementptr i8, ptr %p, i32 15
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
+
+
+define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
+; RV32-LABEL: buildvec_v16i8_loads_gather:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    lbu a1, 1(a0)
+; RV32-NEXT:    lbu a2, 22(a0)
+; RV32-NEXT:    lbu a3, 31(a0)
+; RV32-NEXT:    lbu a4, 44(a0)
+; RV32-NEXT:    lbu a5, 55(a0)
+; RV32-NEXT:    lbu a6, 623(a0)
+; RV32-NEXT:    lbu a7, 75(a0)
+; RV32-NEXT:    lbu t0, 82(a0)
+; RV32-NEXT:    lbu t1, 93(a0)
+; RV32-NEXT:    lbu t2, 105(a0)
+; RV32-NEXT:    lbu t3, 161(a0)
+; RV32-NEXT:    lbu t4, 124(a0)
+; RV32-NEXT:    lbu t5, 163(a0)
+; RV32-NEXT:    lbu t6, 144(a0)
+; RV32-NEXT:    lbu s0, 154(a0)
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vlse8.v v8, (a0), zero
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1down.vx v8, v8, a4
+; RV32-NEXT:    vslide1down.vx v8, v8, a5
+; RV32-NEXT:    vslide1down.vx v8, v8, a6
+; RV32-NEXT:    vslide1down.vx v8, v8, a7
+; RV32-NEXT:    vslide1down.vx v8, v8, t0
+; RV32-NEXT:    vslide1down.vx v8, v8, t1
+; RV32-NEXT:    vslide1down.vx v8, v8, t2
+; RV32-NEXT:    vslide1down.vx v8, v8, t3
+; RV32-NEXT:    vslide1down.vx v8, v8, t4
+; RV32-NEXT:    vslide1down.vx v8, v8, t5
+; RV32-NEXT:    vslide1down.vx v8, v8, t6
+; RV32-NEXT:    vslide1down.vx v8, v8, s0
+; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_v16i8_loads_gather:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset s0, -8
+; RV64-NEXT:    lbu a1, 1(a0)
+; RV64-NEXT:    lbu a2, 22(a0)
+; RV64-NEXT:    lbu a3, 31(a0)
+; RV64-NEXT:    lbu a4, 44(a0)
+; RV64-NEXT:    lbu a5, 55(a0)
+; RV64-NEXT:    lbu a6, 623(a0)
+; RV64-NEXT:    lbu a7, 75(a0)
+; RV64-NEXT:    lbu t0, 82(a0)
+; RV64-NEXT:    lbu t1, 93(a0)
+; RV64-NEXT:    lbu t2, 105(a0)
+; RV64-NEXT:    lbu t3, 161(a0)
+; RV64-NEXT:    lbu t4, 124(a0)
+; RV64-NEXT:    lbu t5, 163(a0)
+; RV64-NEXT:    lbu t6, 144(a0)
+; RV64-NEXT:    lbu s0, 154(a0)
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vlse8.v v8, (a0), zero
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    vslide1down.vx v8, v8, a4
+; RV64-NEXT:    vslide1down.vx v8, v8, a5
+; RV64-NEXT:    vslide1down.vx v8, v8, a6
+; RV64-NEXT:    vslide1down.vx v8, v8, a7
+; RV64-NEXT:    vslide1down.vx v8, v8, t0
+; RV64-NEXT:    vslide1down.vx v8, v8, t1
+; RV64-NEXT:    vslide1down.vx v8, v8, t2
+; RV64-NEXT:    vslide1down.vx v8, v8, t3
+; RV64-NEXT:    vslide1down.vx v8, v8, t4
+; RV64-NEXT:    vslide1down.vx v8, v8, t5
+; RV64-NEXT:    vslide1down.vx v8, v8, t6
+; RV64-NEXT:    vslide1down.vx v8, v8, s0
+; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 22
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+  %p13 = getelementptr i8, ptr %p, i32 124
+  %p14 = getelementptr i8, ptr %p, i32 163
+  %p15 = getelementptr i8, ptr %p, i32 144
+  %p16 = getelementptr i8, ptr %p, i32 154
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
+
+define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_low_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 82
+; CHECK-NEXT:    lbu a2, 93(a0)
+; CHECK-NEXT:    lbu a3, 105(a0)
+; CHECK-NEXT:    lbu a4, 161(a0)
+; CHECK-NEXT:    lbu a5, 124(a0)
+; CHECK-NEXT:    lbu a6, 163(a0)
+; CHECK-NEXT:    lbu a7, 144(a0)
+; CHECK-NEXT:    lbu a0, 154(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a1), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+  %p13 = getelementptr i8, ptr %p, i32 124
+  %p14 = getelementptr i8, ptr %p, i32 163
+  %p15 = getelementptr i8, ptr %p, i32 144
+  %p16 = getelementptr i8, ptr %p, i32 154
+
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v9 = insertelement <16 x i8> poison, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
+
+define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_high_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lbu a1, 1(a0)
+; CHECK-NEXT:    lbu a2, 22(a0)
+; CHECK-NEXT:    lbu a3, 31(a0)
+; CHECK-NEXT:    lbu a4, 44(a0)
+; CHECK-NEXT:    lbu a5, 55(a0)
+; CHECK-NEXT:    lbu a6, 623(a0)
+; CHECK-NEXT:    lbu a7, 75(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslidedown.vi v8, v8, 8
+; CHECK-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 22
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  ret <16 x i8> %v8
+}
+
+define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_edges:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 31
+; CHECK-NEXT:    lbu a2, 44(a0)
+; CHECK-NEXT:    lbu a3, 55(a0)
+; CHECK-NEXT:    lbu a4, 623(a0)
+; CHECK-NEXT:    lbu a5, 75(a0)
+; CHECK-NEXT:    lbu a6, 82(a0)
+; CHECK-NEXT:    lbu a7, 93(a0)
+; CHECK-NEXT:    lbu t0, 105(a0)
+; CHECK-NEXT:    lbu a0, 161(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a1), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslide1down.vx v8, v8, t0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslidedown.vi v8, v8, 4
+; CHECK-NEXT:    ret
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+
+  %v4 = insertelement <16 x i8> poison, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  ret <16 x i8> %v12
+}
+
+define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_loads_undef_scattered:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lbu a1, 1(a0)
+; CHECK-NEXT:    lbu a2, 44(a0)
+; CHECK-NEXT:    lbu a3, 55(a0)
+; CHECK-NEXT:    lbu a4, 75(a0)
+; CHECK-NEXT:    lbu a5, 82(a0)
+; CHECK-NEXT:    lbu a6, 93(a0)
+; CHECK-NEXT:    lbu a7, 124(a0)
+; CHECK-NEXT:    lbu t0, 144(a0)
+; CHECK-NEXT:    lbu t1, 154(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, t0
+; CHECK-NEXT:    vslide1down.vx v8, v8, t1
+; CHECK-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 22
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+  %p13 = getelementptr i8, ptr %p, i32 124
+  %p14 = getelementptr i8, ptr %p, i32 163
+  %p15 = getelementptr i8, ptr %p, i32 144
+  %p16 = getelementptr i8, ptr %p, i32 154
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 undef, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 undef, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 undef, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 undef, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 undef, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 undef, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
