[llvm] 7b9bf80 - [SLP][NFC] Add tests with strided loads.

Alexey Bataev via llvm-commits <llvm-commits at lists.llvm.org>
Thu Feb 1 09:09:59 PST 2024


Author: Alexey Bataev
Date: 2024-02-01T09:09:02-08:00
New Revision: 7b9bf80ab51b9b09d9f07fb636f4b64581a3c3e0

URL: https://github.com/llvm/llvm-project/commit/7b9bf80ab51b9b09d9f07fb636f4b64581a3c3e0
DIFF: https://github.com/llvm/llvm-project/commit/7b9bf80ab51b9b09d9f07fb636f4b64581a3c3e0.diff

LOG: [SLP][NFC] Add tests with strided loads.
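
For context: the new files record what SLP currently emits on RISC-V for
strided access patterns (masked gathers in complex-loads.ll, plain scalar
code in strided-loads-vectorized.ll), giving any follow-up strided-load
work a baseline to diff against. A strided load reads elements a constant
byte distance apart; with +v such a pattern can be expressed with LLVM's
experimental VP strided-load intrinsic. A minimal illustrative sketch
(the function name and shapes here are made up for illustration; none of
these tests check for this output):

  define <8 x float> @stride_sketch(ptr %p, i64 %stride) {
    ; Load 8 floats starting at %p, each %stride bytes apart,
    ; with an all-true mask and an explicit vector length of 8.
    %v = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr %p, i64 %stride, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
    ret <8 x float> %v
  }

  declare <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr, i64, <8 x i1>, i32)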

Added: 
    llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/strided-unsupported-type.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
new file mode 100644
index 0000000000000..ccc31193c7215
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -0,0 +1,512 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-80 | FileCheck %s
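+; Note: -slp-threshold=-80 biases the SLP cost model so that even trees it
+; considers unprofitable are vectorized; the CHECK lines below therefore
+; capture the masked-gather/shuffle form SLP currently produces for these
+; strided i8 accesses.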
+
+define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.ptr, ptr %add.ptr64) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1
+; CHECK-NEXT:    [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[PIX1]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ADD_PTR3]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x ptr> [[TMP5]], <8 x ptr> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, <8 x ptr> [[TMP6]], <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP7]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x ptr> poison, ptr [[PIX2]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x ptr> [[TMP9]], ptr [[ADD_PTR644]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x ptr> [[TMP10]], <8 x ptr> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, <8 x ptr> [[TMP11]], <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP12]], i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
+; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
+; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
+; CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT:    [[TMP15:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
+; CHECK-NEXT:    [[TMP16:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
+; CHECK-NEXT:    [[TMP17:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
+; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr i8, ptr null, i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x ptr> <ptr null, ptr poison>, ptr [[ARRAYIDX3_3]], i32 1
+; CHECK-NEXT:    [[TMP19:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP18]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT:    [[TMP20:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT:    [[TMP21:%.*]] = load <4 x i8>, ptr null, align 1
+; CHECK-NEXT:    [[TMP22:%.*]] = load <4 x i8>, ptr null, align 1
+; CHECK-NEXT:    [[TMP23:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT:    [[TMP24:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <16 x i8> [[TMP25]], <16 x i8> [[TMP26]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <16 x i8> [[TMP27]], <16 x i8> [[TMP28]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <16 x i8> [[TMP29]], <16 x i8> [[TMP30]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:    [[TMP32:%.*]] = zext <16 x i8> [[TMP31]] to <16 x i32>
+; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP22]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <16 x i8> [[TMP33]], <16 x i8> [[TMP34]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <16 x i8> [[TMP35]], <16 x i8> [[TMP36]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:    [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32>
+; CHECK-NEXT:    [[TMP41:%.*]] = sub <16 x i32> [[TMP32]], [[TMP40]]
+; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <16 x i8> poison, i8 [[TMP23]], i32 0
+; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <16 x i8> [[TMP42]], i8 [[TMP20]], i32 1
+; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <2 x i8> [[TMP19]], <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP49:%.*]] = shufflevector <16 x i8> [[TMP47]], <16 x i8> [[TMP48]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:    [[TMP50:%.*]] = zext <16 x i8> [[TMP49]] to <16 x i32>
+; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP50]], <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP24]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP53:%.*]] = shufflevector <4 x i8> [[TMP17]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <16 x i8> [[TMP52]], <16 x i8> [[TMP53]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP56:%.*]] = shufflevector <16 x i8> [[TMP54]], <16 x i8> [[TMP55]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:    [[TMP57:%.*]] = zext <16 x i8> [[TMP56]] to <16 x i32>
+; CHECK-NEXT:    [[TMP58:%.*]] = sub <16 x i32> [[TMP51]], [[TMP57]]
+; CHECK-NEXT:    [[TMP59:%.*]] = shl <16 x i32> [[TMP58]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP60:%.*]] = add <16 x i32> [[TMP59]], [[TMP41]]
+; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-NEXT:    [[TMP62:%.*]] = add <16 x i32> [[TMP60]], [[TMP61]]
+; CHECK-NEXT:    [[TMP63:%.*]] = sub <16 x i32> [[TMP60]], [[TMP61]]
+; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 22, i32 18, i32 26, i32 30, i32 5, i32 1, i32 9, i32 13, i32 20, i32 16, i32 24, i32 28>
+; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> poison, <16 x i32> <i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP66:%.*]] = add <16 x i32> [[TMP64]], [[TMP65]]
+; CHECK-NEXT:    [[TMP67:%.*]] = sub <16 x i32> [[TMP64]], [[TMP65]]
+; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-NEXT:    [[TMP70:%.*]] = add <16 x i32> [[TMP68]], [[TMP69]]
+; CHECK-NEXT:    [[TMP71:%.*]] = sub <16 x i32> [[TMP68]], [[TMP69]]
+; CHECK-NEXT:    [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31>
+; CHECK-NEXT:    [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP72]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEXT:    [[TMP74:%.*]] = add <16 x i32> [[TMP72]], [[TMP73]]
+; CHECK-NEXT:    [[TMP75:%.*]] = sub <16 x i32> [[TMP72]], [[TMP73]]
+; CHECK-NEXT:    [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP77:%.*]] = shufflevector <16 x i32> [[TMP32]], <16 x i32> [[TMP64]], <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP78:%.*]] = lshr <16 x i32> [[TMP77]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP79:%.*]] = and <16 x i32> [[TMP78]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
+; CHECK-NEXT:    [[TMP80:%.*]] = mul <16 x i32> [[TMP79]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP81:%.*]] = add <16 x i32> [[TMP80]], [[TMP76]]
+; CHECK-NEXT:    [[TMP82:%.*]] = xor <16 x i32> [[TMP81]], [[TMP77]]
+; CHECK-NEXT:    [[TMP83:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP82]])
+; CHECK-NEXT:    ret i32 [[TMP83]]
+;
+entry:
+  %0 = load i8, ptr %pix1, align 1
+  %conv = zext i8 %0 to i32
+  %1 = load i8, ptr %pix2, align 1
+  %conv2 = zext i8 %1 to i32
+  %sub = sub i32 %conv, %conv2
+  %arrayidx3 = getelementptr i8, ptr %pix1, i64 4
+  %2 = load i8, ptr %arrayidx3, align 1
+  %conv4 = zext i8 %2 to i32
+  %arrayidx5 = getelementptr i8, ptr %pix2, i64 4
+  %3 = load i8, ptr %arrayidx5, align 1
+  %conv6 = zext i8 %3 to i32
+  %sub7 = sub i32 %conv4, %conv6
+  %shl = shl i32 %sub7, 16
+  %add = add i32 %shl, %sub
+  %arrayidx8 = getelementptr i8, ptr %pix1, i64 1
+  %4 = load i8, ptr %arrayidx8, align 1
+  %conv9 = zext i8 %4 to i32
+  %arrayidx10 = getelementptr i8, ptr %pix2, i64 1
+  %5 = load i8, ptr %arrayidx10, align 1
+  %conv11 = zext i8 %5 to i32
+  %sub12 = sub i32 %conv9, %conv11
+  %arrayidx13 = getelementptr i8, ptr %pix1, i64 5
+  %6 = load i8, ptr %arrayidx13, align 1
+  %conv14 = zext i8 %6 to i32
+  %arrayidx15 = getelementptr i8, ptr %pix2, i64 5
+  %7 = load i8, ptr %arrayidx15, align 1
+  %conv16 = zext i8 %7 to i32
+  %sub17 = sub i32 %conv14, %conv16
+  %shl18 = shl i32 %sub17, 16
+  %add19 = add i32 %shl18, %sub12
+  %arrayidx20 = getelementptr i8, ptr %pix1, i64 2
+  %8 = load i8, ptr %arrayidx20, align 1
+  %conv21 = zext i8 %8 to i32
+  %arrayidx22 = getelementptr i8, ptr %pix2, i64 2
+  %9 = load i8, ptr %arrayidx22, align 1
+  %conv23 = zext i8 %9 to i32
+  %sub24 = sub i32 %conv21, %conv23
+  %arrayidx25 = getelementptr i8, ptr %pix1, i64 6
+  %10 = load i8, ptr %arrayidx25, align 1
+  %conv26 = zext i8 %10 to i32
+  %arrayidx27 = getelementptr i8, ptr %pix2, i64 6
+  %11 = load i8, ptr %arrayidx27, align 1
+  %conv28 = zext i8 %11 to i32
+  %sub29 = sub i32 %conv26, %conv28
+  %shl30 = shl i32 %sub29, 16
+  %add31 = add i32 %shl30, %sub24
+  %arrayidx32 = getelementptr i8, ptr %pix1, i64 3
+  %12 = load i8, ptr %arrayidx32, align 1
+  %conv33 = zext i8 %12 to i32
+  %arrayidx34 = getelementptr i8, ptr %pix2, i64 3
+  %13 = load i8, ptr %arrayidx34, align 1
+  %conv35 = zext i8 %13 to i32
+  %sub36 = sub i32 %conv33, %conv35
+  %arrayidx37 = getelementptr i8, ptr %pix1, i64 7
+  %14 = load i8, ptr %arrayidx37, align 1
+  %conv38 = zext i8 %14 to i32
+  %arrayidx39 = getelementptr i8, ptr %pix2, i64 7
+  %15 = load i8, ptr %arrayidx39, align 1
+  %conv40 = zext i8 %15 to i32
+  %sub41 = sub i32 %conv38, %conv40
+  %shl42 = shl i32 %sub41, 16
+  %add43 = add i32 %shl42, %sub36
+  %add44 = add i32 %add19, %add
+  %sub45 = sub i32 %add, %add19
+  %add46 = add i32 %add43, %add31
+  %sub47 = sub i32 %add31, %add43
+  %add48 = add i32 %add46, %add44
+  %sub51 = sub i32 %add44, %add46
+  %add55 = add i32 %sub47, %sub45
+  %sub59 = sub i32 %sub45, %sub47
+  %add.ptr3 = getelementptr i8, ptr %pix1, i64 %idx.ext
+  %add.ptr644 = getelementptr i8, ptr %pix2, i64 %idx.ext63
+  %16 = load i8, ptr %add.ptr3, align 1
+  %conv.1 = zext i8 %16 to i32
+  %17 = load i8, ptr %add.ptr644, align 1
+  %conv2.1 = zext i8 %17 to i32
+  %sub.1 = sub i32 %conv.1, %conv2.1
+  %arrayidx3.1 = getelementptr i8, ptr %add.ptr3, i64 4
+  %18 = load i8, ptr %arrayidx3.1, align 1
+  %conv4.1 = zext i8 %18 to i32
+  %arrayidx5.1 = getelementptr i8, ptr %add.ptr644, i64 4
+  %19 = load i8, ptr %arrayidx5.1, align 1
+  %conv6.1 = zext i8 %19 to i32
+  %sub7.1 = sub i32 %conv4.1, %conv6.1
+  %shl.1 = shl i32 %sub7.1, 16
+  %add.1 = add i32 %shl.1, %sub.1
+  %arrayidx8.1 = getelementptr i8, ptr %add.ptr3, i64 1
+  %20 = load i8, ptr %arrayidx8.1, align 1
+  %conv9.1 = zext i8 %20 to i32
+  %arrayidx10.1 = getelementptr i8, ptr %add.ptr644, i64 1
+  %21 = load i8, ptr %arrayidx10.1, align 1
+  %conv11.1 = zext i8 %21 to i32
+  %sub12.1 = sub i32 %conv9.1, %conv11.1
+  %arrayidx13.1 = getelementptr i8, ptr %add.ptr3, i64 5
+  %22 = load i8, ptr %arrayidx13.1, align 1
+  %conv14.1 = zext i8 %22 to i32
+  %arrayidx15.1 = getelementptr i8, ptr %add.ptr644, i64 5
+  %23 = load i8, ptr %arrayidx15.1, align 1
+  %conv16.1 = zext i8 %23 to i32
+  %sub17.1 = sub i32 %conv14.1, %conv16.1
+  %shl18.1 = shl i32 %sub17.1, 16
+  %add19.1 = add i32 %shl18.1, %sub12.1
+  %arrayidx20.1 = getelementptr i8, ptr %add.ptr3, i64 2
+  %24 = load i8, ptr %arrayidx20.1, align 1
+  %conv21.1 = zext i8 %24 to i32
+  %arrayidx22.1 = getelementptr i8, ptr %add.ptr644, i64 2
+  %25 = load i8, ptr %arrayidx22.1, align 1
+  %conv23.1 = zext i8 %25 to i32
+  %sub24.1 = sub i32 %conv21.1, %conv23.1
+  %arrayidx25.1 = getelementptr i8, ptr %add.ptr3, i64 6
+  %26 = load i8, ptr %arrayidx25.1, align 1
+  %conv26.1 = zext i8 %26 to i32
+  %arrayidx27.1 = getelementptr i8, ptr %add.ptr644, i64 6
+  %27 = load i8, ptr %arrayidx27.1, align 1
+  %conv28.1 = zext i8 %27 to i32
+  %sub29.1 = sub i32 %conv26.1, %conv28.1
+  %shl30.1 = shl i32 %sub29.1, 16
+  %add31.1 = add i32 %shl30.1, %sub24.1
+  %arrayidx32.1 = getelementptr i8, ptr %add.ptr3, i64 3
+  %28 = load i8, ptr %arrayidx32.1, align 1
+  %conv33.1 = zext i8 %28 to i32
+  %arrayidx34.1 = getelementptr i8, ptr %add.ptr644, i64 3
+  %29 = load i8, ptr %arrayidx34.1, align 1
+  %conv35.1 = zext i8 %29 to i32
+  %sub36.1 = sub i32 %conv33.1, %conv35.1
+  %arrayidx37.1 = getelementptr i8, ptr %add.ptr3, i64 7
+  %30 = load i8, ptr %arrayidx37.1, align 1
+  %conv38.1 = zext i8 %30 to i32
+  %arrayidx39.1 = getelementptr i8, ptr %add.ptr644, i64 7
+  %31 = load i8, ptr %arrayidx39.1, align 1
+  %conv40.1 = zext i8 %31 to i32
+  %sub41.1 = sub i32 %conv38.1, %conv40.1
+  %shl42.1 = shl i32 %sub41.1, 16
+  %add43.1 = add i32 %shl42.1, %sub36.1
+  %add44.1 = add i32 %add19.1, %add.1
+  %sub45.1 = sub i32 %add.1, %add19.1
+  %add46.1 = add i32 %add43.1, %add31.1
+  %sub47.1 = sub i32 %add31.1, %add43.1
+  %add48.1 = add i32 %add46.1, %add44.1
+  %sub51.1 = sub i32 %add44.1, %add46.1
+  %add55.1 = add i32 %sub47.1, %sub45.1
+  %sub59.1 = sub i32 %sub45.1, %sub47.1
+  %add.ptr.1 = getelementptr i8, ptr %add.ptr, i64 %idx.ext
+  %add.ptr64.1 = getelementptr i8, ptr %add.ptr64, i64 %idx.ext63
+  %32 = load i8, ptr %add.ptr.1, align 1
+  %conv.2 = zext i8 %32 to i32
+  %33 = load i8, ptr %add.ptr64.1, align 1
+  %conv2.2 = zext i8 %33 to i32
+  %sub.2 = sub i32 %conv.2, %conv2.2
+  %arrayidx3.2 = getelementptr i8, ptr %add.ptr.1, i64 4
+  %34 = load i8, ptr %arrayidx3.2, align 1
+  %conv4.2 = zext i8 %34 to i32
+  %arrayidx5.2 = getelementptr i8, ptr %add.ptr64.1, i64 4
+  %35 = load i8, ptr %arrayidx5.2, align 1
+  %conv6.2 = zext i8 %35 to i32
+  %sub7.2 = sub i32 %conv4.2, %conv6.2
+  %shl.2 = shl i32 %sub7.2, 16
+  %add.2 = add i32 %shl.2, %sub.2
+  %arrayidx8.2 = getelementptr i8, ptr %add.ptr.1, i64 1
+  %36 = load i8, ptr %arrayidx8.2, align 1
+  %conv9.2 = zext i8 %36 to i32
+  %arrayidx10.2 = getelementptr i8, ptr %add.ptr64.1, i64 1
+  %37 = load i8, ptr %arrayidx10.2, align 1
+  %conv11.2 = zext i8 %37 to i32
+  %sub12.2 = sub i32 %conv9.2, %conv11.2
+  %arrayidx13.2 = getelementptr i8, ptr %add.ptr.1, i64 5
+  %38 = load i8, ptr %arrayidx13.2, align 1
+  %conv14.2 = zext i8 %38 to i32
+  %arrayidx15.2 = getelementptr i8, ptr %add.ptr64.1, i64 5
+  %39 = load i8, ptr %arrayidx15.2, align 1
+  %conv16.2 = zext i8 %39 to i32
+  %sub17.2 = sub i32 %conv14.2, %conv16.2
+  %shl18.2 = shl i32 %sub17.2, 16
+  %add19.2 = add i32 %shl18.2, %sub12.2
+  %arrayidx20.2 = getelementptr i8, ptr %add.ptr.1, i64 2
+  %40 = load i8, ptr %arrayidx20.2, align 1
+  %conv21.2 = zext i8 %40 to i32
+  %arrayidx22.2 = getelementptr i8, ptr %add.ptr64.1, i64 2
+  %41 = load i8, ptr %arrayidx22.2, align 1
+  %conv23.2 = zext i8 %41 to i32
+  %sub24.2 = sub i32 %conv21.2, %conv23.2
+  %arrayidx25.2 = getelementptr i8, ptr %add.ptr.1, i64 6
+  %42 = load i8, ptr %arrayidx25.2, align 1
+  %conv26.2 = zext i8 %42 to i32
+  %arrayidx27.2 = getelementptr i8, ptr %add.ptr64.1, i64 6
+  %43 = load i8, ptr %arrayidx27.2, align 1
+  %conv28.2 = zext i8 %43 to i32
+  %sub29.2 = sub i32 %conv26.2, %conv28.2
+  %shl30.2 = shl i32 %sub29.2, 16
+  %add31.2 = add i32 %shl30.2, %sub24.2
+  %arrayidx32.2 = getelementptr i8, ptr %add.ptr.1, i64 3
+  %44 = load i8, ptr %arrayidx32.2, align 1
+  %conv33.2 = zext i8 %44 to i32
+  %arrayidx34.2 = getelementptr i8, ptr %add.ptr64.1, i64 3
+  %45 = load i8, ptr %arrayidx34.2, align 1
+  %conv35.2 = zext i8 %45 to i32
+  %sub36.2 = sub i32 %conv33.2, %conv35.2
+  %arrayidx37.2 = getelementptr i8, ptr %add.ptr.1, i64 7
+  %46 = load i8, ptr %arrayidx37.2, align 1
+  %conv38.2 = zext i8 %46 to i32
+  %arrayidx39.2 = getelementptr i8, ptr %add.ptr64.1, i64 7
+  %47 = load i8, ptr %arrayidx39.2, align 1
+  %conv40.2 = zext i8 %47 to i32
+  %sub41.2 = sub i32 %conv38.2, %conv40.2
+  %shl42.2 = shl i32 %sub41.2, 16
+  %add43.2 = add i32 %shl42.2, %sub36.2
+  %add44.2 = add i32 %add19.2, %add.2
+  %sub45.2 = sub i32 %add.2, %add19.2
+  %add46.2 = add i32 %add43.2, %add31.2
+  %sub47.2 = sub i32 %add31.2, %add43.2
+  %add48.2 = add i32 %add46.2, %add44.2
+  %sub51.2 = sub i32 %add44.2, %add46.2
+  %add55.2 = add i32 %sub47.2, %sub45.2
+  %sub59.2 = sub i32 %sub45.2, %sub47.2
+  %48 = load i8, ptr null, align 1
+  %conv.3 = zext i8 %48 to i32
+  %49 = load i8, ptr null, align 1
+  %conv2.3 = zext i8 %49 to i32
+  %sub.3 = sub i32 %conv.3, %conv2.3
+  %arrayidx3.3 = getelementptr i8, ptr null, i64 4
+  %50 = load i8, ptr %arrayidx3.3, align 1
+  %conv4.3 = zext i8 %50 to i32
+  %arrayidx5.3 = getelementptr i8, ptr null, i64 4
+  %51 = load i8, ptr %arrayidx5.3, align 1
+  %conv6.3 = zext i8 %51 to i32
+  %sub7.3 = sub i32 %conv4.3, %conv6.3
+  %shl.3 = shl i32 %sub7.3, 16
+  %add.3 = add i32 %shl.3, %sub.3
+  %arrayidx8.3 = getelementptr i8, ptr null, i64 1
+  %52 = load i8, ptr %arrayidx8.3, align 1
+  %conv9.3 = zext i8 %52 to i32
+  %arrayidx10.3 = getelementptr i8, ptr null, i64 1
+  %53 = load i8, ptr %arrayidx10.3, align 1
+  %conv11.3 = zext i8 %53 to i32
+  %sub12.3 = sub i32 %conv9.3, %conv11.3
+  %54 = load i8, ptr null, align 1
+  %conv14.3 = zext i8 %54 to i32
+  %arrayidx15.3 = getelementptr i8, ptr null, i64 5
+  %55 = load i8, ptr %arrayidx15.3, align 1
+  %conv16.3 = zext i8 %55 to i32
+  %sub17.3 = sub i32 %conv14.3, %conv16.3
+  %shl18.3 = shl i32 %sub17.3, 16
+  %add19.3 = add i32 %shl18.3, %sub12.3
+  %arrayidx20.3 = getelementptr i8, ptr null, i64 2
+  %56 = load i8, ptr %arrayidx20.3, align 1
+  %conv21.3 = zext i8 %56 to i32
+  %arrayidx22.3 = getelementptr i8, ptr null, i64 2
+  %57 = load i8, ptr %arrayidx22.3, align 1
+  %conv23.3 = zext i8 %57 to i32
+  %sub24.3 = sub i32 %conv21.3, %conv23.3
+  %58 = load i8, ptr null, align 1
+  %conv26.3 = zext i8 %58 to i32
+  %arrayidx27.3 = getelementptr i8, ptr null, i64 6
+  %59 = load i8, ptr %arrayidx27.3, align 1
+  %conv28.3 = zext i8 %59 to i32
+  %sub29.3 = sub i32 %conv26.3, %conv28.3
+  %shl30.3 = shl i32 %sub29.3, 16
+  %add31.3 = add i32 %shl30.3, %sub24.3
+  %arrayidx32.3 = getelementptr i8, ptr null, i64 3
+  %60 = load i8, ptr %arrayidx32.3, align 1
+  %conv33.3 = zext i8 %60 to i32
+  %arrayidx34.3 = getelementptr i8, ptr null, i64 3
+  %61 = load i8, ptr %arrayidx34.3, align 1
+  %conv35.3 = zext i8 %61 to i32
+  %sub36.3 = sub i32 %conv33.3, %conv35.3
+  %62 = load i8, ptr null, align 1
+  %conv38.3 = zext i8 %62 to i32
+  %arrayidx39.3 = getelementptr i8, ptr null, i64 7
+  %63 = load i8, ptr %arrayidx39.3, align 1
+  %conv40.3 = zext i8 %63 to i32
+  %sub41.3 = sub i32 %conv38.3, %conv40.3
+  %shl42.3 = shl i32 %sub41.3, 16
+  %add43.3 = add i32 %shl42.3, %sub36.3
+  %add44.3 = add i32 %add19.3, %add.3
+  %sub45.3 = sub i32 %add.3, %add19.3
+  %add46.3 = add i32 %add43.3, %add31.3
+  %sub47.3 = sub i32 %add31.3, %add43.3
+  %add48.3 = add i32 %add46.3, %add44.3
+  %sub51.3 = sub i32 %add44.3, %add46.3
+  %add55.3 = add i32 %sub47.3, %sub45.3
+  %sub59.3 = sub i32 %sub45.3, %sub47.3
+  %add78 = add i32 %add48.1, %add48
+  %sub86 = sub i32 %add48, %add48.1
+  %add94 = add i32 %add48.3, %add48.2
+  %sub102 = sub i32 %add48.2, %add48.3
+  %add103 = add i32 %add94, %add78
+  %sub104 = sub i32 %add78, %add94
+  %add105 = add i32 %sub102, %sub86
+  %sub106 = sub i32 %sub86, %sub102
+  %shr.i = lshr i32 %conv.3, 15
+  %and.i = and i32 %shr.i, 65537
+  %mul.i = mul i32 %and.i, 65535
+  %add.i = add i32 %mul.i, %add103
+  %xor.i = xor i32 %add.i, %conv.3
+  %shr.i49 = lshr i32 %add46.2, 15
+  %and.i50 = and i32 %shr.i49, 65537
+  %mul.i51 = mul i32 %and.i50, 65535
+  %add.i52 = add i32 %mul.i51, %add105
+  %xor.i53 = xor i32 %add.i52, %add46.2
+  %shr.i54 = lshr i32 %add46.1, 15
+  %and.i55 = and i32 %shr.i54, 65537
+  %mul.i56 = mul i32 %and.i55, 65535
+  %add.i57 = add i32 %mul.i56, %sub104
+  %xor.i58 = xor i32 %add.i57, %add46.1
+  %shr.i59 = lshr i32 %add46, 15
+  %and.i60 = and i32 %shr.i59, 65537
+  %mul.i61 = mul i32 %and.i60, 65535
+  %add.i62 = add i32 %mul.i61, %sub106
+  %xor.i63 = xor i32 %add.i62, %add46
+  %add110 = add i32 %xor.i53, %xor.i
+  %add112 = add i32 %add110, %xor.i58
+  %add113 = add i32 %add112, %xor.i63
+  %add78.1 = add i32 %add55.1, %add55
+  %sub86.1 = sub i32 %add55, %add55.1
+  %add94.1 = add i32 %add55.3, %add55.2
+  %sub102.1 = sub i32 %add55.2, %add55.3
+  %add103.1 = add i32 %add94.1, %add78.1
+  %sub104.1 = sub i32 %add78.1, %add94.1
+  %add105.1 = add i32 %sub102.1, %sub86.1
+  %sub106.1 = sub i32 %sub86.1, %sub102.1
+  %shr.i.1 = lshr i32 %conv9.2, 15
+  %and.i.1 = and i32 %shr.i.1, 65537
+  %mul.i.1 = mul i32 %and.i.1, 65535
+  %add.i.1 = add i32 %mul.i.1, %add103.1
+  %xor.i.1 = xor i32 %add.i.1, %conv9.2
+  %shr.i49.1 = lshr i32 %conv.2, 15
+  %and.i50.1 = and i32 %shr.i49.1, 65537
+  %mul.i51.1 = mul i32 %and.i50.1, 65535
+  %add.i52.1 = add i32 %mul.i51.1, %add105.1
+  %xor.i53.1 = xor i32 %add.i52.1, %conv.2
+  %shr.i54.1 = lshr i32 %sub47.1, 15
+  %and.i55.1 = and i32 %shr.i54.1, 65537
+  %mul.i56.1 = mul i32 %and.i55.1, 65535
+  %add.i57.1 = add i32 %mul.i56.1, %sub104.1
+  %xor.i58.1 = xor i32 %add.i57.1, %sub47.1
+  %shr.i59.1 = lshr i32 %sub47, 15
+  %and.i60.1 = and i32 %shr.i59.1, 65537
+  %mul.i61.1 = mul i32 %and.i60.1, 65535
+  %add.i62.1 = add i32 %mul.i61.1, %sub106.1
+  %xor.i63.1 = xor i32 %add.i62.1, %sub47
+  %add108.1 = add i32 %xor.i53.1, %add113
+  %add110.1 = add i32 %add108.1, %xor.i.1
+  %add112.1 = add i32 %add110.1, %xor.i58.1
+  %add113.1 = add i32 %add112.1, %xor.i63.1
+  %add78.2 = add i32 %sub51.1, %sub51
+  %sub86.2 = sub i32 %sub51, %sub51.1
+  %add94.2 = add i32 %sub51.3, %sub51.2
+  %sub102.2 = sub i32 %sub51.2, %sub51.3
+  %add103.2 = add i32 %add94.2, %add78.2
+  %sub104.2 = sub i32 %add78.2, %add94.2
+  %add105.2 = add i32 %sub102.2, %sub86.2
+  %sub106.2 = sub i32 %sub86.2, %sub102.2
+  %shr.i.2 = lshr i32 %conv9.1, 15
+  %and.i.2 = and i32 %shr.i.2, 65537
+  %mul.i.2 = mul i32 %and.i.2, 65535
+  %add.i.2 = add i32 %mul.i.2, %add103.2
+  %xor.i.2 = xor i32 %add.i.2, %conv9.1
+  %shr.i49.2 = lshr i32 %conv.1, 15
+  %and.i50.2 = and i32 %shr.i49.2, 65537
+  %mul.i51.2 = mul i32 %and.i50.2, 65535
+  %add.i52.2 = add i32 %mul.i51.2, %add105.2
+  %xor.i53.2 = xor i32 %add.i52.2, %conv.1
+  %shr.i54.2 = lshr i32 %conv21.1, 15
+  %and.i55.2 = and i32 %shr.i54.2, 65537
+  %mul.i56.2 = mul i32 %and.i55.2, 65535
+  %add.i57.2 = add i32 %mul.i56.2, %sub104.2
+  %xor.i58.2 = xor i32 %add.i57.2, %conv21.1
+  %shr.i59.2 = lshr i32 %add44, 15
+  %and.i60.2 = and i32 %shr.i59.2, 65537
+  %mul.i61.2 = mul i32 %and.i60.2, 65535
+  %add.i62.2 = add i32 %mul.i61.2, %sub106.2
+  %xor.i63.2 = xor i32 %add.i62.2, %add44
+  %add108.2 = add i32 %xor.i53.2, %add113.1
+  %add110.2 = add i32 %add108.2, %xor.i.2
+  %add112.2 = add i32 %add110.2, %xor.i58.2
+  %add113.2 = add i32 %add112.2, %xor.i63.2
+  %add78.3 = add i32 %sub59.1, %sub59
+  %sub86.3 = sub i32 %sub59, %sub59.1
+  %add94.3 = add i32 %sub59.3, %sub59.2
+  %sub102.3 = sub i32 %sub59.2, %sub59.3
+  %add103.3 = add i32 %add94.3, %add78.3
+  %sub104.3 = sub i32 %add78.3, %add94.3
+  %add105.3 = add i32 %sub102.3, %sub86.3
+  %sub106.3 = sub i32 %sub86.3, %sub102.3
+  %shr.i.3 = lshr i32 %conv9, 15
+  %and.i.3 = and i32 %shr.i.3, 65537
+  %mul.i.3 = mul i32 %and.i.3, 65535
+  %add.i.3 = add i32 %mul.i.3, %add103.3
+  %xor.i.3 = xor i32 %add.i.3, %conv9
+  %shr.i49.3 = lshr i32 %conv, 15
+  %and.i50.3 = and i32 %shr.i49.3, 65537
+  %mul.i51.3 = mul i32 %and.i50.3, 65535
+  %add.i52.3 = add i32 %mul.i51.3, %add105.3
+  %xor.i53.3 = xor i32 %add.i52.3, %conv
+  %shr.i54.3 = lshr i32 %conv21, 15
+  %and.i55.3 = and i32 %shr.i54.3, 65537
+  %mul.i56.3 = mul i32 %and.i55.3, 65535
+  %add.i57.3 = add i32 %mul.i56.3, %sub104.3
+  %xor.i58.3 = xor i32 %add.i57.3, %conv21
+  %shr.i59.3 = lshr i32 %conv33, 15
+  %and.i60.3 = and i32 %shr.i59.3, 65537
+  %mul.i61.3 = mul i32 %and.i60.3, 65535
+  %add.i62.3 = add i32 %mul.i61.3, %sub106.3
+  %xor.i63.3 = xor i32 %add.i62.3, %conv33
+  %add108.3 = add i32 %xor.i53.3, %add113.2
+  %add110.3 = add i32 %add108.3, %xor.i.3
+  %add112.3 = add i32 %add110.3, %xor.i58.3
+  %add113.3 = add i32 %add112.3, %xor.i63.3
+  ret i32 %add113.3
+}

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
new file mode 100644
index 0000000000000..27e8f084e553d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
@@ -0,0 +1,482 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
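+; In @test below, the first load of each pair steps through indices
+; 0,4,8,...,28 (a forward stride of 4 floats) and the second through
+; 30,26,...,2 (a backward stride of 4), so both sides of each fsub are
+; strided-load candidates; the CHECK lines record the current scalar output.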
+
+define void @test([48 x float]* %p, float* noalias %s) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
+; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
+; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
+; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
+; CHECK-NEXT:    [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
+; CHECK-NEXT:    store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
+; CHECK-NEXT:    [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
+; CHECK-NEXT:    [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
+; CHECK-NEXT:    [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
+; CHECK-NEXT:    store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
+; CHECK-NEXT:    [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
+; CHECK-NEXT:    [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
+; CHECK-NEXT:    [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
+; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
+; CHECK-NEXT:    store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
+; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
+; CHECK-NEXT:    [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
+; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
+; CHECK-NEXT:    [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
+; CHECK-NEXT:    [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
+; CHECK-NEXT:    store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
+; CHECK-NEXT:    [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
+; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
+; CHECK-NEXT:    [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
+; CHECK-NEXT:    [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
+; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
+; CHECK-NEXT:    store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
+; CHECK-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
+; CHECK-NEXT:    [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
+; CHECK-NEXT:    [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
+; CHECK-NEXT:    [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
+; CHECK-NEXT:    store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
+; CHECK-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
+; CHECK-NEXT:    [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
+; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
+; CHECK-NEXT:    [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT:    [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
+; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
+; CHECK-NEXT:    store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 0
+  %i = load float, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 30
+  %i1 = load float, float* %arrayidx1, align 4
+  %add = fsub fast float %i1, %i
+  %arrayidx2 = getelementptr inbounds float, float* %s, i64 0
+  store float %add, float* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 4
+  %i2 = load float, float* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 26
+  %i3 = load float, float* %arrayidx6, align 4
+  %add7 = fsub fast float %i3, %i2
+  %arrayidx9 = getelementptr inbounds float, float* %s, i64 1
+  store float %add7, float* %arrayidx9, align 4
+  %arrayidx11 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 8
+  %i4 = load float, float* %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 22
+  %i5 = load float, float* %arrayidx13, align 4
+  %add14 = fsub fast float %i5, %i4
+  %arrayidx16 = getelementptr inbounds float, float* %s, i64 2
+  store float %add14, float* %arrayidx16, align 4
+  %arrayidx18 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 12
+  %i6 = load float, float* %arrayidx18, align 4
+  %arrayidx20 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 18
+  %i7 = load float, float* %arrayidx20, align 4
+  %add21 = fsub fast float %i7, %i6
+  %arrayidx23 = getelementptr inbounds float, float* %s, i64 3
+  store float %add21, float* %arrayidx23, align 4
+  %arrayidx25 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 16
+  %i8 = load float, float* %arrayidx25, align 4
+  %arrayidx27 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 14
+  %i9 = load float, float* %arrayidx27, align 4
+  %add28 = fsub fast float %i9, %i8
+  %arrayidx30 = getelementptr inbounds float, float* %s, i64 4
+  store float %add28, float* %arrayidx30, align 4
+  %arrayidx32 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 20
+  %i10 = load float, float* %arrayidx32, align 4
+  %arrayidx34 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 10
+  %i11 = load float, float* %arrayidx34, align 4
+  %add35 = fsub fast float %i11, %i10
+  %arrayidx37 = getelementptr inbounds float, float* %s, i64 5
+  store float %add35, float* %arrayidx37, align 4
+  %arrayidx39 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 24
+  %i12 = load float, float* %arrayidx39, align 4
+  %arrayidx41 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 6
+  %i13 = load float, float* %arrayidx41, align 4
+  %add42 = fsub fast float %i13, %i12
+  %arrayidx44 = getelementptr inbounds float, float* %s, i64 6
+  store float %add42, float* %arrayidx44, align 4
+  %arrayidx46 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 28
+  %i14 = load float, float* %arrayidx46, align 4
+  %arrayidx48 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 2
+  %i15 = load float, float* %arrayidx48, align 4
+  %add49 = fsub fast float %i15, %i14
+  %arrayidx51 = getelementptr inbounds float, float* %s, i64 7
+  store float %add49, float* %arrayidx51, align 4
+  ret void
+}
+
+define void @test1([48 x float]* %p, float* noalias %s, i32 %stride) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
+; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
+; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
+; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
+; CHECK-NEXT:    [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
+; CHECK-NEXT:    store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ST1:%.*]] = mul i64 [[STR]], 2
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
+; CHECK-NEXT:    [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
+; CHECK-NEXT:    [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
+; CHECK-NEXT:    [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
+; CHECK-NEXT:    store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT:    [[ST2:%.*]] = mul i64 [[STR]], 3
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
+; CHECK-NEXT:    [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
+; CHECK-NEXT:    [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
+; CHECK-NEXT:    [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
+; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
+; CHECK-NEXT:    store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
+; CHECK-NEXT:    [[ST3:%.*]] = mul i64 [[STR]], 4
+; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
+; CHECK-NEXT:    [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
+; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
+; CHECK-NEXT:    [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
+; CHECK-NEXT:    [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
+; CHECK-NEXT:    store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
+; CHECK-NEXT:    [[ST4:%.*]] = mul i64 [[STR]], 5
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
+; CHECK-NEXT:    [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
+; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
+; CHECK-NEXT:    [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
+; CHECK-NEXT:    [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
+; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
+; CHECK-NEXT:    store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
+; CHECK-NEXT:    [[ST5:%.*]] = mul i64 [[STR]], 6
+; CHECK-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
+; CHECK-NEXT:    [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
+; CHECK-NEXT:    [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
+; CHECK-NEXT:    [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
+; CHECK-NEXT:    store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
+; CHECK-NEXT:    [[ST6:%.*]] = mul i64 [[STR]], 7
+; CHECK-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
+; CHECK-NEXT:    [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
+; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
+; CHECK-NEXT:    [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT:    [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
+; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
+; CHECK-NEXT:    store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %str = zext i32 %stride to i64
+  %arrayidx = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 0
+  %i = load float, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 30
+  %i1 = load float, float* %arrayidx1, align 4
+  %add = fsub fast float %i1, %i
+  %arrayidx2 = getelementptr inbounds float, float* %s, i64 0
+  store float %add, float* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %str
+  %i2 = load float, float* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 26
+  %i3 = load float, float* %arrayidx6, align 4
+  %add7 = fsub fast float %i3, %i2
+  %arrayidx9 = getelementptr inbounds float, float* %s, i64 1
+  store float %add7, float* %arrayidx9, align 4
+  %st1 = mul i64 %str, 2
+  %arrayidx11 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st1
+  %i4 = load float, float* %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 22
+  %i5 = load float, float* %arrayidx13, align 4
+  %add14 = fsub fast float %i5, %i4
+  %arrayidx16 = getelementptr inbounds float, float* %s, i64 2
+  store float %add14, float* %arrayidx16, align 4
+  %st2 = mul i64 %str, 3
+  %arrayidx18 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st2
+  %i6 = load float, float* %arrayidx18, align 4
+  %arrayidx20 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 18
+  %i7 = load float, float* %arrayidx20, align 4
+  %add21 = fsub fast float %i7, %i6
+  %arrayidx23 = getelementptr inbounds float, float* %s, i64 3
+  store float %add21, float* %arrayidx23, align 4
+  %st3 = mul i64 %str, 4
+  %arrayidx25 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st3
+  %i8 = load float, float* %arrayidx25, align 4
+  %arrayidx27 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 14
+  %i9 = load float, float* %arrayidx27, align 4
+  %add28 = fsub fast float %i9, %i8
+  %arrayidx30 = getelementptr inbounds float, float* %s, i64 4
+  store float %add28, float* %arrayidx30, align 4
+  %st4 = mul i64 %str, 5
+  %arrayidx32 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st4
+  %i10 = load float, float* %arrayidx32, align 4
+  %arrayidx34 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 10
+  %i11 = load float, float* %arrayidx34, align 4
+  %add35 = fsub fast float %i11, %i10
+  %arrayidx37 = getelementptr inbounds float, float* %s, i64 5
+  store float %add35, float* %arrayidx37, align 4
+  %st5 = mul i64 %str, 6
+  %arrayidx39 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st5
+  %i12 = load float, float* %arrayidx39, align 4
+  %arrayidx41 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 6
+  %i13 = load float, float* %arrayidx41, align 4
+  %add42 = fsub fast float %i13, %i12
+  %arrayidx44 = getelementptr inbounds float, float* %s, i64 6
+  store float %add42, float* %arrayidx44, align 4
+  %st6 = mul i64 %str, 7
+  %arrayidx46 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st6
+  %i14 = load float, float* %arrayidx46, align 4
+  %arrayidx48 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 2
+  %i15 = load float, float* %arrayidx48, align 4
+  %add49 = fsub fast float %i15, %i14
+  %arrayidx51 = getelementptr inbounds float, float* %s, i64 7
+  store float %add49, float* %arrayidx51, align 4
+  ret void
+}
+
+define void @test2([48 x float]* %p, float* noalias %s, i32 %stride) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
+; CHECK-NEXT:    [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ST6:%.*]] = mul i64 [[STR]], 7
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
+; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
+; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
+; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ST5:%.*]] = mul i64 [[STR]], 6
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
+; CHECK-NEXT:    [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
+; CHECK-NEXT:    store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
+; CHECK-NEXT:    [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[ST4:%.*]] = mul i64 [[STR]], 5
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
+; CHECK-NEXT:    [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
+; CHECK-NEXT:    [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
+; CHECK-NEXT:    store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
+; CHECK-NEXT:    [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
+; CHECK-NEXT:    [[ST3:%.*]] = mul i64 [[STR]], 4
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
+; CHECK-NEXT:    [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
+; CHECK-NEXT:    [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
+; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
+; CHECK-NEXT:    store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
+; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
+; CHECK-NEXT:    [[ST2:%.*]] = mul i64 [[STR]], 3
+; CHECK-NEXT:    [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
+; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
+; CHECK-NEXT:    [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
+; CHECK-NEXT:    [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
+; CHECK-NEXT:    store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
+; CHECK-NEXT:    [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
+; CHECK-NEXT:    [[ST1:%.*]] = mul i64 [[STR]], 2
+; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
+; CHECK-NEXT:    [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
+; CHECK-NEXT:    [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
+; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
+; CHECK-NEXT:    store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
+; CHECK-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
+; CHECK-NEXT:    [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
+; CHECK-NEXT:    [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
+; CHECK-NEXT:    [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
+; CHECK-NEXT:    store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
+; CHECK-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
+; CHECK-NEXT:    [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
+; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0
+; CHECK-NEXT:    [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT:    [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
+; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
+; CHECK-NEXT:    store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %str = zext i32 %stride to i64
+  %arrayidx = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 2
+  %i = load float, float* %arrayidx, align 4
+  %st6 = mul i64 %str, 7
+  %arrayidx1 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st6
+  %i1 = load float, float* %arrayidx1, align 4
+  %add = fsub fast float %i1, %i
+  %arrayidx2 = getelementptr inbounds float, float* %s, i64 0
+  store float %add, float* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 6
+  %i2 = load float, float* %arrayidx4, align 4
+  %st5 = mul i64 %str, 6
+  %arrayidx6 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st5
+  %i3 = load float, float* %arrayidx6, align 4
+  %add7 = fsub fast float %i3, %i2
+  %arrayidx9 = getelementptr inbounds float, float* %s, i64 1
+  store float %add7, float* %arrayidx9, align 4
+  %arrayidx11 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 10
+  %i4 = load float, float* %arrayidx11, align 4
+  %st4 = mul i64 %str, 5
+  %arrayidx13 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st4
+  %i5 = load float, float* %arrayidx13, align 4
+  %add14 = fsub fast float %i5, %i4
+  %arrayidx16 = getelementptr inbounds float, float* %s, i64 2
+  store float %add14, float* %arrayidx16, align 4
+  %arrayidx18 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 14
+  %i6 = load float, float* %arrayidx18, align 4
+  %st3 = mul i64 %str, 4
+  %arrayidx20 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st3
+  %i7 = load float, float* %arrayidx20, align 4
+  %add21 = fsub fast float %i7, %i6
+  %arrayidx23 = getelementptr inbounds float, float* %s, i64 3
+  store float %add21, float* %arrayidx23, align 4
+  %arrayidx25 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 18
+  %st2 = mul i64 %str, 3
+  %i8 = load float, float* %arrayidx25, align 4
+  %arrayidx27 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st2
+  %i9 = load float, float* %arrayidx27, align 4
+  %add28 = fsub fast float %i9, %i8
+  %arrayidx30 = getelementptr inbounds float, float* %s, i64 4
+  store float %add28, float* %arrayidx30, align 4
+  %arrayidx32 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 22
+  %i10 = load float, float* %arrayidx32, align 4
+  %st1 = mul i64 %str, 2
+  %arrayidx34 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %st1
+  %i11 = load float, float* %arrayidx34, align 4
+  %add35 = fsub fast float %i11, %i10
+  %arrayidx37 = getelementptr inbounds float, float* %s, i64 5
+  store float %add35, float* %arrayidx37, align 4
+  %arrayidx39 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 26
+  %i12 = load float, float* %arrayidx39, align 4
+  %arrayidx41 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 %str
+  %i13 = load float, float* %arrayidx41, align 4
+  %add42 = fsub fast float %i13, %i12
+  %arrayidx44 = getelementptr inbounds float, float* %s, i64 6
+  store float %add42, float* %arrayidx44, align 4
+  %arrayidx46 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 30
+  %i14 = load float, float* %arrayidx46, align 4
+  %arrayidx48 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 0
+  %i15 = load float, float* %arrayidx48, align 4
+  %add49 = fsub fast float %i15, %i14
+  %arrayidx51 = getelementptr inbounds float, float* %s, i64 7
+  store float %add49, float* %arrayidx51, align 4
+  ret void
+}
+
+define void @test3([48 x float]* %p, float* noalias %s) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
+; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
+; CHECK-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
+; CHECK-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
+; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 23
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARRAYIDX]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x ptr> [[TMP0]], ptr [[ARRAYIDX4]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x ptr> [[TMP1]], ptr [[ARRAYIDX11]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[ARRAYIDX18]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x ptr> [[TMP3]], ptr [[ARRAYIDX25]], i32 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ARRAYIDX32]], i32 5
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[ARRAYIDX39]], i32 6
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[ARRAYIDX46]], i32 7
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
+; CHECK-NEXT:    [[TMP9:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast <8 x float> [[TMP10]], [[TMP8]]
+; CHECK-NEXT:    store <8 x float> [[TMP11]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 0
+  %i = load float, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 30
+  %i1 = load float, float* %arrayidx1, align 4
+  %add = fsub fast float %i1, %i
+  %arrayidx2 = getelementptr inbounds float, float* %s, i64 0
+  store float %add, float* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 4
+  %i2 = load float, float* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 29
+  %i3 = load float, float* %arrayidx6, align 4
+  %add7 = fsub fast float %i3, %i2
+  %arrayidx9 = getelementptr inbounds float, float* %s, i64 1
+  store float %add7, float* %arrayidx9, align 4
+  %arrayidx11 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 8
+  %i4 = load float, float* %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 28
+  %i5 = load float, float* %arrayidx13, align 4
+  %add14 = fsub fast float %i5, %i4
+  %arrayidx16 = getelementptr inbounds float, float* %s, i64 2
+  store float %add14, float* %arrayidx16, align 4
+  %arrayidx18 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 12
+  %i6 = load float, float* %arrayidx18, align 4
+  %arrayidx20 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 27
+  %i7 = load float, float* %arrayidx20, align 4
+  %add21 = fsub fast float %i7, %i6
+  %arrayidx23 = getelementptr inbounds float, float* %s, i64 3
+  store float %add21, float* %arrayidx23, align 4
+  %arrayidx25 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 16
+  %i8 = load float, float* %arrayidx25, align 4
+  %arrayidx27 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 26
+  %i9 = load float, float* %arrayidx27, align 4
+  %add28 = fsub fast float %i9, %i8
+  %arrayidx30 = getelementptr inbounds float, float* %s, i64 4
+  store float %add28, float* %arrayidx30, align 4
+  %arrayidx32 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 20
+  %i10 = load float, float* %arrayidx32, align 4
+  %arrayidx34 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 25
+  %i11 = load float, float* %arrayidx34, align 4
+  %add35 = fsub fast float %i11, %i10
+  %arrayidx37 = getelementptr inbounds float, float* %s, i64 5
+  store float %add35, float* %arrayidx37, align 4
+  %arrayidx39 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 24
+  %i12 = load float, float* %arrayidx39, align 4
+  %arrayidx41 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 24
+  %i13 = load float, float* %arrayidx41, align 4
+  %add42 = fsub fast float %i13, %i12
+  %arrayidx44 = getelementptr inbounds float, float* %s, i64 6
+  store float %add42, float* %arrayidx44, align 4
+  %arrayidx46 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 28
+  %i14 = load float, float* %arrayidx46, align 4
+  %arrayidx48 = getelementptr inbounds [48 x float], [48 x float]* %p, i64 0, i64 23
+  %i15 = load float, float* %arrayidx48, align 4
+  %add49 = fsub fast float %i15, %i14
+  %arrayidx51 = getelementptr inbounds float, float* %s, i64 7
+  store float %add49, float* %arrayidx51, align 4
+  ret void
+}
+
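(A note on @test3 above, grounded in its CHECK lines: the minuend chain walks indices 30 down to 23, and the vectorizer does not gather those eight elements one by one. It emits a single contiguous <8 x float> load at index 23 plus a lane-reversing shufflevector, while the stride-4 chain 0, 4, ..., 28 becomes a masked gather. A minimal standalone sketch of the reversed-run idiom, with illustrative names not taken from the test:

; The SLP form of a descending contiguous run: one vector load at the
; lowest address plus a lane-reversing shuffle, instead of eight scalar
; loads or a masked gather.
define <8 x float> @reversed_run(ptr %p) {
  %lo = getelementptr inbounds float, ptr %p, i64 23
  %v = load <8 x float>, ptr %lo, align 4
  %rev = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %rev
}
)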

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll
new file mode 100644
index 0000000000000..c72d6cc75d827
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-50 -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
+
+%class.A = type { i32, i32 }
+
+define void @test() {
+; CHECK-LABEL: define void @test
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BODY:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    [[ADD_I_I62_US:%.*]] = shl i64 0, 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> <i64 poison, i64 1>, i64 [[ADD_I_I62_US]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i64> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[CLASS_A:%.*]], <2 x ptr> zeroinitializer, <2 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP2]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+; CHECK-NEXT:    [[CMP_I_I_I_I67_US:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT:    [[SPEC_SELECT_I_I68_US:%.*]] = select i1 false, i64 [[TMP6]], i64 0
+; CHECK-NEXT:    br label [[BODY]]
+;
+entry:
+  br label %body
+
+body:
+  %add.i.i62.us = shl i64 0, 0
+  %mul.i.i63.us = or i64 %add.i.i62.us, 0
+  %add.ptr.i.i.i64.us = getelementptr %class.A, ptr null, i64 %mul.i.i63.us
+  %sub4.i.i65.us = or i64 0, 1
+  %add.ptr.i63.i.i66.us = getelementptr %class.A, ptr null, i64 %sub4.i.i65.us
+  %0 = load i32, ptr %add.ptr.i.i.i64.us, align 4
+  %1 = load i32, ptr %add.ptr.i63.i.i66.us, align 4
+  %cmp.i.i.i.i67.us = icmp slt i32 %0, %1
+  %spec.select.i.i68.us = select i1 false, i64 %sub4.i.i65.us, i64 0
+  br label %body
+}
+

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll
new file mode 100644
index 0000000000000..5aba9ea115a4b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v -slp-threshold=-20 < %s | FileCheck %s
+
+%S = type { i16, i16 }
+
+define i16 @test() {
+; CHECK-LABEL: define i16 @test
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PPREV_058_I:%.*]] = getelementptr [[S:%.*]], ptr null, i64 -1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[PPREV_058_I]], i32 0
+; CHECK-NEXT:    br label [[WHILE_BODY_I:%.*]]
+; CHECK:       while.body.i:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i16 [ 0, [[WHILE_BODY_I]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x ptr> [ [[TMP3:%.*]], [[WHILE_BODY_I]] ], [ [[TMP0]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP3]] = getelementptr [[S]], <2 x ptr> [[TMP2]], <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP3]], i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> poison)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
+; CHECK-NEXT:    [[CMP_I178:%.*]] = icmp ult i16 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    br label [[WHILE_BODY_I]]
+;
+entry:
+  %pPrev.058.i = getelementptr %S, ptr null, i64 -1
+  br label %while.body.i
+
+while.body.i:
+  %0 = phi i16 [ 0, %while.body.i ], [ 0, %entry ]
+  %pPrev.062.i = phi ptr [ %pPrev.0.i, %while.body.i ], [ %pPrev.058.i, %entry ]
+  %pEdge.061.i = phi ptr [ %incdec.ptr.i, %while.body.i ], [ null, %entry ]
+  %incdec.ptr.i = getelementptr %S, ptr %pEdge.061.i, i64 -1
+  %pPrev.0.i = getelementptr %S, ptr %pPrev.062.i, i64 -1
+  %1 = load i16, ptr %incdec.ptr.i, align 2
+  %2 = load i16, ptr %pPrev.0.i, align 2
+  %cmp.i178 = icmp ult i16 %1, %2
+  br label %while.body.i
+}

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-unsupported-type.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-unsupported-type.ll
new file mode 100644
index 0000000000000..4fd22639d6371
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-unsupported-type.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S < %s --passes=slp-vectorizer -slp-threshold=-50 -mtriple=riscv64-unknown-linux-gnu -mattr=+v | FileCheck %s
+
+define void @loads() {
+; CHECK-LABEL: define void @loads(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x fp128>, ptr null, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp une <2 x fp128> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    call void null(i32 0, ptr null, i32 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp une <2 x fp128> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    ret void
+;
+entry:
+  %_M_value.imagp.i266 = getelementptr { fp128, fp128 }, ptr null, i64 0, i32 1
+  %0 = load fp128, ptr null, align 16
+  %cmp.i382 = fcmp une fp128 %0, 0xL00000000000000000000000000000000
+  %1 = load fp128, ptr %_M_value.imagp.i266, align 16
+  %cmp4.i385 = fcmp une fp128 %1, 0xL00000000000000000000000000000000
+  call void null(i32 0, ptr null, i32 0)
+  %cmp.i386 = fcmp une fp128 %0, 0xL00000000000000000000000000000000
+  %cmp2.i388 = fcmp une fp128 %1, 0xL00000000000000000000000000000000
+  ret void
+}
+
+define void @stores(ptr noalias %p) {
+; CHECK-LABEL: define void @stores(
+; CHECK-SAME: ptr noalias [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[_M_VALUE_IMAGP_I266:%.*]] = getelementptr { fp128, fp128 }, ptr null, i64 0, i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load fp128, ptr null, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load fp128, ptr [[_M_VALUE_IMAGP_I266]], align 16
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr fp128, ptr [[P]], i64 1
+; CHECK-NEXT:    store fp128 [[TMP0]], ptr [[P1]], align 16
+; CHECK-NEXT:    store fp128 [[TMP1]], ptr [[P]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %_M_value.imagp.i266 = getelementptr { fp128, fp128 }, ptr null, i64 0, i32 1
+  %0 = load fp128, ptr null, align 16
+  %1 = load fp128, ptr %_M_value.imagp.i266, align 16
+  %p1 = getelementptr fp128, ptr %p, i64 1
+  store fp128 %0, ptr %p1, align 16
+  store fp128 %1, ptr %p, align 16
+  ret void
+}
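(For context, and not part of this NFC commit: on RISC-V with +v, the stride-indexed patterns exercised by these tests are natural candidates for a single strided load rather than a masked gather, e.g. via the existing llvm.experimental.vp.strided.load intrinsic. A hedged sketch of that shape follows; the stride operand is in bytes, and the function and value names are illustrative only:

; Eight floats spaced %stride elements apart, fetched with one strided
; load; the mask keeps all lanes and the explicit vector length is 8.
define <8 x float> @strided_sketch(ptr %base, i64 %stride) {
  %bytes = mul i64 %stride, 4            ; element stride -> byte stride
  %v = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr %base, i64 %bytes, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  ret <8 x float> %v
}
declare <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr, i64, <8 x i1>, i32)
)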
