[llvm] 834ad10 - [SLP][NFC]Add a test version with threshold=-15, NFC.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 10:14:10 PDT 2024


Author: Alexey Bataev
Date: 2024-08-05T10:14:01-07:00
New Revision: 834ad102c377a4d1cdc6c601d9899b5dc0a1858b

URL: https://github.com/llvm/llvm-project/commit/834ad102c377a4d1cdc6c601d9899b5dc0a1858b
DIFF: https://github.com/llvm/llvm-project/commit/834ad102c377a4d1cdc6c601d9899b5dc0a1858b.diff

LOG: [SLP][NFC]Add a test version with threshold=-15, NFC.

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index aa9a070a79450..3595f7772defd 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-20 | FileCheck %s
+; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-15 | FileCheck %s --check-prefix=THR15
 
 define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.ptr, ptr %add.ptr64) {
 ; CHECK-LABEL: define i32 @test(
@@ -359,6 +360,340 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]]
 ; CHECK-NEXT:    ret i32 [[ADD113_3]]
 ;
+; THR15-LABEL: define i32 @test(
+; THR15-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] {
+; THR15-NEXT:  entry:
+; THR15-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1
+; THR15-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; THR15-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 4
+; THR15-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[PIX2]], i64 4
+; THR15-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1
+; THR15-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr i8, ptr [[PIX2]], i64 2
+; THR15-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr i8, ptr [[PIX1]], i64 6
+; THR15-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr i8, ptr [[PIX2]], i64 6
+; THR15-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3
+; THR15-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1
+; THR15-NEXT:    [[CONV33:%.*]] = zext i8 [[TMP1]] to i32
+; THR15-NEXT:    [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]]
+; THR15-NEXT:    [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]]
+; THR15-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ADD_PTR3]], align 1
+; THR15-NEXT:    [[CONV_1:%.*]] = zext i8 [[TMP2]] to i32
+; THR15-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 4
+; THR15-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 4
+; THR15-NEXT:    [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1
+; THR15-NEXT:    [[ARRAYIDX22_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 2
+; THR15-NEXT:    [[ARRAYIDX25_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 6
+; THR15-NEXT:    [[ARRAYIDX27_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 6
+; THR15-NEXT:    [[ARRAYIDX32_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3
+; THR15-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX32_1]], align 1
+; THR15-NEXT:    [[CONV33_1:%.*]] = zext i8 [[TMP3]] to i32
+; THR15-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
+; THR15-NEXT:    [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
+; THR15-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
+; THR15-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
+; THR15-NEXT:    [[TMP4:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1
+; THR15-NEXT:    [[TMP5:%.*]] = zext <2 x i8> [[TMP4]] to <2 x i32>
+; THR15-NEXT:    [[TMP6:%.*]] = load <2 x i8>, ptr [[ADD_PTR64_1]], align 1
+; THR15-NEXT:    [[TMP7:%.*]] = zext <2 x i8> [[TMP6]] to <2 x i32>
+; THR15-NEXT:    [[TMP8:%.*]] = sub <2 x i32> [[TMP5]], [[TMP7]]
+; THR15-NEXT:    [[TMP9:%.*]] = load <2 x i8>, ptr [[ARRAYIDX3_2]], align 1
+; THR15-NEXT:    [[TMP10:%.*]] = zext <2 x i8> [[TMP9]] to <2 x i32>
+; THR15-NEXT:    [[TMP11:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_2]], align 1
+; THR15-NEXT:    [[TMP12:%.*]] = zext <2 x i8> [[TMP11]] to <2 x i32>
+; THR15-NEXT:    [[TMP13:%.*]] = sub <2 x i32> [[TMP10]], [[TMP12]]
+; THR15-NEXT:    [[TMP14:%.*]] = shl <2 x i32> [[TMP13]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP15:%.*]] = add <2 x i32> [[TMP14]], [[TMP8]]
+; THR15-NEXT:    [[ARRAYIDX20_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 2
+; THR15-NEXT:    [[ARRAYIDX22_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 2
+; THR15-NEXT:    [[ARRAYIDX25_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 6
+; THR15-NEXT:    [[ARRAYIDX27_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 6
+; THR15-NEXT:    [[TMP16:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_2]], align 1
+; THR15-NEXT:    [[TMP17:%.*]] = zext <2 x i8> [[TMP16]] to <2 x i32>
+; THR15-NEXT:    [[TMP18:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_2]], align 1
+; THR15-NEXT:    [[TMP19:%.*]] = zext <2 x i8> [[TMP18]] to <2 x i32>
+; THR15-NEXT:    [[TMP20:%.*]] = sub <2 x i32> [[TMP17]], [[TMP19]]
+; THR15-NEXT:    [[TMP21:%.*]] = load <2 x i8>, ptr [[ARRAYIDX25_2]], align 1
+; THR15-NEXT:    [[TMP22:%.*]] = zext <2 x i8> [[TMP21]] to <2 x i32>
+; THR15-NEXT:    [[TMP23:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_2]], align 1
+; THR15-NEXT:    [[TMP24:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32>
+; THR15-NEXT:    [[TMP25:%.*]] = sub <2 x i32> [[TMP22]], [[TMP24]]
+; THR15-NEXT:    [[TMP26:%.*]] = shl <2 x i32> [[TMP25]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP27:%.*]] = add <2 x i32> [[TMP26]], [[TMP20]]
+; THR15-NEXT:    [[TMP28:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
+; THR15-NEXT:    [[TMP29:%.*]] = extractelement <2 x i32> [[TMP15]], i32 1
+; THR15-NEXT:    [[ADD44_2:%.*]] = add i32 [[TMP29]], [[TMP28]]
+; THR15-NEXT:    [[SUB45_2:%.*]] = sub i32 [[TMP28]], [[TMP29]]
+; THR15-NEXT:    [[TMP30:%.*]] = extractelement <2 x i32> [[TMP27]], i32 0
+; THR15-NEXT:    [[TMP31:%.*]] = extractelement <2 x i32> [[TMP27]], i32 1
+; THR15-NEXT:    [[ADD46_2:%.*]] = add i32 [[TMP31]], [[TMP30]]
+; THR15-NEXT:    [[SUB47_2:%.*]] = sub i32 [[TMP30]], [[TMP31]]
+; THR15-NEXT:    [[ADD48_2:%.*]] = add i32 [[ADD46_2]], [[ADD44_2]]
+; THR15-NEXT:    [[SUB51_2:%.*]] = sub i32 [[ADD44_2]], [[ADD46_2]]
+; THR15-NEXT:    [[ADD55_2:%.*]] = add i32 [[SUB47_2]], [[SUB45_2]]
+; THR15-NEXT:    [[SUB59_2:%.*]] = sub i32 [[SUB45_2]], [[SUB47_2]]
+; THR15-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr i8, ptr null, i64 4
+; THR15-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr i8, ptr null, i64 4
+; THR15-NEXT:    [[TMP32:%.*]] = load <2 x i8>, ptr null, align 1
+; THR15-NEXT:    [[TMP33:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32>
+; THR15-NEXT:    [[TMP34:%.*]] = load <2 x i8>, ptr null, align 1
+; THR15-NEXT:    [[TMP35:%.*]] = zext <2 x i8> [[TMP34]] to <2 x i32>
+; THR15-NEXT:    [[TMP36:%.*]] = sub <2 x i32> [[TMP33]], [[TMP35]]
+; THR15-NEXT:    [[TMP37:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 [[ARRAYIDX3_3]], i64 -4, <2 x i1> <i1 true, i1 true>, i32 2)
+; THR15-NEXT:    [[TMP38:%.*]] = zext <2 x i8> [[TMP37]] to <2 x i32>
+; THR15-NEXT:    [[TMP39:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1
+; THR15-NEXT:    [[TMP40:%.*]] = zext <2 x i8> [[TMP39]] to <2 x i32>
+; THR15-NEXT:    [[TMP41:%.*]] = sub <2 x i32> [[TMP38]], [[TMP40]]
+; THR15-NEXT:    [[TMP42:%.*]] = shl <2 x i32> [[TMP41]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP43:%.*]] = add <2 x i32> [[TMP42]], [[TMP36]]
+; THR15-NEXT:    [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2
+; THR15-NEXT:    [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2
+; THR15-NEXT:    [[TMP44:%.*]] = load i8, ptr null, align 1
+; THR15-NEXT:    [[ARRAYIDX27_3:%.*]] = getelementptr i8, ptr null, i64 6
+; THR15-NEXT:    [[TMP45:%.*]] = load i8, ptr null, align 1
+; THR15-NEXT:    [[TMP46:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_3]], align 1
+; THR15-NEXT:    [[TMP47:%.*]] = zext <2 x i8> [[TMP46]] to <2 x i32>
+; THR15-NEXT:    [[TMP48:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1
+; THR15-NEXT:    [[TMP49:%.*]] = zext <2 x i8> [[TMP48]] to <2 x i32>
+; THR15-NEXT:    [[TMP50:%.*]] = sub <2 x i32> [[TMP47]], [[TMP49]]
+; THR15-NEXT:    [[TMP51:%.*]] = insertelement <2 x i8> poison, i8 [[TMP44]], i32 0
+; THR15-NEXT:    [[TMP52:%.*]] = insertelement <2 x i8> [[TMP51]], i8 [[TMP45]], i32 1
+; THR15-NEXT:    [[TMP53:%.*]] = zext <2 x i8> [[TMP52]] to <2 x i32>
+; THR15-NEXT:    [[TMP54:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1
+; THR15-NEXT:    [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32>
+; THR15-NEXT:    [[TMP56:%.*]] = sub <2 x i32> [[TMP53]], [[TMP55]]
+; THR15-NEXT:    [[TMP57:%.*]] = shl <2 x i32> [[TMP56]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP58:%.*]] = add <2 x i32> [[TMP57]], [[TMP50]]
+; THR15-NEXT:    [[TMP59:%.*]] = extractelement <2 x i32> [[TMP43]], i32 0
+; THR15-NEXT:    [[TMP60:%.*]] = extractelement <2 x i32> [[TMP43]], i32 1
+; THR15-NEXT:    [[ADD44_3:%.*]] = add i32 [[TMP60]], [[TMP59]]
+; THR15-NEXT:    [[SUB45_3:%.*]] = sub i32 [[TMP59]], [[TMP60]]
+; THR15-NEXT:    [[TMP61:%.*]] = extractelement <2 x i32> [[TMP58]], i32 0
+; THR15-NEXT:    [[TMP62:%.*]] = extractelement <2 x i32> [[TMP58]], i32 1
+; THR15-NEXT:    [[ADD46_3:%.*]] = add i32 [[TMP62]], [[TMP61]]
+; THR15-NEXT:    [[SUB47_3:%.*]] = sub i32 [[TMP61]], [[TMP62]]
+; THR15-NEXT:    [[ADD48_3:%.*]] = add i32 [[ADD46_3]], [[ADD44_3]]
+; THR15-NEXT:    [[SUB51_3:%.*]] = sub i32 [[ADD44_3]], [[ADD46_3]]
+; THR15-NEXT:    [[ADD55_3:%.*]] = add i32 [[SUB47_3]], [[SUB45_3]]
+; THR15-NEXT:    [[SUB59_3:%.*]] = sub i32 [[SUB45_3]], [[SUB47_3]]
+; THR15-NEXT:    [[ADD94:%.*]] = add i32 [[ADD48_3]], [[ADD48_2]]
+; THR15-NEXT:    [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_3]]
+; THR15-NEXT:    [[TMP63:%.*]] = extractelement <2 x i32> [[TMP33]], i32 0
+; THR15-NEXT:    [[SHR_I:%.*]] = lshr i32 [[TMP63]], 15
+; THR15-NEXT:    [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
+; THR15-NEXT:    [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535
+; THR15-NEXT:    [[SHR_I49:%.*]] = lshr i32 [[ADD46_2]], 15
+; THR15-NEXT:    [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537
+; THR15-NEXT:    [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535
+; THR15-NEXT:    [[ADD94_2:%.*]] = add i32 [[SUB51_3]], [[SUB51_2]]
+; THR15-NEXT:    [[SUB102_2:%.*]] = sub i32 [[SUB51_2]], [[SUB51_3]]
+; THR15-NEXT:    [[SHR_I49_2:%.*]] = lshr i32 [[CONV_1]], 15
+; THR15-NEXT:    [[AND_I50_2:%.*]] = and i32 [[SHR_I49_2]], 65537
+; THR15-NEXT:    [[MUL_I51_2:%.*]] = mul i32 [[AND_I50_2]], 65535
+; THR15-NEXT:    [[ADD94_3:%.*]] = add i32 [[SUB59_3]], [[SUB59_2]]
+; THR15-NEXT:    [[SUB102_3:%.*]] = sub i32 [[SUB59_2]], [[SUB59_3]]
+; THR15-NEXT:    [[SHR_I49_3:%.*]] = lshr i32 [[CONV]], 15
+; THR15-NEXT:    [[AND_I50_3:%.*]] = and i32 [[SHR_I49_3]], 65537
+; THR15-NEXT:    [[MUL_I51_3:%.*]] = mul i32 [[AND_I50_3]], 65535
+; THR15-NEXT:    [[TMP64:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8]], align 1
+; THR15-NEXT:    [[TMP65:%.*]] = zext <2 x i8> [[TMP64]] to <2 x i32>
+; THR15-NEXT:    [[TMP66:%.*]] = load <2 x i8>, ptr [[PIX2]], align 1
+; THR15-NEXT:    [[TMP67:%.*]] = zext <2 x i8> [[TMP66]] to <2 x i32>
+; THR15-NEXT:    [[TMP68:%.*]] = load <2 x i8>, ptr [[ARRAYIDX3]], align 1
+; THR15-NEXT:    [[TMP69:%.*]] = zext <2 x i8> [[TMP68]] to <2 x i32>
+; THR15-NEXT:    [[TMP70:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5]], align 1
+; THR15-NEXT:    [[TMP71:%.*]] = zext <2 x i8> [[TMP70]] to <2 x i32>
+; THR15-NEXT:    [[TMP72:%.*]] = sub <2 x i32> [[TMP69]], [[TMP71]]
+; THR15-NEXT:    [[TMP73:%.*]] = shl <2 x i32> [[TMP72]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP74:%.*]] = shufflevector <2 x i32> [[TMP65]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; THR15-NEXT:    [[TMP75:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[CONV]], i32 0
+; THR15-NEXT:    [[TMP76:%.*]] = sub <2 x i32> [[TMP75]], [[TMP67]]
+; THR15-NEXT:    [[TMP77:%.*]] = add <2 x i32> [[TMP73]], [[TMP76]]
+; THR15-NEXT:    [[TMP78:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22]], align 1
+; THR15-NEXT:    [[TMP79:%.*]] = zext <2 x i8> [[TMP78]] to <2 x i32>
+; THR15-NEXT:    [[TMP80:%.*]] = load <2 x i8>, ptr [[ARRAYIDX25]], align 1
+; THR15-NEXT:    [[TMP81:%.*]] = zext <2 x i8> [[TMP80]] to <2 x i32>
+; THR15-NEXT:    [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27]], align 1
+; THR15-NEXT:    [[TMP83:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32>
+; THR15-NEXT:    [[TMP84:%.*]] = sub <2 x i32> [[TMP81]], [[TMP83]]
+; THR15-NEXT:    [[TMP85:%.*]] = shl <2 x i32> [[TMP84]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP86:%.*]] = insertelement <2 x i32> [[TMP74]], i32 [[CONV33]], i32 1
+; THR15-NEXT:    [[TMP87:%.*]] = sub <2 x i32> [[TMP86]], [[TMP79]]
+; THR15-NEXT:    [[TMP88:%.*]] = add <2 x i32> [[TMP85]], [[TMP87]]
+; THR15-NEXT:    [[TMP89:%.*]] = extractelement <2 x i32> [[TMP77]], i32 0
+; THR15-NEXT:    [[TMP90:%.*]] = extractelement <2 x i32> [[TMP77]], i32 1
+; THR15-NEXT:    [[ADD44:%.*]] = add i32 [[TMP90]], [[TMP89]]
+; THR15-NEXT:    [[SUB45:%.*]] = sub i32 [[TMP89]], [[TMP90]]
+; THR15-NEXT:    [[TMP91:%.*]] = extractelement <2 x i32> [[TMP88]], i32 0
+; THR15-NEXT:    [[TMP92:%.*]] = extractelement <2 x i32> [[TMP88]], i32 1
+; THR15-NEXT:    [[ADD46:%.*]] = add i32 [[TMP92]], [[TMP91]]
+; THR15-NEXT:    [[SUB47:%.*]] = sub i32 [[TMP91]], [[TMP92]]
+; THR15-NEXT:    [[ADD48:%.*]] = add i32 [[ADD46]], [[ADD44]]
+; THR15-NEXT:    [[SUB51:%.*]] = sub i32 [[ADD44]], [[ADD46]]
+; THR15-NEXT:    [[ADD55:%.*]] = add i32 [[SUB47]], [[SUB45]]
+; THR15-NEXT:    [[SUB59:%.*]] = sub i32 [[SUB45]], [[SUB47]]
+; THR15-NEXT:    [[SHR_I59:%.*]] = lshr i32 [[ADD46]], 15
+; THR15-NEXT:    [[AND_I60:%.*]] = and i32 [[SHR_I59]], 65537
+; THR15-NEXT:    [[MUL_I61:%.*]] = mul i32 [[AND_I60]], 65535
+; THR15-NEXT:    [[SHR_I59_1:%.*]] = lshr i32 [[SUB47]], 15
+; THR15-NEXT:    [[AND_I60_1:%.*]] = and i32 [[SHR_I59_1]], 65537
+; THR15-NEXT:    [[MUL_I61_1:%.*]] = mul i32 [[AND_I60_1]], 65535
+; THR15-NEXT:    [[TMP93:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1
+; THR15-NEXT:    [[TMP94:%.*]] = zext <2 x i8> [[TMP93]] to <2 x i32>
+; THR15-NEXT:    [[TMP95:%.*]] = load <2 x i8>, ptr [[ADD_PTR644]], align 1
+; THR15-NEXT:    [[TMP96:%.*]] = zext <2 x i8> [[TMP95]] to <2 x i32>
+; THR15-NEXT:    [[TMP97:%.*]] = load <2 x i8>, ptr [[ARRAYIDX3_1]], align 1
+; THR15-NEXT:    [[TMP98:%.*]] = zext <2 x i8> [[TMP97]] to <2 x i32>
+; THR15-NEXT:    [[TMP99:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_1]], align 1
+; THR15-NEXT:    [[TMP100:%.*]] = zext <2 x i8> [[TMP99]] to <2 x i32>
+; THR15-NEXT:    [[TMP101:%.*]] = sub <2 x i32> [[TMP98]], [[TMP100]]
+; THR15-NEXT:    [[TMP102:%.*]] = shl <2 x i32> [[TMP101]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP103:%.*]] = shufflevector <2 x i32> [[TMP94]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; THR15-NEXT:    [[TMP104:%.*]] = insertelement <2 x i32> [[TMP103]], i32 [[CONV_1]], i32 0
+; THR15-NEXT:    [[TMP105:%.*]] = sub <2 x i32> [[TMP104]], [[TMP96]]
+; THR15-NEXT:    [[TMP106:%.*]] = add <2 x i32> [[TMP102]], [[TMP105]]
+; THR15-NEXT:    [[TMP107:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_1]], align 1
+; THR15-NEXT:    [[TMP108:%.*]] = zext <2 x i8> [[TMP107]] to <2 x i32>
+; THR15-NEXT:    [[TMP109:%.*]] = load <2 x i8>, ptr [[ARRAYIDX25_1]], align 1
+; THR15-NEXT:    [[TMP110:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32>
+; THR15-NEXT:    [[TMP111:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_1]], align 1
+; THR15-NEXT:    [[TMP112:%.*]] = zext <2 x i8> [[TMP111]] to <2 x i32>
+; THR15-NEXT:    [[TMP113:%.*]] = sub <2 x i32> [[TMP110]], [[TMP112]]
+; THR15-NEXT:    [[TMP114:%.*]] = shl <2 x i32> [[TMP113]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP115:%.*]] = insertelement <2 x i32> [[TMP103]], i32 [[CONV33_1]], i32 1
+; THR15-NEXT:    [[TMP116:%.*]] = sub <2 x i32> [[TMP115]], [[TMP108]]
+; THR15-NEXT:    [[TMP117:%.*]] = add <2 x i32> [[TMP114]], [[TMP116]]
+; THR15-NEXT:    [[TMP118:%.*]] = extractelement <2 x i32> [[TMP106]], i32 0
+; THR15-NEXT:    [[TMP119:%.*]] = extractelement <2 x i32> [[TMP106]], i32 1
+; THR15-NEXT:    [[ADD44_1:%.*]] = add i32 [[TMP119]], [[TMP118]]
+; THR15-NEXT:    [[SUB45_1:%.*]] = sub i32 [[TMP118]], [[TMP119]]
+; THR15-NEXT:    [[TMP120:%.*]] = extractelement <2 x i32> [[TMP117]], i32 0
+; THR15-NEXT:    [[TMP121:%.*]] = extractelement <2 x i32> [[TMP117]], i32 1
+; THR15-NEXT:    [[ADD46_1:%.*]] = add i32 [[TMP121]], [[TMP120]]
+; THR15-NEXT:    [[SUB47_1:%.*]] = sub i32 [[TMP120]], [[TMP121]]
+; THR15-NEXT:    [[ADD48_1:%.*]] = add i32 [[ADD46_1]], [[ADD44_1]]
+; THR15-NEXT:    [[SUB51_1:%.*]] = sub i32 [[ADD44_1]], [[ADD46_1]]
+; THR15-NEXT:    [[ADD55_1:%.*]] = add i32 [[SUB47_1]], [[SUB45_1]]
+; THR15-NEXT:    [[SUB59_1:%.*]] = sub i32 [[SUB45_1]], [[SUB47_1]]
+; THR15-NEXT:    [[SHR_I54:%.*]] = lshr i32 [[ADD46_1]], 15
+; THR15-NEXT:    [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537
+; THR15-NEXT:    [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535
+; THR15-NEXT:    [[SHR_I54_1:%.*]] = lshr i32 [[SUB47_1]], 15
+; THR15-NEXT:    [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537
+; THR15-NEXT:    [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535
+; THR15-NEXT:    [[TMP122:%.*]] = lshr <2 x i32> [[TMP94]], <i32 15, i32 15>
+; THR15-NEXT:    [[TMP123:%.*]] = and <2 x i32> [[TMP122]], <i32 65537, i32 65537>
+; THR15-NEXT:    [[TMP124:%.*]] = mul <2 x i32> [[TMP123]], <i32 65535, i32 65535>
+; THR15-NEXT:    [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD48]]
+; THR15-NEXT:    [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_1]]
+; THR15-NEXT:    [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]]
+; THR15-NEXT:    [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]]
+; THR15-NEXT:    [[ADD105:%.*]] = add i32 [[SUB102]], [[SUB86]]
+; THR15-NEXT:    [[SUB106:%.*]] = sub i32 [[SUB86]], [[SUB102]]
+; THR15-NEXT:    [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]]
+; THR15-NEXT:    [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP63]]
+; THR15-NEXT:    [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]]
+; THR15-NEXT:    [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[ADD46_2]]
+; THR15-NEXT:    [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]]
+; THR15-NEXT:    [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[ADD46_1]]
+; THR15-NEXT:    [[ADD_I62:%.*]] = add i32 [[MUL_I61]], [[SUB106]]
+; THR15-NEXT:    [[XOR_I63:%.*]] = xor i32 [[ADD_I62]], [[ADD46]]
+; THR15-NEXT:    [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]]
+; THR15-NEXT:    [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]]
+; THR15-NEXT:    [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I63]]
+; THR15-NEXT:    [[TMP125:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_2]], i32 0
+; THR15-NEXT:    [[TMP126:%.*]] = shufflevector <2 x i32> [[TMP125]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP127:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_3]], i32 0
+; THR15-NEXT:    [[TMP128:%.*]] = shufflevector <2 x i32> [[TMP127]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP129:%.*]] = sub <2 x i32> [[TMP126]], [[TMP128]]
+; THR15-NEXT:    [[TMP130:%.*]] = add <2 x i32> [[TMP126]], [[TMP128]]
+; THR15-NEXT:    [[TMP131:%.*]] = shufflevector <2 x i32> [[TMP129]], <2 x i32> [[TMP130]], <2 x i32> <i32 0, i32 3>
+; THR15-NEXT:    [[TMP132:%.*]] = lshr <2 x i32> [[TMP5]], <i32 15, i32 15>
+; THR15-NEXT:    [[TMP133:%.*]] = and <2 x i32> [[TMP132]], <i32 65537, i32 65537>
+; THR15-NEXT:    [[TMP134:%.*]] = mul <2 x i32> [[TMP133]], <i32 65535, i32 65535>
+; THR15-NEXT:    [[TMP135:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55]], i32 0
+; THR15-NEXT:    [[TMP136:%.*]] = shufflevector <2 x i32> [[TMP135]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP137:%.*]] = insertelement <2 x i32> poison, i32 [[ADD55_1]], i32 0
+; THR15-NEXT:    [[TMP138:%.*]] = shufflevector <2 x i32> [[TMP137]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP139:%.*]] = sub <2 x i32> [[TMP136]], [[TMP138]]
+; THR15-NEXT:    [[TMP140:%.*]] = add <2 x i32> [[TMP136]], [[TMP138]]
+; THR15-NEXT:    [[TMP141:%.*]] = shufflevector <2 x i32> [[TMP139]], <2 x i32> [[TMP140]], <2 x i32> <i32 0, i32 3>
+; THR15-NEXT:    [[TMP142:%.*]] = extractelement <2 x i32> [[TMP131]], i32 1
+; THR15-NEXT:    [[TMP143:%.*]] = extractelement <2 x i32> [[TMP141]], i32 1
+; THR15-NEXT:    [[SUB104_1:%.*]] = sub i32 [[TMP143]], [[TMP142]]
+; THR15-NEXT:    [[TMP144:%.*]] = add <2 x i32> [[TMP131]], [[TMP141]]
+; THR15-NEXT:    [[TMP145:%.*]] = extractelement <2 x i32> [[TMP131]], i32 0
+; THR15-NEXT:    [[TMP146:%.*]] = extractelement <2 x i32> [[TMP141]], i32 0
+; THR15-NEXT:    [[TMP147:%.*]] = shufflevector <2 x i32> [[TMP141]], <2 x i32> [[TMP131]], <2 x i32> <i32 0, i32 2>
+; THR15-NEXT:    [[SUB106_1:%.*]] = sub i32 [[TMP146]], [[TMP145]]
+; THR15-NEXT:    [[TMP148:%.*]] = add <2 x i32> [[TMP134]], [[TMP144]]
+; THR15-NEXT:    [[TMP149:%.*]] = xor <2 x i32> [[TMP148]], [[TMP5]]
+; THR15-NEXT:    [[ADD_I57_1:%.*]] = add i32 [[MUL_I56_1]], [[SUB104_1]]
+; THR15-NEXT:    [[XOR_I58_1:%.*]] = xor i32 [[ADD_I57_1]], [[SUB47_1]]
+; THR15-NEXT:    [[ADD_I62_1:%.*]] = add i32 [[MUL_I61_1]], [[SUB106_1]]
+; THR15-NEXT:    [[XOR_I63_1:%.*]] = xor i32 [[ADD_I62_1]], [[SUB47]]
+; THR15-NEXT:    [[TMP150:%.*]] = extractelement <2 x i32> [[TMP149]], i32 0
+; THR15-NEXT:    [[ADD108_1:%.*]] = add i32 [[TMP150]], [[ADD113]]
+; THR15-NEXT:    [[TMP151:%.*]] = extractelement <2 x i32> [[TMP149]], i32 1
+; THR15-NEXT:    [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[TMP151]]
+; THR15-NEXT:    [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I58_1]]
+; THR15-NEXT:    [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I63_1]]
+; THR15-NEXT:    [[ADD78_2:%.*]] = add i32 [[SUB51_1]], [[SUB51]]
+; THR15-NEXT:    [[SUB86_2:%.*]] = sub i32 [[SUB51]], [[SUB51_1]]
+; THR15-NEXT:    [[TMP152:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0
+; THR15-NEXT:    [[TMP153:%.*]] = shufflevector <2 x i32> [[TMP152]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP154:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0
+; THR15-NEXT:    [[TMP155:%.*]] = shufflevector <2 x i32> [[TMP154]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP156:%.*]] = add <2 x i32> [[TMP153]], [[TMP155]]
+; THR15-NEXT:    [[TMP157:%.*]] = sub <2 x i32> [[TMP153]], [[TMP155]]
+; THR15-NEXT:    [[TMP158:%.*]] = shufflevector <2 x i32> [[TMP156]], <2 x i32> [[TMP157]], <2 x i32> <i32 0, i32 3>
+; THR15-NEXT:    [[ADD105_2:%.*]] = add i32 [[SUB102_2]], [[SUB86_2]]
+; THR15-NEXT:    [[SUB106_2:%.*]] = sub i32 [[SUB86_2]], [[SUB102_2]]
+; THR15-NEXT:    [[ADD_I52_2:%.*]] = add i32 [[MUL_I51_2]], [[ADD105_2]]
+; THR15-NEXT:    [[XOR_I53_2:%.*]] = xor i32 [[ADD_I52_2]], [[CONV_1]]
+; THR15-NEXT:    [[TMP159:%.*]] = add <2 x i32> [[TMP124]], [[TMP158]]
+; THR15-NEXT:    [[TMP160:%.*]] = xor <2 x i32> [[TMP159]], [[TMP94]]
+; THR15-NEXT:    [[SHR_I59_2:%.*]] = lshr i32 [[ADD44]], 15
+; THR15-NEXT:    [[AND_I60_2:%.*]] = and i32 [[SHR_I59_2]], 65537
+; THR15-NEXT:    [[MUL_I61_2:%.*]] = mul i32 [[AND_I60_2]], 65535
+; THR15-NEXT:    [[ADD_I62_2:%.*]] = add i32 [[MUL_I61_2]], [[SUB106_2]]
+; THR15-NEXT:    [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[ADD44]]
+; THR15-NEXT:    [[ADD108_2:%.*]] = add i32 [[XOR_I53_2]], [[ADD113_1]]
+; THR15-NEXT:    [[TMP161:%.*]] = extractelement <2 x i32> [[TMP160]], i32 0
+; THR15-NEXT:    [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP161]]
+; THR15-NEXT:    [[TMP162:%.*]] = extractelement <2 x i32> [[TMP160]], i32 1
+; THR15-NEXT:    [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP162]]
+; THR15-NEXT:    [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]]
+; THR15-NEXT:    [[ADD78_3:%.*]] = add i32 [[SUB59_1]], [[SUB59]]
+; THR15-NEXT:    [[SUB86_3:%.*]] = sub i32 [[SUB59]], [[SUB59_1]]
+; THR15-NEXT:    [[TMP163:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_3]], i32 0
+; THR15-NEXT:    [[TMP164:%.*]] = shufflevector <2 x i32> [[TMP163]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP165:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_3]], i32 0
+; THR15-NEXT:    [[TMP166:%.*]] = shufflevector <2 x i32> [[TMP165]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THR15-NEXT:    [[TMP167:%.*]] = add <2 x i32> [[TMP164]], [[TMP166]]
+; THR15-NEXT:    [[TMP168:%.*]] = sub <2 x i32> [[TMP164]], [[TMP166]]
+; THR15-NEXT:    [[TMP169:%.*]] = shufflevector <2 x i32> [[TMP167]], <2 x i32> [[TMP168]], <2 x i32> <i32 0, i32 3>
+; THR15-NEXT:    [[ADD105_3:%.*]] = add i32 [[SUB102_3]], [[SUB86_3]]
+; THR15-NEXT:    [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]]
+; THR15-NEXT:    [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]]
+; THR15-NEXT:    [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]]
+; THR15-NEXT:    [[TMP170:%.*]] = lshr <2 x i32> [[TMP65]], <i32 15, i32 15>
+; THR15-NEXT:    [[TMP171:%.*]] = and <2 x i32> [[TMP170]], <i32 65537, i32 65537>
+; THR15-NEXT:    [[TMP172:%.*]] = mul <2 x i32> [[TMP171]], <i32 65535, i32 65535>
+; THR15-NEXT:    [[TMP173:%.*]] = add <2 x i32> [[TMP172]], [[TMP169]]
+; THR15-NEXT:    [[TMP174:%.*]] = xor <2 x i32> [[TMP173]], [[TMP65]]
+; THR15-NEXT:    [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15
+; THR15-NEXT:    [[AND_I60_3:%.*]] = and i32 [[SHR_I59_3]], 65537
+; THR15-NEXT:    [[MUL_I61_3:%.*]] = mul i32 [[AND_I60_3]], 65535
+; THR15-NEXT:    [[ADD_I62_3:%.*]] = add i32 [[MUL_I61_3]], [[SUB106_3]]
+; THR15-NEXT:    [[XOR_I63_3:%.*]] = xor i32 [[ADD_I62_3]], [[CONV33]]
+; THR15-NEXT:    [[ADD108_3:%.*]] = add i32 [[XOR_I53_3]], [[ADD113_2]]
+; THR15-NEXT:    [[TMP175:%.*]] = extractelement <2 x i32> [[TMP174]], i32 0
+; THR15-NEXT:    [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP175]]
+; THR15-NEXT:    [[TMP176:%.*]] = extractelement <2 x i32> [[TMP174]], i32 1
+; THR15-NEXT:    [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP176]]
+; THR15-NEXT:    [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I63_3]]
+; THR15-NEXT:    ret i32 [[ADD113_3]]
+;
 entry:
   %0 = load i8, ptr %pix1, align 1
   %conv = zext i8 %0 to i32


        


More information about the llvm-commits mailing list