[llvm] 7a8c226 - [SLP] add test for partially vectorized bswap (PR39538); NFC

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 27 14:54:28 PDT 2020


Author: Sanjay Patel
Date: 2020-04-27T17:29:27-04:00
New Revision: 7a8c226ba87d93e86d8beac280adc949b7af764d

URL: https://github.com/llvm/llvm-project/commit/7a8c226ba87d93e86d8beac280adc949b7af764d
DIFF: https://github.com/llvm/llvm-project/commit/7a8c226ba87d93e86d8beac280adc949b7af764d.diff

LOG: [SLP] add test for partially vectorized bswap (PR39538); NFC

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
index c44a8524edfe..fb206b84fa93 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
@@ -356,3 +356,150 @@ define i64 @load64le_nop_shift(i8* %arg) {
   %o7 = or i64 %o6, %s7
   ret i64 %o7
 }
+
+define void @PR39538(i8* %t0, i32* %t1) {
+; CHECK-LABEL: @PR39538(
+; CHECK-NEXT:    [[T6:%.*]] = getelementptr inbounds i8, i8* [[T0:%.*]], i64 1
+; CHECK-NEXT:    [[T11:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 2
+; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 3
+; CHECK-NEXT:    [[T20:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 4
+; CHECK-NEXT:    [[T24:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 5
+; CHECK-NEXT:    [[T29:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 6
+; CHECK-NEXT:    [[T34:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 7
+; CHECK-NEXT:    [[T39:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 8
+; CHECK-NEXT:    [[T43:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 9
+; CHECK-NEXT:    [[T48:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 10
+; CHECK-NEXT:    [[T53:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 11
+; CHECK-NEXT:    [[T58:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 12
+; CHECK-NEXT:    [[T62:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 13
+; CHECK-NEXT:    [[T67:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 14
+; CHECK-NEXT:    [[T72:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 15
+; CHECK-NEXT:    [[T38:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1
+; CHECK-NEXT:    [[T57:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 2
+; CHECK-NEXT:    [[T76:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 3
+; CHECK-NEXT:    [[T3:%.*]] = load i8, i8* [[T0]], align 1
+; CHECK-NEXT:    [[T7:%.*]] = load i8, i8* [[T6]], align 1
+; CHECK-NEXT:    [[T12:%.*]] = load i8, i8* [[T11]], align 1
+; CHECK-NEXT:    [[T17:%.*]] = load i8, i8* [[T16]], align 1
+; CHECK-NEXT:    [[T21:%.*]] = load i8, i8* [[T20]], align 1
+; CHECK-NEXT:    [[T25:%.*]] = load i8, i8* [[T24]], align 1
+; CHECK-NEXT:    [[T30:%.*]] = load i8, i8* [[T29]], align 1
+; CHECK-NEXT:    [[T35:%.*]] = load i8, i8* [[T34]], align 1
+; CHECK-NEXT:    [[T40:%.*]] = load i8, i8* [[T39]], align 1
+; CHECK-NEXT:    [[T44:%.*]] = load i8, i8* [[T43]], align 1
+; CHECK-NEXT:    [[T49:%.*]] = load i8, i8* [[T48]], align 1
+; CHECK-NEXT:    [[T54:%.*]] = load i8, i8* [[T53]], align 1
+; CHECK-NEXT:    [[T59:%.*]] = load i8, i8* [[T58]], align 1
+; CHECK-NEXT:    [[T63:%.*]] = load i8, i8* [[T62]], align 1
+; CHECK-NEXT:    [[T68:%.*]] = load i8, i8* [[T67]], align 1
+; CHECK-NEXT:    [[T73:%.*]] = load i8, i8* [[T72]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i8> undef, i8 [[T3]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[T21]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[T40]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 [[T59]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i8> undef, i8 [[T7]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[T25]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[T44]], i32 2
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i8> [[TMP8]], i8 [[T63]], i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i32>
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i8> undef, i8 [[T12]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i8> [[TMP11]], i8 [[T30]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i8> [[TMP12]], i8 [[T49]], i32 2
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[T68]], i32 3
+; CHECK-NEXT:    [[TMP15:%.*]] = zext <4 x i8> [[TMP14]] to <4 x i32>
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i8> undef, i8 [[T17]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[T35]], i32 1
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[T54]], i32 2
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> [[TMP18]], i8 [[T73]], i32 3
+; CHECK-NEXT:    [[TMP20:%.*]] = zext <4 x i8> [[TMP19]] to <4 x i32>
+; CHECK-NEXT:    [[TMP21:%.*]] = shl nuw <4 x i32> [[TMP5]], <i32 24, i32 24, i32 24, i32 24>
+; CHECK-NEXT:    [[TMP22:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP23:%.*]] = shl nuw nsw <4 x i32> [[TMP15]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[TMP24:%.*]] = or <4 x i32> [[TMP22]], [[TMP21]]
+; CHECK-NEXT:    [[TMP25:%.*]] = or <4 x i32> [[TMP24]], [[TMP23]]
+; CHECK-NEXT:    [[TMP26:%.*]] = or <4 x i32> [[TMP25]], [[TMP20]]
+; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i32* [[T1]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP26]], <4 x i32>* [[TMP27]], align 4
+; CHECK-NEXT:    ret void
+;
+  %t6 = getelementptr inbounds i8, i8* %t0, i64 1
+  %t11 = getelementptr inbounds i8, i8* %t0, i64 2
+  %t16 = getelementptr inbounds i8, i8* %t0, i64 3
+  %t20 = getelementptr inbounds i8, i8* %t0, i64 4
+  %t24 = getelementptr inbounds i8, i8* %t0, i64 5
+  %t29 = getelementptr inbounds i8, i8* %t0, i64 6
+  %t34 = getelementptr inbounds i8, i8* %t0, i64 7
+  %t39 = getelementptr inbounds i8, i8* %t0, i64 8
+  %t43 = getelementptr inbounds i8, i8* %t0, i64 9
+  %t48 = getelementptr inbounds i8, i8* %t0, i64 10
+  %t53 = getelementptr inbounds i8, i8* %t0, i64 11
+  %t58 = getelementptr inbounds i8, i8* %t0, i64 12
+  %t62 = getelementptr inbounds i8, i8* %t0, i64 13
+  %t67 = getelementptr inbounds i8, i8* %t0, i64 14
+  %t72 = getelementptr inbounds i8, i8* %t0, i64 15
+  %t38 = getelementptr inbounds i32, i32* %t1, i64 1
+  %t57 = getelementptr inbounds i32, i32* %t1, i64 2
+  %t76 = getelementptr inbounds i32, i32* %t1, i64 3
+  %t3 = load i8, i8* %t0, align 1
+  %t7 = load i8, i8* %t6, align 1
+  %t12 = load i8, i8* %t11, align 1
+  %t17 = load i8, i8* %t16, align 1
+  %t21 = load i8, i8* %t20, align 1
+  %t25 = load i8, i8* %t24, align 1
+  %t30 = load i8, i8* %t29, align 1
+  %t35 = load i8, i8* %t34, align 1
+  %t40 = load i8, i8* %t39, align 1
+  %t44 = load i8, i8* %t43, align 1
+  %t49 = load i8, i8* %t48, align 1
+  %t54 = load i8, i8* %t53, align 1
+  %t59 = load i8, i8* %t58, align 1
+  %t63 = load i8, i8* %t62, align 1
+  %t68 = load i8, i8* %t67, align 1
+  %t73 = load i8, i8* %t72, align 1
+  %t4 = zext i8 %t3 to i32
+  %t8 = zext i8 %t7 to i32
+  %t13 = zext i8 %t12 to i32
+  %t18 = zext i8 %t17 to i32
+  %t22 = zext i8 %t21 to i32
+  %t26 = zext i8 %t25 to i32
+  %t31 = zext i8 %t30 to i32
+  %t36 = zext i8 %t35 to i32
+  %t41 = zext i8 %t40 to i32
+  %t45 = zext i8 %t44 to i32
+  %t50 = zext i8 %t49 to i32
+  %t55 = zext i8 %t54 to i32
+  %t60 = zext i8 %t59 to i32
+  %t64 = zext i8 %t63 to i32
+  %t69 = zext i8 %t68 to i32
+  %t74 = zext i8 %t73 to i32
+  %t5 = shl nuw i32 %t4, 24
+  %t23 = shl nuw i32 %t22, 24
+  %t42 = shl nuw i32 %t41, 24
+  %t61 = shl nuw i32 %t60, 24
+  %t9 = shl nuw nsw i32 %t8, 16
+  %t27 = shl nuw nsw i32 %t26, 16
+  %t46 = shl nuw nsw i32 %t45, 16
+  %t65 = shl nuw nsw i32 %t64, 16
+  %t14 = shl nuw nsw i32 %t13, 8
+  %t32 = shl nuw nsw i32 %t31, 8
+  %t51 = shl nuw nsw i32 %t50, 8
+  %t70 = shl nuw nsw i32 %t69, 8
+  %t10 = or i32 %t9, %t5
+  %t15 = or i32 %t10, %t14
+  %t19 = or i32 %t15, %t18
+  %t28 = or i32 %t27, %t23
+  %t33 = or i32 %t28, %t32
+  %t37 = or i32 %t33, %t36
+  %t47 = or i32 %t46, %t42
+  %t52 = or i32 %t47, %t51
+  %t56 = or i32 %t52, %t55
+  %t66 = or i32 %t65, %t61
+  %t71 = or i32 %t66, %t70
+  %t75 = or i32 %t71, %t74
+  store i32 %t19, i32* %t1, align 4
+  store i32 %t37, i32* %t38, align 4
+  store i32 %t56, i32* %t57, align 4
+  store i32 %t75, i32* %t76, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list