[PATCH] D44785: Lowering x86 adds/addus/subs/subus intrinsics (llvm part)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 24 13:35:39 PDT 2018


Simon or Tomasz, have you had any luck reproducing this?

~Craig

On Fri, Apr 20, 2018 at 1:14 PM, Tom Hudson via Phabricator <reviews at reviews.llvm.org> wrote:

> tomhudson added a subscriber: test.
> tomhudson added a comment.
>
> RKSimon, is this what you're asking for?
>
> ; Function Attrs: nounwind readnone
> declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) #0
>
> ; Function Attrs: nounwind
> declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) #1
>
> define void @test(<16 x i8>*, <16 x i8>*, <16 x i8>*, <16 x i8>*, <16 x
> i8>*) {
> entry:
>
>   %src = load <16 x i8>, <16 x i8>* %0
>   %src1 = load <16 x i8>, <16 x i8>* %1
>   %dst = load <16 x i8>, <16 x i8>* %2
>   %const = load <16 x i8>, <16 x i8>* %3
>   %5 = and <16 x i8> %src, <i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8
> -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1>
>   %6 = bitcast <16 x i8> %5 to <4 x i32>
>   %7 = lshr <4 x i32> %6, <i32 8, i32 8, i32 8, i32 8>
>   %8 = or <4 x i32> %6, %7
>   %9 = lshr <4 x i32> %8, <i32 16, i32 16, i32 16, i32 16>
>   %10 = or <4 x i32> %8, %9
>   %11 = bitcast <4 x i32> %10 to <16 x i8>
>   %12 = xor <16 x i8> %src, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
> -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
>   %13 = select <16 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false,
> i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1
> false, i1 false, i1 false, i1 true>, <16 x i8> %12, <16 x i8> %src1
>   %14 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %15 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %16 = bitcast <16 x i8> %14 to <8 x i16>
>   %17 = bitcast <16 x i8> %15 to <8 x i16>
>   %18 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %19 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %20 = bitcast <16 x i8> %18 to <8 x i16>
>   %21 = bitcast <16 x i8> %19 to <8 x i16>
>   %22 = mul <8 x i16> %16, %20
>   %23 = lshr <8 x i16> %22, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %24 = add <8 x i16> %22, %23
>   %25 = add <8 x i16> %24, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %26 = lshr <8 x i16> %25, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %27 = mul <8 x i16> %17, %21
>   %28 = lshr <8 x i16> %27, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %29 = add <8 x i16> %27, %28
>   %30 = add <8 x i16> %29, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %31 = lshr <8 x i16> %30, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %32 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %26, <8 x
> i16> %31)
>   %33 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %34 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %35 = bitcast <16 x i8> %33 to <8 x i16>
>   %36 = bitcast <16 x i8> %34 to <8 x i16>
>   %37 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %38 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %39 = bitcast <16 x i8> %37 to <8 x i16>
>   %40 = bitcast <16 x i8> %38 to <8 x i16>
>   %41 = mul <8 x i16> %35, %39
>   %42 = lshr <8 x i16> %41, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %43 = add <8 x i16> %41, %42
>   %44 = add <8 x i16> %43, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %45 = lshr <8 x i16> %44, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %46 = mul <8 x i16> %36, %40
>   %47 = lshr <8 x i16> %46, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %48 = add <8 x i16> %46, %47
>   %49 = add <8 x i16> %48, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %50 = lshr <8 x i16> %49, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %51 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %45, <8 x
> i16> %50)
>   %52 = icmp ult <16 x i8> %32, %51
>   %53 = sext <16 x i1> %52 to <16 x i8>
>   %54 = trunc <16 x i8> %53 to <16 x i1>
>   %55 = select <16 x i1> %54, <16 x i8> %32, <16 x i8> %51
>   %56 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %57 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %58 = bitcast <16 x i8> %56 to <8 x i16>
>   %59 = bitcast <16 x i8> %57 to <8 x i16>
>   %60 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %61 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %62 = bitcast <16 x i8> %60 to <8 x i16>
>   %63 = bitcast <16 x i8> %61 to <8 x i16>
>   %64 = mul <8 x i16> %58, %62
>   %65 = lshr <8 x i16> %64, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %66 = add <8 x i16> %64, %65
>   %67 = add <8 x i16> %66, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %68 = lshr <8 x i16> %67, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %69 = mul <8 x i16> %59, %63
>   %70 = lshr <8 x i16> %69, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %71 = add <8 x i16> %69, %70
>   %72 = add <8 x i16> %71, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %73 = lshr <8 x i16> %72, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %74 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %68, <8 x
> i16> %73)
>   %75 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %76 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %77 = bitcast <16 x i8> %75 to <8 x i16>
>   %78 = bitcast <16 x i8> %76 to <8 x i16>
>   %79 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
>   %80 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
>   %81 = bitcast <16 x i8> %79 to <8 x i16>
>   %82 = bitcast <16 x i8> %80 to <8 x i16>
>   %83 = mul <8 x i16> %77, %81
>   %84 = lshr <8 x i16> %83, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %85 = add <8 x i16> %83, %84
>   %86 = add <8 x i16> %85, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %87 = lshr <8 x i16> %86, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %88 = mul <8 x i16> %78, %82
>   %89 = lshr <8 x i16> %88, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %90 = add <8 x i16> %88, %89
>   %91 = add <8 x i16> %90, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
>   %92 = lshr <8 x i16> %91, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
>   %93 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %87, <8 x
> i16> %92)
>   %94 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %74, <16 x i8>
> %93)
>   %res = select <16 x i1> <i1 false, i1 false, i1 false, i1 true, i1
> false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true,
> i1 false, i1 false, i1 false, i1 true>, <16 x i8> %94, <16 x i8> %55
>   store <16 x i8> %res, <16 x i8>* %4
>   ret void
>
> }
>
>
> Repository:
>   rL LLVM
>
> https://reviews.llvm.org/D44785
>
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180424/490adb3d/attachment.html>


More information about the llvm-commits mailing list