[PATCH] D44785: Lowering x86 adds/addus/subs/subus intrinsics (llvm part)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 24 13:35:39 PDT 2018
Simon or Tomasz, have you had any luck reproducing this?
~Craig
On Fri, Apr 20, 2018 at 1:14 PM, Tom Hudson via Phabricator <
reviews at reviews.llvm.org> wrote:
> tomhudson added a subscriber: test.
> tomhudson added a comment.
>
> RKSimon, is this what you're asking for?
>
> ; Function Attrs: nounwind readnone
> declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) #0
>
> ; Function Attrs: nounwind
> declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) #1
>
> define void @test(<16 x i8>*, <16 x i8>*, <16 x i8>*, <16 x i8>*, <16 x
> i8>*) {
> entry:
>
> %src = load <16 x i8>, <16 x i8>* %0
> %src1 = load <16 x i8>, <16 x i8>* %1
> %dst = load <16 x i8>, <16 x i8>* %2
> %const = load <16 x i8>, <16 x i8>* %3
> %5 = and <16 x i8> %src, <i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8
> -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1>
> %6 = bitcast <16 x i8> %5 to <4 x i32>
> %7 = lshr <4 x i32> %6, <i32 8, i32 8, i32 8, i32 8>
> %8 = or <4 x i32> %6, %7
> %9 = lshr <4 x i32> %8, <i32 16, i32 16, i32 16, i32 16>
> %10 = or <4 x i32> %8, %9
> %11 = bitcast <4 x i32> %10 to <16 x i8>
> %12 = xor <16 x i8> %src, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
> -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
> %13 = select <16 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false,
> i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1
> false, i1 false, i1 false, i1 true>, <16 x i8> %12, <16 x i8> %src1
> %14 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %15 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %16 = bitcast <16 x i8> %14 to <8 x i16>
> %17 = bitcast <16 x i8> %15 to <8 x i16>
> %18 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %19 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %20 = bitcast <16 x i8> %18 to <8 x i16>
> %21 = bitcast <16 x i8> %19 to <8 x i16>
> %22 = mul <8 x i16> %16, %20
> %23 = lshr <8 x i16> %22, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %24 = add <8 x i16> %22, %23
> %25 = add <8 x i16> %24, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %26 = lshr <8 x i16> %25, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %27 = mul <8 x i16> %17, %21
> %28 = lshr <8 x i16> %27, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %29 = add <8 x i16> %27, %28
> %30 = add <8 x i16> %29, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %31 = lshr <8 x i16> %30, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %32 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %26, <8 x
> i16> %31)
> %33 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %34 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %35 = bitcast <16 x i8> %33 to <8 x i16>
> %36 = bitcast <16 x i8> %34 to <8 x i16>
> %37 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %38 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %39 = bitcast <16 x i8> %37 to <8 x i16>
> %40 = bitcast <16 x i8> %38 to <8 x i16>
> %41 = mul <8 x i16> %35, %39
> %42 = lshr <8 x i16> %41, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %43 = add <8 x i16> %41, %42
> %44 = add <8 x i16> %43, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %45 = lshr <8 x i16> %44, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %46 = mul <8 x i16> %36, %40
> %47 = lshr <8 x i16> %46, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %48 = add <8 x i16> %46, %47
> %49 = add <8 x i16> %48, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %50 = lshr <8 x i16> %49, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %51 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %45, <8 x
> i16> %50)
> %52 = icmp ult <16 x i8> %32, %51
> %53 = sext <16 x i1> %52 to <16 x i8>
> %54 = trunc <16 x i8> %53 to <16 x i1>
> %55 = select <16 x i1> %54, <16 x i8> %32, <16 x i8> %51
> %56 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %57 = shufflevector <16 x i8> %src, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %58 = bitcast <16 x i8> %56 to <8 x i16>
> %59 = bitcast <16 x i8> %57 to <8 x i16>
> %60 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %61 = shufflevector <16 x i8> %11, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %62 = bitcast <16 x i8> %60 to <8 x i16>
> %63 = bitcast <16 x i8> %61 to <8 x i16>
> %64 = mul <8 x i16> %58, %62
> %65 = lshr <8 x i16> %64, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %66 = add <8 x i16> %64, %65
> %67 = add <8 x i16> %66, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %68 = lshr <8 x i16> %67, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %69 = mul <8 x i16> %59, %63
> %70 = lshr <8 x i16> %69, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %71 = add <8 x i16> %69, %70
> %72 = add <8 x i16> %71, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %73 = lshr <8 x i16> %72, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %74 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %68, <8 x
> i16> %73)
> %75 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4,
> i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %76 = shufflevector <16 x i8> %dst, <16 x i8> zeroinitializer, <16 x
> i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12,
> i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %77 = bitcast <16 x i8> %75 to <8 x i16>
> %78 = bitcast <16 x i8> %76 to <8 x i16>
> %79 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20,
> i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
> %80 = shufflevector <16 x i8> %13, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32
> 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
> %81 = bitcast <16 x i8> %79 to <8 x i16>
> %82 = bitcast <16 x i8> %80 to <8 x i16>
> %83 = mul <8 x i16> %77, %81
> %84 = lshr <8 x i16> %83, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %85 = add <8 x i16> %83, %84
> %86 = add <8 x i16> %85, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %87 = lshr <8 x i16> %86, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %88 = mul <8 x i16> %78, %82
> %89 = lshr <8 x i16> %88, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %90 = add <8 x i16> %88, %89
> %91 = add <8 x i16> %90, <i16 128, i16 128, i16 128, i16 128, i16 128,
> i16 128, i16 128, i16 128>
> %92 = lshr <8 x i16> %91, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16
> 8, i16 8>
> %93 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %87, <8 x
> i16> %92)
> %94 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %74, <16 x i8>
> %93)
> %res = select <16 x i1> <i1 false, i1 false, i1 false, i1 true, i1
> false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true,
> i1 false, i1 false, i1 false, i1 true>, <16 x i8> %94, <16 x i8> %55
> store <16 x i8> %res, <16 x i8>* %4
> ret void
>
> }
>
>
> Repository:
> rL LLVM
>
> https://reviews.llvm.org/D44785
>
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180424/490adb3d/attachment.html>
More information about the llvm-commits mailing list