[llvm] 09dcf31 - [NFC] Add tests for i128 fshl on a few targets.

Roman Lebedev via llvm-commits <llvm-commits at lists.llvm.org>
Tue Aug 24 11:46:06 PDT 2021


Simon already added an i128 test for X86 in
778440f1996f2a442fb691058b97bbb0a9cf7300
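
For reference, llvm.fshl concatenates its first operand (as the
most-significant half) with its second operand, shifts the double-wide
value left by the third operand modulo the bit width, and returns the
most-significant half of the result. A rough C sketch of those semantics
at i64 width, using a hypothetical fshl64 helper and assuming a compiler
with __int128 support (illustration only, not code from this patch):

    #include <stdint.h>

    /* fshl(x, y, z) for i64: the high 64 bits of ((x:y) << (z % 64)). */
    static uint64_t fshl64(uint64_t x, uint64_t y, uint64_t z) {
      unsigned __int128 wide = ((unsigned __int128)x << 64) | y; /* x high, y low */
      wide <<= (z % 64);             /* shift amount is taken modulo the bit width */
      return (uint64_t)(wide >> 64); /* most-significant half is the result */
    }

The i128 tests below perform the same operation at twice the word size of
the 64-bit targets, so the backends have to expand it into multi-word
shifts; that expansion is what the new CHECK lines capture.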

On Tue, Aug 24, 2021 at 9:44 PM Eli Friedman via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Eli Friedman
> Date: 2021-08-24T11:43:35-07:00
> New Revision: 09dcf31d7449f79cfa525a630b7dcc2affc9a930
>
> URL: https://github.com/llvm/llvm-project/commit/09dcf31d7449f79cfa525a630b7dcc2affc9a930
> DIFF: https://github.com/llvm/llvm-project/commit/09dcf31d7449f79cfa525a630b7dcc2affc9a930.diff
>
> LOG: [NFC] Add tests for i128 fshl on a few targets.
>
> In preparation for D108058.
>
> Added:
>
>
> Modified:
>     llvm/test/CodeGen/AArch64/funnel-shift.ll
>     llvm/test/CodeGen/PowerPC/funnel-shift.ll
>     llvm/test/CodeGen/X86/funnel-shift.ll
>
> Removed:
>
>
>
> ################################################################################
> diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
> index 7e3b284d0b59..832ee5d39236 100644
> --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
> +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
> @@ -5,6 +5,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
>  declare i16 @llvm.fshl.i16(i16, i16, i16)
>  declare i32 @llvm.fshl.i32(i32, i32, i32)
>  declare i64 @llvm.fshl.i64(i64, i64, i64)
> +declare i128 @llvm.fshl.i128(i128, i128, i128)
>  declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
>
>  declare i8 @llvm.fshr.i8(i8, i8, i8)
> @@ -42,6 +43,37 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
>    ret i64 %f
>  }
>
> +define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
> +; CHECK-LABEL: fshl_i128:
> +; CHECK:       // %bb.0:
> +; CHECK-NEXT:    mvn w9, w4
> +; CHECK-NEXT:    and x12, x9, #0x7f
> +; CHECK-NEXT:    extr x8, x3, x2, #1
> +; CHECK-NEXT:    lsr x10, x3, #1
> +; CHECK-NEXT:    tst x12, #0x40
> +; CHECK-NEXT:    lsr x12, x0, #1
> +; CHECK-NEXT:    lsr x8, x8, x9
> +; CHECK-NEXT:    lsr x12, x12, x9
> +; CHECK-NEXT:    lsr x9, x10, x9
> +; CHECK-NEXT:    lsl x10, x10, #1
> +; CHECK-NEXT:    lsl x10, x10, x4
> +; CHECK-NEXT:    lsl x11, x1, x4
> +; CHECK-NEXT:    and x14, x4, #0x7f
> +; CHECK-NEXT:    orr x8, x10, x8
> +; CHECK-NEXT:    lsl x13, x0, x4
> +; CHECK-NEXT:    orr x11, x11, x12
> +; CHECK-NEXT:    csel x10, xzr, x9, ne
> +; CHECK-NEXT:    csel x8, x9, x8, ne
> +; CHECK-NEXT:    tst x14, #0x40
> +; CHECK-NEXT:    csel x9, x13, x11, ne
> +; CHECK-NEXT:    csel x11, xzr, x13, ne
> +; CHECK-NEXT:    orr x1, x9, x10
> +; CHECK-NEXT:    orr x0, x11, x8
> +; CHECK-NEXT:    ret
> +  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
> +  ret i128 %f
> +}
> +
>  ; Verify that weird types are minimally supported.
>  declare i37 @llvm.fshl.i37(i37, i37, i37)
>  define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
>
> diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
> index 9a4398b90ab4..c33904082f23 100644
> --- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
> +++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
> @@ -7,6 +7,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
>  declare i16 @llvm.fshl.i16(i16, i16, i16)
>  declare i32 @llvm.fshl.i32(i32, i32, i32)
>  declare i64 @llvm.fshl.i64(i64, i64, i64)
> +declare i128 @llvm.fshl.i128(i128, i128, i128)
>  declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
>
>  declare i8 @llvm.fshr.i8(i8, i8, i8)
> @@ -108,6 +109,395 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
>    ret i64 %f
>  }
>
> +define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
> +; CHECK32_32-LABEL: fshl_i128:
> +; CHECK32_32:       # %bb.0:
> +; CHECK32_32-NEXT:    stwu 1, -64(1)
> +; CHECK32_32-NEXT:    lwz 0, 84(1)
> +; CHECK32_32-NEXT:    rotlwi 12, 8, 31
> +; CHECK32_32-NEXT:    srwi 11, 7, 1
> +; CHECK32_32-NEXT:    rlwimi 12, 7, 31, 0, 0
> +; CHECK32_32-NEXT:    andi. 7, 0, 127
> +; CHECK32_32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    rotlwi 10, 10, 31
> +; CHECK32_32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    rotlwi 30, 9, 31
> +; CHECK32_32-NEXT:    subfic 27, 7, 32
> +; CHECK32_32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    rlwimi 10, 9, 31, 0, 0
> +; CHECK32_32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    rlwimi 30, 8, 31, 0, 0
> +; CHECK32_32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    not 8, 0
> +; CHECK32_32-NEXT:    subfic 9, 7, 96
> +; CHECK32_32-NEXT:    addi 0, 7, -64
> +; CHECK32_32-NEXT:    slw 28, 3, 7
> +; CHECK32_32-NEXT:    subfic 25, 7, 64
> +; CHECK32_32-NEXT:    srw 22, 4, 27
> +; CHECK32_32-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    srw 9, 6, 9
> +; CHECK32_32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    slw 23, 5, 0
> +; CHECK32_32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    addi 29, 7, -96
> +; CHECK32_32-NEXT:    srw 20, 5, 25
> +; CHECK32_32-NEXT:    or 28, 28, 22
> +; CHECK32_32-NEXT:    srw 22, 6, 25
> +; CHECK32_32-NEXT:    subfic 25, 25, 32
> +; CHECK32_32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    mcrf 1, 0
> +; CHECK32_32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    addi 26, 7, -32
> +; CHECK32_32-NEXT:    andi. 8, 8, 127
> +; CHECK32_32-NEXT:    slw 24, 5, 7
> +; CHECK32_32-NEXT:    slw 29, 6, 29
> +; CHECK32_32-NEXT:    or 9, 23, 9
> +; CHECK32_32-NEXT:    slw 25, 5, 25
> +; CHECK32_32-NEXT:    srw 5, 5, 27
> +; CHECK32_32-NEXT:    srw 27, 6, 27
> +; CHECK32_32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    slw 21, 4, 26
> +; CHECK32_32-NEXT:    subfic 23, 8, 32
> +; CHECK32_32-NEXT:    or 27, 24, 27
> +; CHECK32_32-NEXT:    subfic 24, 8, 96
> +; CHECK32_32-NEXT:    or 9, 9, 29
> +; CHECK32_32-NEXT:    addi 29, 8, -64
> +; CHECK32_32-NEXT:    or 25, 22, 25
> +; CHECK32_32-NEXT:    stw 19, 12(1) # 4-byte Folded Spill
> +; CHECK32_32-NEXT:    srw 19, 12, 8
> +; CHECK32_32-NEXT:    or 28, 28, 21
> +; CHECK32_32-NEXT:    slw 21, 11, 23
> +; CHECK32_32-NEXT:    slw 24, 11, 24
> +; CHECK32_32-NEXT:    srw 22, 12, 29
> +; CHECK32_32-NEXT:    slw 26, 6, 26
> +; CHECK32_32-NEXT:    or 5, 25, 5
> +; CHECK32_32-NEXT:    addi 25, 8, -96
> +; CHECK32_32-NEXT:    or 21, 19, 21
> +; CHECK32_32-NEXT:    srw 19, 10, 8
> +; CHECK32_32-NEXT:    or 24, 22, 24
> +; CHECK32_32-NEXT:    slw 22, 30, 23
> +; CHECK32_32-NEXT:    or 27, 27, 26
> +; CHECK32_32-NEXT:    addi 26, 8, -32
> +; CHECK32_32-NEXT:    srw 25, 11, 25
> +; CHECK32_32-NEXT:    or 22, 19, 22
> +; CHECK32_32-NEXT:    or 28, 28, 20
> +; CHECK32_32-NEXT:    srw 20, 11, 26
> +; CHECK32_32-NEXT:    or 25, 24, 25
> +; CHECK32_32-NEXT:    subfic 24, 8, 64
> +; CHECK32_32-NEXT:    srw 26, 30, 26
> +; CHECK32_32-NEXT:    or 26, 22, 26
> +; CHECK32_32-NEXT:    subfic 22, 24, 32
> +; CHECK32_32-NEXT:    slw 23, 12, 23
> +; CHECK32_32-NEXT:    srw 22, 12, 22
> +; CHECK32_32-NEXT:    slw 12, 12, 24
> +; CHECK32_32-NEXT:    slw 24, 11, 24
> +; CHECK32_32-NEXT:    cmplwi 5, 7, 64
> +; CHECK32_32-NEXT:    or 24, 24, 22
> +; CHECK32_32-NEXT:    slw 22, 6, 0
> +; CHECK32_32-NEXT:    slw 6, 6, 7
> +; CHECK32_32-NEXT:    slw 7, 4, 7
> +; CHECK32_32-NEXT:    srw 29, 11, 29
> +; CHECK32_32-NEXT:    srw 11, 11, 8
> +; CHECK32_32-NEXT:    cmplwi 6, 8, 64
> +; CHECK32_32-NEXT:    srw 8, 30, 8
> +; CHECK32_32-NEXT:    or 5, 7, 5
> +; CHECK32_32-NEXT:    or 7, 26, 12
> +; CHECK32_32-NEXT:    or 12, 24, 23
> +; CHECK32_32-NEXT:    bc 12, 20, .LBB2_1
> +; CHECK32_32-NEXT:    b .LBB2_2
> +; CHECK32_32-NEXT:  .LBB2_1:
> +; CHECK32_32-NEXT:    addi 9, 28, 0
> +; CHECK32_32-NEXT:  .LBB2_2:
> +; CHECK32_32-NEXT:    li 28, 0
> +; CHECK32_32-NEXT:    bc 12, 20, .LBB2_4
> +; CHECK32_32-NEXT:  # %bb.3:
> +; CHECK32_32-NEXT:    ori 5, 22, 0
> +; CHECK32_32-NEXT:    b .LBB2_4
> +; CHECK32_32-NEXT:  .LBB2_4:
> +; CHECK32_32-NEXT:    bc 12, 24, .LBB2_6
> +; CHECK32_32-NEXT:  # %bb.5:
> +; CHECK32_32-NEXT:    ori 7, 25, 0
> +; CHECK32_32-NEXT:    b .LBB2_6
> +; CHECK32_32-NEXT:  .LBB2_6:
> +; CHECK32_32-NEXT:    or 8, 8, 12
> +; CHECK32_32-NEXT:    or 21, 21, 20
> +; CHECK32_32-NEXT:    bc 12, 20, .LBB2_8
> +; CHECK32_32-NEXT:  # %bb.7:
> +; CHECK32_32-NEXT:    ori 6, 28, 0
> +; CHECK32_32-NEXT:    b .LBB2_8
> +; CHECK32_32-NEXT:  .LBB2_8:
> +; CHECK32_32-NEXT:    bc 12, 6, .LBB2_10
> +; CHECK32_32-NEXT:  # %bb.9:
> +; CHECK32_32-NEXT:    ori 4, 5, 0
> +; CHECK32_32-NEXT:    b .LBB2_10
> +; CHECK32_32-NEXT:  .LBB2_10:
> +; CHECK32_32-NEXT:    bc 12, 2, .LBB2_12
> +; CHECK32_32-NEXT:  # %bb.11:
> +; CHECK32_32-NEXT:    ori 5, 7, 0
> +; CHECK32_32-NEXT:    b .LBB2_13
> +; CHECK32_32-NEXT:  .LBB2_12:
> +; CHECK32_32-NEXT:    addi 5, 10, 0
> +; CHECK32_32-NEXT:  .LBB2_13:
> +; CHECK32_32-NEXT:    bc 12, 24, .LBB2_15
> +; CHECK32_32-NEXT:  # %bb.14:
> +; CHECK32_32-NEXT:    ori 7, 29, 0
> +; CHECK32_32-NEXT:    ori 11, 28, 0
> +; CHECK32_32-NEXT:    ori 0, 28, 0
> +; CHECK32_32-NEXT:    b .LBB2_16
> +; CHECK32_32-NEXT:  .LBB2_15:
> +; CHECK32_32-NEXT:    addi 7, 8, 0
> +; CHECK32_32-NEXT:    addi 0, 21, 0
> +; CHECK32_32-NEXT:  .LBB2_16:
> +; CHECK32_32-NEXT:    bc 12, 6, .LBB2_18
> +; CHECK32_32-NEXT:  # %bb.17:
> +; CHECK32_32-NEXT:    ori 3, 9, 0
> +; CHECK32_32-NEXT:    b .LBB2_18
> +; CHECK32_32-NEXT:  .LBB2_18:
> +; CHECK32_32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    or 6, 6, 5
> +; CHECK32_32-NEXT:    bc 12, 20, .LBB2_20
> +; CHECK32_32-NEXT:  # %bb.19:
> +; CHECK32_32-NEXT:    ori 5, 28, 0
> +; CHECK32_32-NEXT:    b .LBB2_21
> +; CHECK32_32-NEXT:  .LBB2_20:
> +; CHECK32_32-NEXT:    addi 5, 27, 0
> +; CHECK32_32-NEXT:  .LBB2_21:
> +; CHECK32_32-NEXT:    bc 12, 2, .LBB2_22
> +; CHECK32_32-NEXT:    b .LBB2_23
> +; CHECK32_32-NEXT:  .LBB2_22:
> +; CHECK32_32-NEXT:    addi 7, 30, 0
> +; CHECK32_32-NEXT:  .LBB2_23:
> +; CHECK32_32-NEXT:    or 3, 3, 11
> +; CHECK32_32-NEXT:    or 4, 4, 0
> +; CHECK32_32-NEXT:    or 5, 5, 7
> +; CHECK32_32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 28, 48(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 27, 44(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 24, 32(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 21, 20(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 20, 16(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    lwz 19, 12(1) # 4-byte Folded Reload
> +; CHECK32_32-NEXT:    addi 1, 1, 64
> +; CHECK32_32-NEXT:    blr
> +;
> +; CHECK32_64-LABEL: fshl_i128:
> +; CHECK32_64:       # %bb.0:
> +; CHECK32_64-NEXT:    stwu 1, -64(1)
> +; CHECK32_64-NEXT:    lwz 12, 84(1)
> +; CHECK32_64-NEXT:    rotlwi 11, 8, 31
> +; CHECK32_64-NEXT:    rotlwi 10, 10, 31
> +; CHECK32_64-NEXT:    rlwimi 10, 9, 31, 0, 0
> +; CHECK32_64-NEXT:    rlwimi 11, 7, 31, 0, 0
> +; CHECK32_64-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    rotlwi 30, 9, 31
> +; CHECK32_64-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    not 9, 12
> +; CHECK32_64-NEXT:    rlwimi 30, 8, 31, 0, 0
> +; CHECK32_64-NEXT:    andi. 8, 12, 127
> +; CHECK32_64-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    mcrf 1, 0
> +; CHECK32_64-NEXT:    subfic 12, 8, 96
> +; CHECK32_64-NEXT:    addi 0, 8, -64
> +; CHECK32_64-NEXT:    subfic 27, 8, 32
> +; CHECK32_64-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    andi. 9, 9, 127
> +; CHECK32_64-NEXT:    srw 12, 6, 12
> +; CHECK32_64-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    subfic 25, 8, 64
> +; CHECK32_64-NEXT:    slw 23, 5, 0
> +; CHECK32_64-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    addi 26, 8, -32
> +; CHECK32_64-NEXT:    srw 22, 4, 27
> +; CHECK32_64-NEXT:    srwi 7, 7, 1
> +; CHECK32_64-NEXT:    or 12, 23, 12
> +; CHECK32_64-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    slw 28, 3, 8
> +; CHECK32_64-NEXT:    srw 23, 6, 25
> +; CHECK32_64-NEXT:    stw 18, 8(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    subfic 18, 9, 32
> +; CHECK32_64-NEXT:    or 28, 28, 22
> +; CHECK32_64-NEXT:    srw 22, 5, 27
> +; CHECK32_64-NEXT:    srw 27, 6, 27
> +; CHECK32_64-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    srw 20, 5, 25
> +; CHECK32_64-NEXT:    subfic 25, 25, 32
> +; CHECK32_64-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    slw 21, 4, 26
> +; CHECK32_64-NEXT:    slw 26, 6, 26
> +; CHECK32_64-NEXT:    or 28, 28, 21
> +; CHECK32_64-NEXT:    slw 21, 7, 18
> +; CHECK32_64-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    slw 24, 5, 8
> +; CHECK32_64-NEXT:    slw 5, 5, 25
> +; CHECK32_64-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    addi 29, 8, -96
> +; CHECK32_64-NEXT:    subfic 25, 9, 96
> +; CHECK32_64-NEXT:    slw 29, 6, 29
> +; CHECK32_64-NEXT:    or 27, 24, 27
> +; CHECK32_64-NEXT:    stw 19, 12(1) # 4-byte Folded Spill
> +; CHECK32_64-NEXT:    srw 19, 11, 9
> +; CHECK32_64-NEXT:    addi 24, 9, -64
> +; CHECK32_64-NEXT:    or 12, 12, 29
> +; CHECK32_64-NEXT:    srw 29, 10, 9
> +; CHECK32_64-NEXT:    slw 25, 7, 25
> +; CHECK32_64-NEXT:    or 21, 19, 21
> +; CHECK32_64-NEXT:    srw 19, 11, 24
> +; CHECK32_64-NEXT:    or 5, 23, 5
> +; CHECK32_64-NEXT:    slw 23, 30, 18
> +; CHECK32_64-NEXT:    or 27, 27, 26
> +; CHECK32_64-NEXT:    addi 26, 9, -96
> +; CHECK32_64-NEXT:    or 25, 19, 25
> +; CHECK32_64-NEXT:    lwz 19, 12(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    or 29, 29, 23
> +; CHECK32_64-NEXT:    addi 23, 9, -32
> +; CHECK32_64-NEXT:    srw 26, 7, 26
> +; CHECK32_64-NEXT:    or 28, 28, 20
> +; CHECK32_64-NEXT:    srw 20, 7, 23
> +; CHECK32_64-NEXT:    or 26, 25, 26
> +; CHECK32_64-NEXT:    subfic 25, 9, 64
> +; CHECK32_64-NEXT:    srw 23, 30, 23
> +; CHECK32_64-NEXT:    or 29, 29, 23
> +; CHECK32_64-NEXT:    subfic 23, 25, 32
> +; CHECK32_64-NEXT:    or 5, 5, 22
> +; CHECK32_64-NEXT:    slw 22, 11, 18
> +; CHECK32_64-NEXT:    lwz 18, 8(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    srw 23, 11, 23
> +; CHECK32_64-NEXT:    slw 11, 11, 25
> +; CHECK32_64-NEXT:    slw 25, 7, 25
> +; CHECK32_64-NEXT:    cmplwi 5, 8, 64
> +; CHECK32_64-NEXT:    bc 12, 20, .LBB2_1
> +; CHECK32_64-NEXT:    b .LBB2_2
> +; CHECK32_64-NEXT:  .LBB2_1:
> +; CHECK32_64-NEXT:    addi 12, 28, 0
> +; CHECK32_64-NEXT:  .LBB2_2:
> +; CHECK32_64-NEXT:    lwz 28, 48(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    or 25, 25, 23
> +; CHECK32_64-NEXT:    bc 12, 6, .LBB2_4
> +; CHECK32_64-NEXT:  # %bb.3:
> +; CHECK32_64-NEXT:    ori 3, 12, 0
> +; CHECK32_64-NEXT:    b .LBB2_4
> +; CHECK32_64-NEXT:  .LBB2_4:
> +; CHECK32_64-NEXT:    slw 23, 6, 0
> +; CHECK32_64-NEXT:    slw 6, 6, 8
> +; CHECK32_64-NEXT:    slw 8, 4, 8
> +; CHECK32_64-NEXT:    cmplwi 6, 9, 64
> +; CHECK32_64-NEXT:    or 5, 8, 5
> +; CHECK32_64-NEXT:    bc 12, 20, .LBB2_6
> +; CHECK32_64-NEXT:  # %bb.5:
> +; CHECK32_64-NEXT:    ori 5, 23, 0
> +; CHECK32_64-NEXT:    b .LBB2_6
> +; CHECK32_64-NEXT:  .LBB2_6:
> +; CHECK32_64-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    li 8, 0
> +; CHECK32_64-NEXT:    srw 24, 7, 24
> +; CHECK32_64-NEXT:    bc 12, 6, .LBB2_8
> +; CHECK32_64-NEXT:  # %bb.7:
> +; CHECK32_64-NEXT:    ori 4, 5, 0
> +; CHECK32_64-NEXT:    b .LBB2_8
> +; CHECK32_64-NEXT:  .LBB2_8:
> +; CHECK32_64-NEXT:    bc 12, 20, .LBB2_10
> +; CHECK32_64-NEXT:  # %bb.9:
> +; CHECK32_64-NEXT:    ori 6, 8, 0
> +; CHECK32_64-NEXT:    b .LBB2_10
> +; CHECK32_64-NEXT:  .LBB2_10:
> +; CHECK32_64-NEXT:    srw 7, 7, 9
> +; CHECK32_64-NEXT:    srw 9, 30, 9
> +; CHECK32_64-NEXT:    bc 12, 24, .LBB2_12
> +; CHECK32_64-NEXT:  # %bb.11:
> +; CHECK32_64-NEXT:    ori 7, 8, 0
> +; CHECK32_64-NEXT:    b .LBB2_12
> +; CHECK32_64-NEXT:  .LBB2_12:
> +; CHECK32_64-NEXT:    or 0, 25, 22
> +; CHECK32_64-NEXT:    or 11, 29, 11
> +; CHECK32_64-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    bc 12, 24, .LBB2_14
> +; CHECK32_64-NEXT:  # %bb.13:
> +; CHECK32_64-NEXT:    ori 5, 26, 0
> +; CHECK32_64-NEXT:    b .LBB2_15
> +; CHECK32_64-NEXT:  .LBB2_14:
> +; CHECK32_64-NEXT:    addi 5, 11, 0
> +; CHECK32_64-NEXT:  .LBB2_15:
> +; CHECK32_64-NEXT:    or 9, 9, 0
> +; CHECK32_64-NEXT:    or 21, 21, 20
> +; CHECK32_64-NEXT:    bc 12, 2, .LBB2_16
> +; CHECK32_64-NEXT:    b .LBB2_17
> +; CHECK32_64-NEXT:  .LBB2_16:
> +; CHECK32_64-NEXT:    addi 5, 10, 0
> +; CHECK32_64-NEXT:  .LBB2_17:
> +; CHECK32_64-NEXT:    bc 12, 24, .LBB2_19
> +; CHECK32_64-NEXT:  # %bb.18:
> +; CHECK32_64-NEXT:    ori 0, 8, 0
> +; CHECK32_64-NEXT:    b .LBB2_20
> +; CHECK32_64-NEXT:  .LBB2_19:
> +; CHECK32_64-NEXT:    addi 0, 21, 0
> +; CHECK32_64-NEXT:  .LBB2_20:
> +; CHECK32_64-NEXT:    bc 12, 20, .LBB2_21
> +; CHECK32_64-NEXT:    b .LBB2_22
> +; CHECK32_64-NEXT:  .LBB2_21:
> +; CHECK32_64-NEXT:    addi 8, 27, 0
> +; CHECK32_64-NEXT:  .LBB2_22:
> +; CHECK32_64-NEXT:    lwz 27, 44(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    or 3, 3, 7
> +; CHECK32_64-NEXT:    bc 12, 24, .LBB2_24
> +; CHECK32_64-NEXT:  # %bb.23:
> +; CHECK32_64-NEXT:    ori 7, 24, 0
> +; CHECK32_64-NEXT:    b .LBB2_25
> +; CHECK32_64-NEXT:  .LBB2_24:
> +; CHECK32_64-NEXT:    addi 7, 9, 0
> +; CHECK32_64-NEXT:  .LBB2_25:
> +; CHECK32_64-NEXT:    or 4, 4, 0
> +; CHECK32_64-NEXT:    bc 12, 2, .LBB2_26
> +; CHECK32_64-NEXT:    b .LBB2_27
> +; CHECK32_64-NEXT:  .LBB2_26:
> +; CHECK32_64-NEXT:    addi 7, 30, 0
> +; CHECK32_64-NEXT:  .LBB2_27:
> +; CHECK32_64-NEXT:    or 6, 6, 5
> +; CHECK32_64-NEXT:    or 5, 8, 7
> +; CHECK32_64-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    lwz 24, 32(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    lwz 21, 20(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    lwz 20, 16(1) # 4-byte Folded Reload
> +; CHECK32_64-NEXT:    addi 1, 1, 64
> +; CHECK32_64-NEXT:    blr
> +;
> +; CHECK64-LABEL: fshl_i128:
> +; CHECK64:       # %bb.0:
> +; CHECK64-NEXT:    clrlwi 8, 7, 25
> +; CHECK64-NEXT:    rotldi 5, 5, 63
> +; CHECK64-NEXT:    not 7, 7
> +; CHECK64-NEXT:    rldicl 9, 6, 63, 1
> +; CHECK64-NEXT:    subfic 10, 8, 64
> +; CHECK64-NEXT:    addi 11, 8, -64
> +; CHECK64-NEXT:    rldimi 5, 6, 63, 0
> +; CHECK64-NEXT:    clrlwi 6, 7, 25
> +; CHECK64-NEXT:    srd 7, 3, 10
> +; CHECK64-NEXT:    sld 10, 3, 11
> +; CHECK64-NEXT:    subfic 11, 6, 64
> +; CHECK64-NEXT:    addi 12, 6, -64
> +; CHECK64-NEXT:    sld 4, 4, 8
> +; CHECK64-NEXT:    srd 5, 5, 6
> +; CHECK64-NEXT:    sld 11, 9, 11
> +; CHECK64-NEXT:    or 4, 4, 7
> +; CHECK64-NEXT:    or 5, 5, 11
> +; CHECK64-NEXT:    srd 7, 9, 12
> +; CHECK64-NEXT:    or 4, 4, 10
> +; CHECK64-NEXT:    srd 6, 9, 6
> +; CHECK64-NEXT:    or 5, 5, 7
> +; CHECK64-NEXT:    sld 3, 3, 8
> +; CHECK64-NEXT:    or 4, 4, 6
> +; CHECK64-NEXT:    or 3, 3, 5
> +; CHECK64-NEXT:    blr
> +  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
> +  ret i128 %f
> +}
> +
>  ; Verify that weird types are minimally supported.
>  declare i37 @llvm.fshl.i37(i37, i37, i37)
>  define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
>
> diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
> index 2120cb2581b9..8775957f7b7d 100644
> --- a/llvm/test/CodeGen/X86/funnel-shift.ll
> +++ b/llvm/test/CodeGen/X86/funnel-shift.ll
> @@ -7,6 +7,7 @@ declare i16 @llvm.fshl.i16(i16, i16, i16)
>  declare i32 @llvm.fshl.i32(i32, i32, i32)
>  declare i64 @llvm.fshl.i64(i64, i64, i64)
>  declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
> +declare i128 @llvm.fshl.i128(i128, i128, i128)
>
>  declare i8 @llvm.fshr.i8(i8, i8, i8)
>  declare i16 @llvm.fshr.i16(i16, i16, i16)
> @@ -36,6 +37,259 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
>    ret i32 %f
>  }
>
> +define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
> +; X32-SSE2-LABEL: fshl_i64:
> +; X32-SSE2:       # %bb.0:
> +; X32-SSE2-NEXT:    pushl %ebp
> +; X32-SSE2-NEXT:    pushl %ebx
> +; X32-SSE2-NEXT:    pushl %edi
> +; X32-SSE2-NEXT:    pushl %esi
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
> +; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %ch
> +; X32-SSE2-NEXT:    movb %ch, %cl
> +; X32-SSE2-NEXT:    notb %cl
> +; X32-SSE2-NEXT:    shrdl $1, %eax, %esi
> +; X32-SSE2-NEXT:    movl %eax, %ebx
> +; X32-SSE2-NEXT:    shrl %ebx
> +; X32-SSE2-NEXT:    shrdl %cl, %ebx, %esi
> +; X32-SSE2-NEXT:    shrl %cl, %ebx
> +; X32-SSE2-NEXT:    xorl %ebp, %ebp
> +; X32-SSE2-NEXT:    testb $32, %cl
> +; X32-SSE2-NEXT:    cmovnel %ebx, %esi
> +; X32-SSE2-NEXT:    cmovnel %ebp, %ebx
> +; X32-SSE2-NEXT:    movl %edi, %eax
> +; X32-SSE2-NEXT:    movb %ch, %cl
> +; X32-SSE2-NEXT:    shll %cl, %eax
> +; X32-SSE2-NEXT:    shldl %cl, %edi, %edx
> +; X32-SSE2-NEXT:    testb $32, %ch
> +; X32-SSE2-NEXT:    cmovnel %eax, %edx
> +; X32-SSE2-NEXT:    cmovnel %ebp, %eax
> +; X32-SSE2-NEXT:    orl %esi, %eax
> +; X32-SSE2-NEXT:    orl %ebx, %edx
> +; X32-SSE2-NEXT:    popl %esi
> +; X32-SSE2-NEXT:    popl %edi
> +; X32-SSE2-NEXT:    popl %ebx
> +; X32-SSE2-NEXT:    popl %ebp
> +; X32-SSE2-NEXT:    retl
> +;
> +; X64-AVX2-LABEL: fshl_i64:
> +; X64-AVX2:       # %bb.0:
> +; X64-AVX2-NEXT:    movq %rdx, %rcx
> +; X64-AVX2-NEXT:    movq %rdi, %rax
> +; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
> +; X64-AVX2-NEXT:    shldq %cl, %rsi, %rax
> +; X64-AVX2-NEXT:    retq
> +  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
> +  ret i64 %f
> +}
> +
> +define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
> +; X32-SSE2-LABEL: fshl_i128:
> +; X32-SSE2:       # %bb.0:
> +; X32-SSE2-NEXT:    pushl %ebp
> +; X32-SSE2-NEXT:    pushl %ebx
> +; X32-SSE2-NEXT:    pushl %edi
> +; X32-SSE2-NEXT:    pushl %esi
> +; X32-SSE2-NEXT:    subl $64, %esp
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
> +; X32-SSE2-NEXT:    movl %edi, %esi
> +; X32-SSE2-NEXT:    shldl $31, %ecx, %esi
> +; X32-SSE2-NEXT:    notl %edx
> +; X32-SSE2-NEXT:    andl $127, %edx
> +; X32-SSE2-NEXT:    movl %edx, (%esp) # 4-byte Spill
> +; X32-SSE2-NEXT:    movb $64, %cl
> +; X32-SSE2-NEXT:    subb %dl, %cl
> +; X32-SSE2-NEXT:    shrl %edi
> +; X32-SSE2-NEXT:    movl %edi, %ebx
> +; X32-SSE2-NEXT:    shldl %cl, %esi, %ebx
> +; X32-SSE2-NEXT:    movl %esi, %ebp
> +; X32-SSE2-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    shll %cl, %ebp
> +; X32-SSE2-NEXT:    xorl %eax, %eax
> +; X32-SSE2-NEXT:    testb $32, %cl
> +; X32-SSE2-NEXT:    cmovnel %ebp, %ebx
> +; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    cmovnel %eax, %ebp
> +; X32-SSE2-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
> +; X32-SSE2-NEXT:    andl $127, %eax
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebp
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
> +; X32-SSE2-NEXT:    movl %eax, %ecx
> +; X32-SSE2-NEXT:    shldl %cl, %ebp, %edx
> +; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl (%esp), %ecx # 4-byte Reload
> +; X32-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
> +; X32-SSE2-NEXT:    addb $-64, %cl
> +; X32-SSE2-NEXT:    movl %esi, %edx
> +; X32-SSE2-NEXT:    shrdl %cl, %edi, %edx
> +; X32-SSE2-NEXT:    movl %edi, %esi
> +; X32-SSE2-NEXT:    shrl %cl, %esi
> +; X32-SSE2-NEXT:    testb $32, %cl
> +; X32-SSE2-NEXT:    cmovnel %esi, %edx
> +; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl $0, %ecx
> +; X32-SSE2-NEXT:    cmovnel %ecx, %esi
> +; X32-SSE2-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
> +; X32-SSE2-NEXT:    movl %eax, %ecx
> +; X32-SSE2-NEXT:    shldl %cl, %esi, %ebx
> +; X32-SSE2-NEXT:    movl %esi, %edx
> +; X32-SSE2-NEXT:    shll %cl, %edx
> +; X32-SSE2-NEXT:    shll %cl, %ebp
> +; X32-SSE2-NEXT:    testb $32, %al
> +; X32-SSE2-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
> +; X32-SSE2-NEXT:    cmovnel %ebp, %ecx
> +; X32-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    cmovnel %edx, %ebx
> +; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl $0, %ecx
> +; X32-SSE2-NEXT:    cmovnel %ecx, %ebp
> +; X32-SSE2-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    cmovnel %ecx, %edx
> +; X32-SSE2-NEXT:    xorl %ecx, %ecx
> +; X32-SSE2-NEXT:    cmpl $64, %eax
> +; X32-SSE2-NEXT:    cmovael %ecx, %edx
> +; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
> +; X32-SSE2-NEXT:    shldl $31, %eax, %ebx
> +; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
> +; X32-SSE2-NEXT:    shrdl $1, %eax, %edx
> +; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl (%esp), %ecx # 4-byte Reload
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
> +; X32-SSE2-NEXT:    shrdl %cl, %edi, %eax
> +; X32-SSE2-NEXT:    shrl %cl, %edi
> +; X32-SSE2-NEXT:    movl %edx, %ebp
> +; X32-SSE2-NEXT:    shrdl %cl, %ebx, %ebp
> +; X32-SSE2-NEXT:    movl %ebx, %edx
> +; X32-SSE2-NEXT:    shrl %cl, %edx
> +; X32-SSE2-NEXT:    testb $32, %cl
> +; X32-SSE2-NEXT:    cmovnel %edx, %ebp
> +; X32-SSE2-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    cmovnel %edi, %eax
> +; X32-SSE2-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl $0, %ebp
> +; X32-SSE2-NEXT:    cmovnel %ebp, %edx
> +; X32-SSE2-NEXT:    cmovnel %ebp, %edi
> +; X32-SSE2-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
> +; X32-SSE2-NEXT:    cmpl $64, %eax
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
> +; X32-SSE2-NEXT:    cmovael %ebp, %ecx
> +; X32-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    xorl %ebp, %ebp
> +; X32-SSE2-NEXT:    movb $64, %ch
> +; X32-SSE2-NEXT:    subb %al, %ch
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
> +; X32-SSE2-NEXT:    movb %ch, %cl
> +; X32-SSE2-NEXT:    shrl %cl, %edi
> +; X32-SSE2-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    testb $32, %ch
> +; X32-SSE2-NEXT:    cmovnel %ebp, %edi
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    movb %al, %cl
> +; X32-SSE2-NEXT:    addb $-64, %cl
> +; X32-SSE2-NEXT:    movl %esi, %ebp
> +; X32-SSE2-NEXT:    shll %cl, %ebp
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
> +; X32-SSE2-NEXT:    shldl %cl, %esi, %eax
> +; X32-SSE2-NEXT:    testb $32, %cl
> +; X32-SSE2-NEXT:    cmovnel %ebp, %eax
> +; X32-SSE2-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
> +; X32-SSE2-NEXT:    movl $0, %esi
> +; X32-SSE2-NEXT:    cmovael %esi, %ebx
> +; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
> +; X32-SSE2-NEXT:    cmpl $64, %ebx
> +; X32-SSE2-NEXT:    cmovbl %edi, %eax
> +; X32-SSE2-NEXT:    testb $32, %cl
> +; X32-SSE2-NEXT:    movl $0, %esi
> +; X32-SSE2-NEXT:    cmovnel %esi, %ebp
> +; X32-SSE2-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
> +; X32-SSE2-NEXT:    cmovael %esi, %edi
> +; X32-SSE2-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
> +; X32-SSE2-NEXT:    movb %ch, %cl
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
> +; X32-SSE2-NEXT:    shrdl %cl, %edi, %esi
> +; X32-SSE2-NEXT:    testb $32, %ch
> +; X32-SSE2-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    cmpl $64, %ebx
> +; X32-SSE2-NEXT:    cmovael %ebp, %esi
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    movl (%esp), %edi # 4-byte Reload
> +; X32-SSE2-NEXT:    cmpl $64, %edi
> +; X32-SSE2-NEXT:    cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    cmpl $64, %edi
> +; X32-SSE2-NEXT:    cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    testl %edi, %edi
> +; X32-SSE2-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    movl %ecx, %edi
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    testl %ebx, %ebx
> +; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %eax
> +; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
> +; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
> +; X32-SSE2-NEXT:    movl %eax, 12(%ecx)
> +; X32-SSE2-NEXT:    movl %esi, 8(%ecx)
> +; X32-SSE2-NEXT:    movl %edx, 4(%ecx)
> +; X32-SSE2-NEXT:    movl %edi, (%ecx)
> +; X32-SSE2-NEXT:    movl %ecx, %eax
> +; X32-SSE2-NEXT:    addl $64, %esp
> +; X32-SSE2-NEXT:    popl %esi
> +; X32-SSE2-NEXT:    popl %edi
> +; X32-SSE2-NEXT:    popl %ebx
> +; X32-SSE2-NEXT:    popl %ebp
> +; X32-SSE2-NEXT:    retl $4
> +;
> +; X64-AVX2-LABEL: fshl_i128:
> +; X64-AVX2:       # %bb.0:
> +; X64-AVX2-NEXT:    movq %r8, %r9
> +; X64-AVX2-NEXT:    movq %rcx, %r10
> +; X64-AVX2-NEXT:    movq %rdx, %r8
> +; X64-AVX2-NEXT:    movq %rsi, %rdx
> +; X64-AVX2-NEXT:    movl %r9d, %ecx
> +; X64-AVX2-NEXT:    shldq %cl, %rdi, %rdx
> +; X64-AVX2-NEXT:    shrdq $1, %r10, %r8
> +; X64-AVX2-NEXT:    shrq %r10
> +; X64-AVX2-NEXT:    notb %cl
> +; X64-AVX2-NEXT:    shrdq %cl, %r10, %r8
> +; X64-AVX2-NEXT:    shrq %cl, %r10
> +; X64-AVX2-NEXT:    xorl %eax, %eax
> +; X64-AVX2-NEXT:    testb $64, %cl
> +; X64-AVX2-NEXT:    cmovneq %r10, %r8
> +; X64-AVX2-NEXT:    cmovneq %rax, %r10
> +; X64-AVX2-NEXT:    movl %r9d, %ecx
> +; X64-AVX2-NEXT:    shlq %cl, %rdi
> +; X64-AVX2-NEXT:    testb $64, %r9b
> +; X64-AVX2-NEXT:    cmovneq %rdi, %rdx
> +; X64-AVX2-NEXT:    cmoveq %rdi, %rax
> +; X64-AVX2-NEXT:    orq %r8, %rax
> +; X64-AVX2-NEXT:    orq %r10, %rdx
> +; X64-AVX2-NEXT:    retq
> +  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
> +  ret i128 %f
> +}
> +
>  ; Verify that weird types are minimally supported.
>  declare i37 @llvm.fshl.i37(i37, i37, i37)
>  define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
> @@ -916,13 +1170,13 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
>  ; X32-SSE2-NEXT:    shldl $24, %ebx, %edi
>  ; X32-SSE2-NEXT:    xorl %eax, %edi
>  ; X32-SSE2-NEXT:    orl %edi, %ecx
> -; X32-SSE2-NEXT:    jne .LBB44_1
> +; X32-SSE2-NEXT:    jne .LBB46_1
>  ; X32-SSE2-NEXT:  # %bb.2:
>  ; X32-SSE2-NEXT:    popl %esi
>  ; X32-SSE2-NEXT:    popl %edi
>  ; X32-SSE2-NEXT:    popl %ebx
>  ; X32-SSE2-NEXT:    jmp _Z3foov # TAILCALL
> -; X32-SSE2-NEXT:  .LBB44_1:
> +; X32-SSE2-NEXT:  .LBB46_1:
>  ; X32-SSE2-NEXT:    popl %esi
>  ; X32-SSE2-NEXT:    popl %edi
>  ; X32-SSE2-NEXT:    popl %ebx
> @@ -939,10 +1193,10 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
>  ; X64-AVX2-NEXT:    movq (%rsi,%rcx,4), %rcx
>  ; X64-AVX2-NEXT:    shrdq $40, %rdi, %rcx
>  ; X64-AVX2-NEXT:    cmpq %rax, %rcx
> -; X64-AVX2-NEXT:    jne .LBB44_1
> +; X64-AVX2-NEXT:    jne .LBB46_1
>  ; X64-AVX2-NEXT:  # %bb.2:
>  ; X64-AVX2-NEXT:    jmp _Z3foov # TAILCALL
> -; X64-AVX2-NEXT:  .LBB44_1:
> +; X64-AVX2-NEXT:  .LBB46_1:
>  ; X64-AVX2-NEXT:    retq
>    %3 = sext i32 %0 to i64
>    %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
>
>
>