[llvm] 09dcf31 - [NFC] Add tests for i128 fshl on a few targets.

Eli Friedman via llvm-commits <llvm-commits at lists.llvm.org>
Tue Aug 24 11:44:44 PDT 2021


Author: Eli Friedman
Date: 2021-08-24T11:43:35-07:00
New Revision: 09dcf31d7449f79cfa525a630b7dcc2affc9a930

URL: https://github.com/llvm/llvm-project/commit/09dcf31d7449f79cfa525a630b7dcc2affc9a930
DIFF: https://github.com/llvm/llvm-project/commit/09dcf31d7449f79cfa525a630b7dcc2affc9a930.diff

LOG: [NFC] Add tests for i128 fshl on a few targets.

In preparation for D108058.
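
For context, the LangRef semantics of the intrinsic under test: fshl
concatenates the first operand (high bits) with the second (low bits),
shifts the double-width value left by the third operand modulo the bit
width, and returns the high half. The IR below is an illustrative
reference expansion of the i128 case, not code from this commit; the
targets lower the intrinsic through their own legalization sequences,
as the checked assembly below shows.

define i128 @fshl_i128_reference(i128 %x, i128 %y, i128 %z) {
  ; The shift amount is taken modulo the bit width; 128 is a power of
  ; two, so the modulo reduces to a mask.
  %zmod = and i128 %z, 127
  %hi = shl i128 %x, %zmod
  ; When %zmod == 0 the lshr amount would be 128 (poison for i128);
  ; the select below sidesteps that case by returning %x unchanged.
  %inv = sub i128 128, %zmod
  %lo = lshr i128 %y, %inv
  %or = or i128 %hi, %lo
  %iszero = icmp eq i128 %zmod, 0
  %f = select i1 %iszero, i128 %x, i128 %or
  ret i128 %f
}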

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/funnel-shift.ll
    llvm/test/CodeGen/PowerPC/funnel-shift.ll
    llvm/test/CodeGen/X86/funnel-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 7e3b284d0b59..832ee5d39236 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -5,6 +5,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
 declare i16 @llvm.fshl.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i128 @llvm.fshl.i128(i128, i128, i128)
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 declare i8 @llvm.fshr.i8(i8, i8, i8)
@@ -42,6 +43,37 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
   ret i64 %f
 }
 
+define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
+; CHECK-LABEL: fshl_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvn w9, w4
+; CHECK-NEXT:    and x12, x9, #0x7f
+; CHECK-NEXT:    extr x8, x3, x2, #1
+; CHECK-NEXT:    lsr x10, x3, #1
+; CHECK-NEXT:    tst x12, #0x40
+; CHECK-NEXT:    lsr x12, x0, #1
+; CHECK-NEXT:    lsr x8, x8, x9
+; CHECK-NEXT:    lsr x12, x12, x9
+; CHECK-NEXT:    lsr x9, x10, x9
+; CHECK-NEXT:    lsl x10, x10, #1
+; CHECK-NEXT:    lsl x10, x10, x4
+; CHECK-NEXT:    lsl x11, x1, x4
+; CHECK-NEXT:    and x14, x4, #0x7f
+; CHECK-NEXT:    orr x8, x10, x8
+; CHECK-NEXT:    lsl x13, x0, x4
+; CHECK-NEXT:    orr x11, x11, x12
+; CHECK-NEXT:    csel x10, xzr, x9, ne
+; CHECK-NEXT:    csel x8, x9, x8, ne
+; CHECK-NEXT:    tst x14, #0x40
+; CHECK-NEXT:    csel x9, x13, x11, ne
+; CHECK-NEXT:    csel x11, xzr, x13, ne
+; CHECK-NEXT:    orr x1, x9, x10
+; CHECK-NEXT:    orr x0, x11, x8
+; CHECK-NEXT:    ret
+  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
+  ret i128 %f
+}
+
 ; Verify that weird types are minimally supported.
 declare i37 @llvm.fshl.i37(i37, i37, i37)
 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {

diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index 9a4398b90ab4..c33904082f23 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -7,6 +7,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
 declare i16 @llvm.fshl.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i128 @llvm.fshl.i128(i128, i128, i128)
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 declare i8 @llvm.fshr.i8(i8, i8, i8)
@@ -108,6 +109,395 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
   ret i64 %f
 }
 
+define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
+; CHECK32_32-LABEL: fshl_i128:
+; CHECK32_32:       # %bb.0:
+; CHECK32_32-NEXT:    stwu 1, -64(1)
+; CHECK32_32-NEXT:    lwz 0, 84(1)
+; CHECK32_32-NEXT:    rotlwi 12, 8, 31
+; CHECK32_32-NEXT:    srwi 11, 7, 1
+; CHECK32_32-NEXT:    rlwimi 12, 7, 31, 0, 0
+; CHECK32_32-NEXT:    andi. 7, 0, 127
+; CHECK32_32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    rotlwi 10, 10, 31
+; CHECK32_32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    rotlwi 30, 9, 31
+; CHECK32_32-NEXT:    subfic 27, 7, 32
+; CHECK32_32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    rlwimi 10, 9, 31, 0, 0
+; CHECK32_32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    rlwimi 30, 8, 31, 0, 0
+; CHECK32_32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    not 8, 0
+; CHECK32_32-NEXT:    subfic 9, 7, 96
+; CHECK32_32-NEXT:    addi 0, 7, -64
+; CHECK32_32-NEXT:    slw 28, 3, 7
+; CHECK32_32-NEXT:    subfic 25, 7, 64
+; CHECK32_32-NEXT:    srw 22, 4, 27
+; CHECK32_32-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    srw 9, 6, 9
+; CHECK32_32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    slw 23, 5, 0
+; CHECK32_32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    addi 29, 7, -96
+; CHECK32_32-NEXT:    srw 20, 5, 25
+; CHECK32_32-NEXT:    or 28, 28, 22
+; CHECK32_32-NEXT:    srw 22, 6, 25
+; CHECK32_32-NEXT:    subfic 25, 25, 32
+; CHECK32_32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    mcrf 1, 0
+; CHECK32_32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    addi 26, 7, -32
+; CHECK32_32-NEXT:    andi. 8, 8, 127
+; CHECK32_32-NEXT:    slw 24, 5, 7
+; CHECK32_32-NEXT:    slw 29, 6, 29
+; CHECK32_32-NEXT:    or 9, 23, 9
+; CHECK32_32-NEXT:    slw 25, 5, 25
+; CHECK32_32-NEXT:    srw 5, 5, 27
+; CHECK32_32-NEXT:    srw 27, 6, 27
+; CHECK32_32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    slw 21, 4, 26
+; CHECK32_32-NEXT:    subfic 23, 8, 32
+; CHECK32_32-NEXT:    or 27, 24, 27
+; CHECK32_32-NEXT:    subfic 24, 8, 96
+; CHECK32_32-NEXT:    or 9, 9, 29
+; CHECK32_32-NEXT:    addi 29, 8, -64
+; CHECK32_32-NEXT:    or 25, 22, 25
+; CHECK32_32-NEXT:    stw 19, 12(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT:    srw 19, 12, 8
+; CHECK32_32-NEXT:    or 28, 28, 21
+; CHECK32_32-NEXT:    slw 21, 11, 23
+; CHECK32_32-NEXT:    slw 24, 11, 24
+; CHECK32_32-NEXT:    srw 22, 12, 29
+; CHECK32_32-NEXT:    slw 26, 6, 26
+; CHECK32_32-NEXT:    or 5, 25, 5
+; CHECK32_32-NEXT:    addi 25, 8, -96
+; CHECK32_32-NEXT:    or 21, 19, 21
+; CHECK32_32-NEXT:    srw 19, 10, 8
+; CHECK32_32-NEXT:    or 24, 22, 24
+; CHECK32_32-NEXT:    slw 22, 30, 23
+; CHECK32_32-NEXT:    or 27, 27, 26
+; CHECK32_32-NEXT:    addi 26, 8, -32
+; CHECK32_32-NEXT:    srw 25, 11, 25
+; CHECK32_32-NEXT:    or 22, 19, 22
+; CHECK32_32-NEXT:    or 28, 28, 20
+; CHECK32_32-NEXT:    srw 20, 11, 26
+; CHECK32_32-NEXT:    or 25, 24, 25
+; CHECK32_32-NEXT:    subfic 24, 8, 64
+; CHECK32_32-NEXT:    srw 26, 30, 26
+; CHECK32_32-NEXT:    or 26, 22, 26
+; CHECK32_32-NEXT:    subfic 22, 24, 32
+; CHECK32_32-NEXT:    slw 23, 12, 23
+; CHECK32_32-NEXT:    srw 22, 12, 22
+; CHECK32_32-NEXT:    slw 12, 12, 24
+; CHECK32_32-NEXT:    slw 24, 11, 24
+; CHECK32_32-NEXT:    cmplwi 5, 7, 64
+; CHECK32_32-NEXT:    or 24, 24, 22
+; CHECK32_32-NEXT:    slw 22, 6, 0
+; CHECK32_32-NEXT:    slw 6, 6, 7
+; CHECK32_32-NEXT:    slw 7, 4, 7
+; CHECK32_32-NEXT:    srw 29, 11, 29
+; CHECK32_32-NEXT:    srw 11, 11, 8
+; CHECK32_32-NEXT:    cmplwi 6, 8, 64
+; CHECK32_32-NEXT:    srw 8, 30, 8
+; CHECK32_32-NEXT:    or 5, 7, 5
+; CHECK32_32-NEXT:    or 7, 26, 12
+; CHECK32_32-NEXT:    or 12, 24, 23
+; CHECK32_32-NEXT:    bc 12, 20, .LBB2_1
+; CHECK32_32-NEXT:    b .LBB2_2
+; CHECK32_32-NEXT:  .LBB2_1:
+; CHECK32_32-NEXT:    addi 9, 28, 0
+; CHECK32_32-NEXT:  .LBB2_2:
+; CHECK32_32-NEXT:    li 28, 0
+; CHECK32_32-NEXT:    bc 12, 20, .LBB2_4
+; CHECK32_32-NEXT:  # %bb.3:
+; CHECK32_32-NEXT:    ori 5, 22, 0
+; CHECK32_32-NEXT:    b .LBB2_4
+; CHECK32_32-NEXT:  .LBB2_4:
+; CHECK32_32-NEXT:    bc 12, 24, .LBB2_6
+; CHECK32_32-NEXT:  # %bb.5:
+; CHECK32_32-NEXT:    ori 7, 25, 0
+; CHECK32_32-NEXT:    b .LBB2_6
+; CHECK32_32-NEXT:  .LBB2_6:
+; CHECK32_32-NEXT:    or 8, 8, 12
+; CHECK32_32-NEXT:    or 21, 21, 20
+; CHECK32_32-NEXT:    bc 12, 20, .LBB2_8
+; CHECK32_32-NEXT:  # %bb.7:
+; CHECK32_32-NEXT:    ori 6, 28, 0
+; CHECK32_32-NEXT:    b .LBB2_8
+; CHECK32_32-NEXT:  .LBB2_8:
+; CHECK32_32-NEXT:    bc 12, 6, .LBB2_10
+; CHECK32_32-NEXT:  # %bb.9:
+; CHECK32_32-NEXT:    ori 4, 5, 0
+; CHECK32_32-NEXT:    b .LBB2_10
+; CHECK32_32-NEXT:  .LBB2_10:
+; CHECK32_32-NEXT:    bc 12, 2, .LBB2_12
+; CHECK32_32-NEXT:  # %bb.11:
+; CHECK32_32-NEXT:    ori 5, 7, 0
+; CHECK32_32-NEXT:    b .LBB2_13
+; CHECK32_32-NEXT:  .LBB2_12:
+; CHECK32_32-NEXT:    addi 5, 10, 0
+; CHECK32_32-NEXT:  .LBB2_13:
+; CHECK32_32-NEXT:    bc 12, 24, .LBB2_15
+; CHECK32_32-NEXT:  # %bb.14:
+; CHECK32_32-NEXT:    ori 7, 29, 0
+; CHECK32_32-NEXT:    ori 11, 28, 0
+; CHECK32_32-NEXT:    ori 0, 28, 0
+; CHECK32_32-NEXT:    b .LBB2_16
+; CHECK32_32-NEXT:  .LBB2_15:
+; CHECK32_32-NEXT:    addi 7, 8, 0
+; CHECK32_32-NEXT:    addi 0, 21, 0
+; CHECK32_32-NEXT:  .LBB2_16:
+; CHECK32_32-NEXT:    bc 12, 6, .LBB2_18
+; CHECK32_32-NEXT:  # %bb.17:
+; CHECK32_32-NEXT:    ori 3, 9, 0
+; CHECK32_32-NEXT:    b .LBB2_18
+; CHECK32_32-NEXT:  .LBB2_18:
+; CHECK32_32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    or 6, 6, 5
+; CHECK32_32-NEXT:    bc 12, 20, .LBB2_20
+; CHECK32_32-NEXT:  # %bb.19:
+; CHECK32_32-NEXT:    ori 5, 28, 0
+; CHECK32_32-NEXT:    b .LBB2_21
+; CHECK32_32-NEXT:  .LBB2_20:
+; CHECK32_32-NEXT:    addi 5, 27, 0
+; CHECK32_32-NEXT:  .LBB2_21:
+; CHECK32_32-NEXT:    bc 12, 2, .LBB2_22
+; CHECK32_32-NEXT:    b .LBB2_23
+; CHECK32_32-NEXT:  .LBB2_22:
+; CHECK32_32-NEXT:    addi 7, 30, 0
+; CHECK32_32-NEXT:  .LBB2_23:
+; CHECK32_32-NEXT:    or 3, 3, 11
+; CHECK32_32-NEXT:    or 4, 4, 0
+; CHECK32_32-NEXT:    or 5, 5, 7
+; CHECK32_32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 28, 48(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 27, 44(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 24, 32(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 21, 20(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 20, 16(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    lwz 19, 12(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT:    addi 1, 1, 64
+; CHECK32_32-NEXT:    blr
+;
+; CHECK32_64-LABEL: fshl_i128:
+; CHECK32_64:       # %bb.0:
+; CHECK32_64-NEXT:    stwu 1, -64(1)
+; CHECK32_64-NEXT:    lwz 12, 84(1)
+; CHECK32_64-NEXT:    rotlwi 11, 8, 31
+; CHECK32_64-NEXT:    rotlwi 10, 10, 31
+; CHECK32_64-NEXT:    rlwimi 10, 9, 31, 0, 0
+; CHECK32_64-NEXT:    rlwimi 11, 7, 31, 0, 0
+; CHECK32_64-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    rotlwi 30, 9, 31
+; CHECK32_64-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    not 9, 12
+; CHECK32_64-NEXT:    rlwimi 30, 8, 31, 0, 0
+; CHECK32_64-NEXT:    andi. 8, 12, 127
+; CHECK32_64-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    mcrf 1, 0
+; CHECK32_64-NEXT:    subfic 12, 8, 96
+; CHECK32_64-NEXT:    addi 0, 8, -64
+; CHECK32_64-NEXT:    subfic 27, 8, 32
+; CHECK32_64-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    andi. 9, 9, 127
+; CHECK32_64-NEXT:    srw 12, 6, 12
+; CHECK32_64-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    subfic 25, 8, 64
+; CHECK32_64-NEXT:    slw 23, 5, 0
+; CHECK32_64-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    addi 26, 8, -32
+; CHECK32_64-NEXT:    srw 22, 4, 27
+; CHECK32_64-NEXT:    srwi 7, 7, 1
+; CHECK32_64-NEXT:    or 12, 23, 12
+; CHECK32_64-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    slw 28, 3, 8
+; CHECK32_64-NEXT:    srw 23, 6, 25
+; CHECK32_64-NEXT:    stw 18, 8(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    subfic 18, 9, 32
+; CHECK32_64-NEXT:    or 28, 28, 22
+; CHECK32_64-NEXT:    srw 22, 5, 27
+; CHECK32_64-NEXT:    srw 27, 6, 27
+; CHECK32_64-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    srw 20, 5, 25
+; CHECK32_64-NEXT:    subfic 25, 25, 32
+; CHECK32_64-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    slw 21, 4, 26
+; CHECK32_64-NEXT:    slw 26, 6, 26
+; CHECK32_64-NEXT:    or 28, 28, 21
+; CHECK32_64-NEXT:    slw 21, 7, 18
+; CHECK32_64-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    slw 24, 5, 8
+; CHECK32_64-NEXT:    slw 5, 5, 25
+; CHECK32_64-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    addi 29, 8, -96
+; CHECK32_64-NEXT:    subfic 25, 9, 96
+; CHECK32_64-NEXT:    slw 29, 6, 29
+; CHECK32_64-NEXT:    or 27, 24, 27
+; CHECK32_64-NEXT:    stw 19, 12(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT:    srw 19, 11, 9
+; CHECK32_64-NEXT:    addi 24, 9, -64
+; CHECK32_64-NEXT:    or 12, 12, 29
+; CHECK32_64-NEXT:    srw 29, 10, 9
+; CHECK32_64-NEXT:    slw 25, 7, 25
+; CHECK32_64-NEXT:    or 21, 19, 21
+; CHECK32_64-NEXT:    srw 19, 11, 24
+; CHECK32_64-NEXT:    or 5, 23, 5
+; CHECK32_64-NEXT:    slw 23, 30, 18
+; CHECK32_64-NEXT:    or 27, 27, 26
+; CHECK32_64-NEXT:    addi 26, 9, -96
+; CHECK32_64-NEXT:    or 25, 19, 25
+; CHECK32_64-NEXT:    lwz 19, 12(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    or 29, 29, 23
+; CHECK32_64-NEXT:    addi 23, 9, -32
+; CHECK32_64-NEXT:    srw 26, 7, 26
+; CHECK32_64-NEXT:    or 28, 28, 20
+; CHECK32_64-NEXT:    srw 20, 7, 23
+; CHECK32_64-NEXT:    or 26, 25, 26
+; CHECK32_64-NEXT:    subfic 25, 9, 64
+; CHECK32_64-NEXT:    srw 23, 30, 23
+; CHECK32_64-NEXT:    or 29, 29, 23
+; CHECK32_64-NEXT:    subfic 23, 25, 32
+; CHECK32_64-NEXT:    or 5, 5, 22
+; CHECK32_64-NEXT:    slw 22, 11, 18
+; CHECK32_64-NEXT:    lwz 18, 8(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    srw 23, 11, 23
+; CHECK32_64-NEXT:    slw 11, 11, 25
+; CHECK32_64-NEXT:    slw 25, 7, 25
+; CHECK32_64-NEXT:    cmplwi 5, 8, 64
+; CHECK32_64-NEXT:    bc 12, 20, .LBB2_1
+; CHECK32_64-NEXT:    b .LBB2_2
+; CHECK32_64-NEXT:  .LBB2_1:
+; CHECK32_64-NEXT:    addi 12, 28, 0
+; CHECK32_64-NEXT:  .LBB2_2:
+; CHECK32_64-NEXT:    lwz 28, 48(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    or 25, 25, 23
+; CHECK32_64-NEXT:    bc 12, 6, .LBB2_4
+; CHECK32_64-NEXT:  # %bb.3:
+; CHECK32_64-NEXT:    ori 3, 12, 0
+; CHECK32_64-NEXT:    b .LBB2_4
+; CHECK32_64-NEXT:  .LBB2_4:
+; CHECK32_64-NEXT:    slw 23, 6, 0
+; CHECK32_64-NEXT:    slw 6, 6, 8
+; CHECK32_64-NEXT:    slw 8, 4, 8
+; CHECK32_64-NEXT:    cmplwi 6, 9, 64
+; CHECK32_64-NEXT:    or 5, 8, 5
+; CHECK32_64-NEXT:    bc 12, 20, .LBB2_6
+; CHECK32_64-NEXT:  # %bb.5:
+; CHECK32_64-NEXT:    ori 5, 23, 0
+; CHECK32_64-NEXT:    b .LBB2_6
+; CHECK32_64-NEXT:  .LBB2_6:
+; CHECK32_64-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    li 8, 0
+; CHECK32_64-NEXT:    srw 24, 7, 24
+; CHECK32_64-NEXT:    bc 12, 6, .LBB2_8
+; CHECK32_64-NEXT:  # %bb.7:
+; CHECK32_64-NEXT:    ori 4, 5, 0
+; CHECK32_64-NEXT:    b .LBB2_8
+; CHECK32_64-NEXT:  .LBB2_8:
+; CHECK32_64-NEXT:    bc 12, 20, .LBB2_10
+; CHECK32_64-NEXT:  # %bb.9:
+; CHECK32_64-NEXT:    ori 6, 8, 0
+; CHECK32_64-NEXT:    b .LBB2_10
+; CHECK32_64-NEXT:  .LBB2_10:
+; CHECK32_64-NEXT:    srw 7, 7, 9
+; CHECK32_64-NEXT:    srw 9, 30, 9
+; CHECK32_64-NEXT:    bc 12, 24, .LBB2_12
+; CHECK32_64-NEXT:  # %bb.11:
+; CHECK32_64-NEXT:    ori 7, 8, 0
+; CHECK32_64-NEXT:    b .LBB2_12
+; CHECK32_64-NEXT:  .LBB2_12:
+; CHECK32_64-NEXT:    or 0, 25, 22
+; CHECK32_64-NEXT:    or 11, 29, 11
+; CHECK32_64-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    bc 12, 24, .LBB2_14
+; CHECK32_64-NEXT:  # %bb.13:
+; CHECK32_64-NEXT:    ori 5, 26, 0
+; CHECK32_64-NEXT:    b .LBB2_15
+; CHECK32_64-NEXT:  .LBB2_14:
+; CHECK32_64-NEXT:    addi 5, 11, 0
+; CHECK32_64-NEXT:  .LBB2_15:
+; CHECK32_64-NEXT:    or 9, 9, 0
+; CHECK32_64-NEXT:    or 21, 21, 20
+; CHECK32_64-NEXT:    bc 12, 2, .LBB2_16
+; CHECK32_64-NEXT:    b .LBB2_17
+; CHECK32_64-NEXT:  .LBB2_16:
+; CHECK32_64-NEXT:    addi 5, 10, 0
+; CHECK32_64-NEXT:  .LBB2_17:
+; CHECK32_64-NEXT:    bc 12, 24, .LBB2_19
+; CHECK32_64-NEXT:  # %bb.18:
+; CHECK32_64-NEXT:    ori 0, 8, 0
+; CHECK32_64-NEXT:    b .LBB2_20
+; CHECK32_64-NEXT:  .LBB2_19:
+; CHECK32_64-NEXT:    addi 0, 21, 0
+; CHECK32_64-NEXT:  .LBB2_20:
+; CHECK32_64-NEXT:    bc 12, 20, .LBB2_21
+; CHECK32_64-NEXT:    b .LBB2_22
+; CHECK32_64-NEXT:  .LBB2_21:
+; CHECK32_64-NEXT:    addi 8, 27, 0
+; CHECK32_64-NEXT:  .LBB2_22:
+; CHECK32_64-NEXT:    lwz 27, 44(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    or 3, 3, 7
+; CHECK32_64-NEXT:    bc 12, 24, .LBB2_24
+; CHECK32_64-NEXT:  # %bb.23:
+; CHECK32_64-NEXT:    ori 7, 24, 0
+; CHECK32_64-NEXT:    b .LBB2_25
+; CHECK32_64-NEXT:  .LBB2_24:
+; CHECK32_64-NEXT:    addi 7, 9, 0
+; CHECK32_64-NEXT:  .LBB2_25:
+; CHECK32_64-NEXT:    or 4, 4, 0
+; CHECK32_64-NEXT:    bc 12, 2, .LBB2_26
+; CHECK32_64-NEXT:    b .LBB2_27
+; CHECK32_64-NEXT:  .LBB2_26:
+; CHECK32_64-NEXT:    addi 7, 30, 0
+; CHECK32_64-NEXT:  .LBB2_27:
+; CHECK32_64-NEXT:    or 6, 6, 5
+; CHECK32_64-NEXT:    or 5, 8, 7
+; CHECK32_64-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    lwz 24, 32(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    lwz 21, 20(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    lwz 20, 16(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT:    addi 1, 1, 64
+; CHECK32_64-NEXT:    blr
+;
+; CHECK64-LABEL: fshl_i128:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    clrlwi 8, 7, 25
+; CHECK64-NEXT:    rotldi 5, 5, 63
+; CHECK64-NEXT:    not 7, 7
+; CHECK64-NEXT:    rldicl 9, 6, 63, 1
+; CHECK64-NEXT:    subfic 10, 8, 64
+; CHECK64-NEXT:    addi 11, 8, -64
+; CHECK64-NEXT:    rldimi 5, 6, 63, 0
+; CHECK64-NEXT:    clrlwi 6, 7, 25
+; CHECK64-NEXT:    srd 7, 3, 10
+; CHECK64-NEXT:    sld 10, 3, 11
+; CHECK64-NEXT:    subfic 11, 6, 64
+; CHECK64-NEXT:    addi 12, 6, -64
+; CHECK64-NEXT:    sld 4, 4, 8
+; CHECK64-NEXT:    srd 5, 5, 6
+; CHECK64-NEXT:    sld 11, 9, 11
+; CHECK64-NEXT:    or 4, 4, 7
+; CHECK64-NEXT:    or 5, 5, 11
+; CHECK64-NEXT:    srd 7, 9, 12
+; CHECK64-NEXT:    or 4, 4, 10
+; CHECK64-NEXT:    srd 6, 9, 6
+; CHECK64-NEXT:    or 5, 5, 7
+; CHECK64-NEXT:    sld 3, 3, 8
+; CHECK64-NEXT:    or 4, 4, 6
+; CHECK64-NEXT:    or 3, 3, 5
+; CHECK64-NEXT:    blr
+  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
+  ret i128 %f
+}
+
 ; Verify that weird types are minimally supported.
 declare i37 @llvm.fshl.i37(i37, i37, i37)
 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {

diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 2120cb2581b9..8775957f7b7d 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -7,6 +7,7 @@ declare i16 @llvm.fshl.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i64 @llvm.fshl.i64(i64, i64, i64)
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare i128 @llvm.fshl.i128(i128, i128, i128)
 
 declare i8 @llvm.fshr.i8(i8, i8, i8)
 declare i16 @llvm.fshr.i16(i16, i16, i16)
@@ -36,6 +37,259 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
   ret i32 %f
 }
 
+define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
+; X32-SSE2-LABEL: fshl_i64:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    pushl %ebx
+; X32-SSE2-NEXT:    pushl %edi
+; X32-SSE2-NEXT:    pushl %esi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X32-SSE2-NEXT:    movb %ch, %cl
+; X32-SSE2-NEXT:    notb %cl
+; X32-SSE2-NEXT:    shrdl $1, %eax, %esi
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shrl %ebx
+; X32-SSE2-NEXT:    shrdl %cl, %ebx, %esi
+; X32-SSE2-NEXT:    shrl %cl, %ebx
+; X32-SSE2-NEXT:    xorl %ebp, %ebp
+; X32-SSE2-NEXT:    testb $32, %cl
+; X32-SSE2-NEXT:    cmovnel %ebx, %esi
+; X32-SSE2-NEXT:    cmovnel %ebp, %ebx
+; X32-SSE2-NEXT:    movl %edi, %eax
+; X32-SSE2-NEXT:    movb %ch, %cl
+; X32-SSE2-NEXT:    shll %cl, %eax
+; X32-SSE2-NEXT:    shldl %cl, %edi, %edx
+; X32-SSE2-NEXT:    testb $32, %ch
+; X32-SSE2-NEXT:    cmovnel %eax, %edx
+; X32-SSE2-NEXT:    cmovnel %ebp, %eax
+; X32-SSE2-NEXT:    orl %esi, %eax
+; X32-SSE2-NEXT:    orl %ebx, %edx
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    popl %ebx
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: fshl_i64:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movq %rdx, %rcx
+; X64-AVX2-NEXT:    movq %rdi, %rax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT:    shldq %cl, %rsi, %rax
+; X64-AVX2-NEXT:    retq
+  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
+  ret i64 %f
+}
+
+define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
+; X32-SSE2-LABEL: fshl_i128:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    pushl %ebx
+; X32-SSE2-NEXT:    pushl %edi
+; X32-SSE2-NEXT:    pushl %esi
+; X32-SSE2-NEXT:    subl $64, %esp
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT:    movl %edi, %esi
+; X32-SSE2-NEXT:    shldl $31, %ecx, %esi
+; X32-SSE2-NEXT:    notl %edx
+; X32-SSE2-NEXT:    andl $127, %edx
+; X32-SSE2-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X32-SSE2-NEXT:    movb $64, %cl
+; X32-SSE2-NEXT:    subb %dl, %cl
+; X32-SSE2-NEXT:    shrl %edi
+; X32-SSE2-NEXT:    movl %edi, %ebx
+; X32-SSE2-NEXT:    shldl %cl, %esi, %ebx
+; X32-SSE2-NEXT:    movl %esi, %ebp
+; X32-SSE2-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    shll %cl, %ebp
+; X32-SSE2-NEXT:    xorl %eax, %eax
+; X32-SSE2-NEXT:    testb $32, %cl
+; X32-SSE2-NEXT:    cmovnel %ebp, %ebx
+; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    cmovnel %eax, %ebp
+; X32-SSE2-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    andl $127, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shldl %cl, %ebp, %edx
+; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X32-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT:    addb $-64, %cl
+; X32-SSE2-NEXT:    movl %esi, %edx
+; X32-SSE2-NEXT:    shrdl %cl, %edi, %edx
+; X32-SSE2-NEXT:    movl %edi, %esi
+; X32-SSE2-NEXT:    shrl %cl, %esi
+; X32-SSE2-NEXT:    testb $32, %cl
+; X32-SSE2-NEXT:    cmovnel %esi, %edx
+; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl $0, %ecx
+; X32-SSE2-NEXT:    cmovnel %ecx, %esi
+; X32-SSE2-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shldl %cl, %esi, %ebx
+; X32-SSE2-NEXT:    movl %esi, %edx
+; X32-SSE2-NEXT:    shll %cl, %edx
+; X32-SSE2-NEXT:    shll %cl, %ebp
+; X32-SSE2-NEXT:    testb $32, %al
+; X32-SSE2-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-SSE2-NEXT:    cmovnel %ebp, %ecx
+; X32-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    cmovnel %edx, %ebx
+; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl $0, %ecx
+; X32-SSE2-NEXT:    cmovnel %ecx, %ebp
+; X32-SSE2-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    cmovnel %ecx, %edx
+; X32-SSE2-NEXT:    xorl %ecx, %ecx
+; X32-SSE2-NEXT:    cmpl $64, %eax
+; X32-SSE2-NEXT:    cmovael %ecx, %edx
+; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT:    shldl $31, %eax, %ebx
+; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT:    shrdl $1, %eax, %edx
+; X32-SSE2-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-SSE2-NEXT:    shrdl %cl, %edi, %eax
+; X32-SSE2-NEXT:    shrl %cl, %edi
+; X32-SSE2-NEXT:    movl %edx, %ebp
+; X32-SSE2-NEXT:    shrdl %cl, %ebx, %ebp
+; X32-SSE2-NEXT:    movl %ebx, %edx
+; X32-SSE2-NEXT:    shrl %cl, %edx
+; X32-SSE2-NEXT:    testb $32, %cl
+; X32-SSE2-NEXT:    cmovnel %edx, %ebp
+; X32-SSE2-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    cmovnel %edi, %eax
+; X32-SSE2-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl $0, %ebp
+; X32-SSE2-NEXT:    cmovnel %ebp, %edx
+; X32-SSE2-NEXT:    cmovnel %ebp, %edi
+; X32-SSE2-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-SSE2-NEXT:    cmpl $64, %eax
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-SSE2-NEXT:    cmovael %ebp, %ecx
+; X32-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    xorl %ebp, %ebp
+; X32-SSE2-NEXT:    movb $64, %ch
+; X32-SSE2-NEXT:    subb %al, %ch
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT:    movb %ch, %cl
+; X32-SSE2-NEXT:    shrl %cl, %edi
+; X32-SSE2-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    testb $32, %ch
+; X32-SSE2-NEXT:    cmovnel %ebp, %edi
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-SSE2-NEXT:    movb %al, %cl
+; X32-SSE2-NEXT:    addb $-64, %cl
+; X32-SSE2-NEXT:    movl %esi, %ebp
+; X32-SSE2-NEXT:    shll %cl, %ebp
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shldl %cl, %esi, %eax
+; X32-SSE2-NEXT:    testb $32, %cl
+; X32-SSE2-NEXT:    cmovnel %ebp, %eax
+; X32-SSE2-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-SSE2-NEXT:    movl $0, %esi
+; X32-SSE2-NEXT:    cmovael %esi, %ebx
+; X32-SSE2-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-SSE2-NEXT:    cmpl $64, %ebx
+; X32-SSE2-NEXT:    cmovbl %edi, %eax
+; X32-SSE2-NEXT:    testb $32, %cl
+; X32-SSE2-NEXT:    movl $0, %esi
+; X32-SSE2-NEXT:    cmovnel %esi, %ebp
+; X32-SSE2-NEXT:    cmpl $64, (%esp) # 4-byte Folded Reload
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-SSE2-NEXT:    cmovael %esi, %edi
+; X32-SSE2-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT:    movb %ch, %cl
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT:    shrdl %cl, %edi, %esi
+; X32-SSE2-NEXT:    testb $32, %ch
+; X32-SSE2-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-SSE2-NEXT:    cmpl $64, %ebx
+; X32-SSE2-NEXT:    cmovael %ebp, %esi
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    movl (%esp), %edi # 4-byte Reload
+; X32-SSE2-NEXT:    cmpl $64, %edi
+; X32-SSE2-NEXT:    cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    cmpl $64, %edi
+; X32-SSE2-NEXT:    cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    testl %edi, %edi
+; X32-SSE2-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    movl %ecx, %edi
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT:    testl %ebx, %ebx
+; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-SSE2-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT:    movl %eax, 12(%ecx)
+; X32-SSE2-NEXT:    movl %esi, 8(%ecx)
+; X32-SSE2-NEXT:    movl %edx, 4(%ecx)
+; X32-SSE2-NEXT:    movl %edi, (%ecx)
+; X32-SSE2-NEXT:    movl %ecx, %eax
+; X32-SSE2-NEXT:    addl $64, %esp
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    popl %ebx
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl $4
+;
+; X64-AVX2-LABEL: fshl_i128:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movq %r8, %r9
+; X64-AVX2-NEXT:    movq %rcx, %r10
+; X64-AVX2-NEXT:    movq %rdx, %r8
+; X64-AVX2-NEXT:    movq %rsi, %rdx
+; X64-AVX2-NEXT:    movl %r9d, %ecx
+; X64-AVX2-NEXT:    shldq %cl, %rdi, %rdx
+; X64-AVX2-NEXT:    shrdq $1, %r10, %r8
+; X64-AVX2-NEXT:    shrq %r10
+; X64-AVX2-NEXT:    notb %cl
+; X64-AVX2-NEXT:    shrdq %cl, %r10, %r8
+; X64-AVX2-NEXT:    shrq %cl, %r10
+; X64-AVX2-NEXT:    xorl %eax, %eax
+; X64-AVX2-NEXT:    testb $64, %cl
+; X64-AVX2-NEXT:    cmovneq %r10, %r8
+; X64-AVX2-NEXT:    cmovneq %rax, %r10
+; X64-AVX2-NEXT:    movl %r9d, %ecx
+; X64-AVX2-NEXT:    shlq %cl, %rdi
+; X64-AVX2-NEXT:    testb $64, %r9b
+; X64-AVX2-NEXT:    cmovneq %rdi, %rdx
+; X64-AVX2-NEXT:    cmoveq %rdi, %rax
+; X64-AVX2-NEXT:    orq %r8, %rax
+; X64-AVX2-NEXT:    orq %r10, %rdx
+; X64-AVX2-NEXT:    retq
+  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
+  ret i128 %f
+}
+
 ; Verify that weird types are minimally supported.
 declare i37 @llvm.fshl.i37(i37, i37, i37)
 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
@@ -916,13 +1170,13 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
 ; X32-SSE2-NEXT:    shldl $24, %ebx, %edi
 ; X32-SSE2-NEXT:    xorl %eax, %edi
 ; X32-SSE2-NEXT:    orl %edi, %ecx
-; X32-SSE2-NEXT:    jne .LBB44_1
+; X32-SSE2-NEXT:    jne .LBB46_1
 ; X32-SSE2-NEXT:  # %bb.2:
 ; X32-SSE2-NEXT:    popl %esi
 ; X32-SSE2-NEXT:    popl %edi
 ; X32-SSE2-NEXT:    popl %ebx
 ; X32-SSE2-NEXT:    jmp _Z3foov # TAILCALL
-; X32-SSE2-NEXT:  .LBB44_1:
+; X32-SSE2-NEXT:  .LBB46_1:
 ; X32-SSE2-NEXT:    popl %esi
 ; X32-SSE2-NEXT:    popl %edi
 ; X32-SSE2-NEXT:    popl %ebx
@@ -939,10 +1193,10 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
 ; X64-AVX2-NEXT:    movq (%rsi,%rcx,4), %rcx
 ; X64-AVX2-NEXT:    shrdq $40, %rdi, %rcx
 ; X64-AVX2-NEXT:    cmpq %rax, %rcx
-; X64-AVX2-NEXT:    jne .LBB44_1
+; X64-AVX2-NEXT:    jne .LBB46_1
 ; X64-AVX2-NEXT:  # %bb.2:
 ; X64-AVX2-NEXT:    jmp _Z3foov # TAILCALL
-; X64-AVX2-NEXT:  .LBB44_1:
+; X64-AVX2-NEXT:  .LBB46_1:
 ; X64-AVX2-NEXT:    retq
   %3 = sext i32 %0 to i64
   %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
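
The CHECK lines in these tests are autogenerated. A typical regeneration
command after editing the IR (paths assume an in-tree checkout, and
--llc-binary should point at your own build of llc):

  llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
      llvm/test/CodeGen/AArch64/funnel-shift.ll \
      llvm/test/CodeGen/PowerPC/funnel-shift.ll \
      llvm/test/CodeGen/X86/funnel-shift.ll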


        

