[llvm] bb850d4 - [AArch64][RISCV][x86] add tests for funnel shift with shift logic; NFC

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 21 07:32:50 PST 2022


Author: Sanjay Patel
Date: 2022-02-21T10:24:45-05:00
New Revision: bb850d422b6449d00c999ba4a1f2d1d68a9a2823

URL: https://github.com/llvm/llvm-project/commit/bb850d422b6449d00c999ba4a1f2d1d68a9a2823
DIFF: https://github.com/llvm/llvm-project/commit/bb850d422b6449d00c999ba4a1f2d1d68a9a2823.diff

LOG: [AArch64][RISCV][x86] add tests for funnel shift with shift logic; NFC
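
Each new test ORs a funnel shift (or rotate) with a plain shift of one of the
funnel-shift operands. Since fshl(x, y, s) == (x << s) | (y >> (32 - s)) for a
non-zero modular shift amount (and x when it is zero), these patterns can in
principle collapse into a single funnel shift of OR'd operands; the "_simplify"
variants shift an operand the funnel shift already covers, so there the OR is
redundant. A hypothetical before/after sketch of that identity (names are
illustrative; the log does not promise any particular fold):

    ; before: funnel shift OR'd with a shift of its second operand
    %shy = shl i32 %y, %s
    %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
    %or  = or i32 %fun, %shy
    ; after: equivalent for every %s, including 0 (mod 32)
    %xy  = or i32 %x, %y
    %r   = call i32 @llvm.fshl.i32(i32 %xy, i32 %y, i32 %s)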

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/funnel-shift.ll
    llvm/test/CodeGen/RISCV/rv32zbp.ll
    llvm/test/CodeGen/X86/funnel-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 51dc7ce2d061d..b4b4e37b4cba5 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -343,3 +343,164 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %f
 }
 
+define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_shl_fshl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w2
+; CHECK-NEXT:    mvn w9, w2
+; CHECK-NEXT:    lsr w10, w1, #1
+; CHECK-NEXT:    lsr w9, w10, w9
+; CHECK-NEXT:    lsl w8, w0, w8
+; CHECK-NEXT:    lsl w10, w1, w2
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    orr w0, w8, w10
+; CHECK-NEXT:    ret
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_shl_rotl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w2
+; CHECK-NEXT:    lsl w9, w0, w2
+; CHECK-NEXT:    ror w8, w1, w8
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+  %shx = shl i32 %x, %s
+  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %rot, %shx
+  ret i32 %or
+}
+
+define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_shl_fshl_commute:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w2
+; CHECK-NEXT:    mvn w9, w2
+; CHECK-NEXT:    lsr w10, w1, #1
+; CHECK-NEXT:    lsr w9, w10, w9
+; CHECK-NEXT:    lsl w8, w0, w8
+; CHECK-NEXT:    lsl w10, w1, w2
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    orr w0, w10, w8
+; CHECK-NEXT:    ret
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}
+
+define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_shl_rotl_commute:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w8, w2
+; CHECK-NEXT:    lsl w9, w0, w2
+; CHECK-NEXT:    ror w8, w1, w8
+; CHECK-NEXT:    orr w0, w9, w8
+; CHECK-NEXT:    ret
+  %shx = shl i32 %x, %s
+  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %shx, %rot
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_lshr_fshr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w2
+; CHECK-NEXT:    mvn w9, w2
+; CHECK-NEXT:    lsl w10, w1, #1
+; CHECK-NEXT:    lsr w8, w0, w8
+; CHECK-NEXT:    lsl w9, w10, w9
+; CHECK-NEXT:    lsr w10, w1, w2
+; CHECK-NEXT:    orr w8, w9, w8
+; CHECK-NEXT:    orr w0, w8, w10
+; CHECK-NEXT:    ret
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_lshr_rotr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr w8, w0, w2
+; CHECK-NEXT:    ror w9, w1, w2
+; CHECK-NEXT:    orr w0, w9, w8
+; CHECK-NEXT:    ret
+  %shx = lshr i32 %x, %s
+  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %rot, %shx
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_lshr_fshr_commute:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w2
+; CHECK-NEXT:    mvn w9, w2
+; CHECK-NEXT:    lsl w10, w1, #1
+; CHECK-NEXT:    lsr w8, w0, w8
+; CHECK-NEXT:    lsl w9, w10, w9
+; CHECK-NEXT:    lsr w10, w1, w2
+; CHECK-NEXT:    orr w8, w9, w8
+; CHECK-NEXT:    orr w0, w10, w8
+; CHECK-NEXT:    ret
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}
+
+define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_lshr_rotr_commute:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr w8, w0, w2
+; CHECK-NEXT:    ror w9, w1, w2
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+  %shx = lshr i32 %x, %s
+  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %shx, %rot
+  ret i32 %or
+}
+
+define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_shl_fshl_simplify:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w2
+; CHECK-NEXT:    mvn w9, w2
+; CHECK-NEXT:    lsr w10, w0, #1
+; CHECK-NEXT:    lsr w9, w10, w9
+; CHECK-NEXT:    lsl w8, w1, w8
+; CHECK-NEXT:    lsl w10, w1, w2
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    orr w0, w8, w10
+; CHECK-NEXT:    ret
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
+; CHECK-LABEL: or_lshr_fshr_simplify:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w2
+; CHECK-NEXT:    mvn w9, w2
+; CHECK-NEXT:    lsl w10, w0, #1
+; CHECK-NEXT:    lsr w8, w1, w8
+; CHECK-NEXT:    lsl w9, w10, w9
+; CHECK-NEXT:    lsr w10, w1, w2
+; CHECK-NEXT:    orr w8, w9, w8
+; CHECK-NEXT:    orr w0, w10, w8
+; CHECK-NEXT:    ret
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}

diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index d021b26f45612..7e113d6be7d0a 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -2823,3 +2823,259 @@ define i64 @zexth_i64(i64 %a) nounwind {
   %and = and i64 %a, 65535
   ret i64 %and
 }
+
+define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_shl_fshl:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sll a3, a1, a2
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    srli a1, a1, 1
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_shl_fshl:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    sll a3, a1, a2
+; RV32ZBP-NEXT:    sll a0, a0, a2
+; RV32ZBP-NEXT:    not a2, a2
+; RV32ZBP-NEXT:    srli a1, a1, 1
+; RV32ZBP-NEXT:    srl a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a0, a1
+; RV32ZBP-NEXT:    or a0, a0, a3
+; RV32ZBP-NEXT:    ret
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_shl_rot(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_shl_rot:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    sll a3, a1, a2
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_shl_rot:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    sll a0, a0, a2
+; RV32ZBP-NEXT:    rol a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a1, a0
+; RV32ZBP-NEXT:    ret
+  %shx = shl i32 %x, %s
+  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %rot, %shx
+  ret i32 %or
+}
+
+define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_shl_fshl_commute:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sll a3, a1, a2
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    srli a1, a1, 1
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_shl_fshl_commute:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    sll a3, a1, a2
+; RV32ZBP-NEXT:    sll a0, a0, a2
+; RV32ZBP-NEXT:    not a2, a2
+; RV32ZBP-NEXT:    srli a1, a1, 1
+; RV32ZBP-NEXT:    srl a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a0, a1
+; RV32ZBP-NEXT:    or a0, a3, a0
+; RV32ZBP-NEXT:    ret
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}
+
+define i32 @or_shl_rot_commute(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_shl_rot_commute:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    sll a3, a1, a2
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_shl_rot_commute:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    sll a0, a0, a2
+; RV32ZBP-NEXT:    rol a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a0, a1
+; RV32ZBP-NEXT:    ret
+  %shx = shl i32 %x, %s
+  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %shx, %rot
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_lshr_fshr:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srl a3, a1, a2
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_lshr_fshr:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    srl a3, a1, a2
+; RV32ZBP-NEXT:    srl a0, a0, a2
+; RV32ZBP-NEXT:    not a2, a2
+; RV32ZBP-NEXT:    slli a1, a1, 1
+; RV32ZBP-NEXT:    sll a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a1, a0
+; RV32ZBP-NEXT:    or a0, a0, a3
+; RV32ZBP-NEXT:    ret
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_lshr_rotr:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    srl a3, a1, a2
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_lshr_rotr:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    srl a0, a0, a2
+; RV32ZBP-NEXT:    ror a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a1, a0
+; RV32ZBP-NEXT:    ret
+  %shx = lshr i32 %x, %s
+  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %rot, %shx
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_lshr_fshr_commute:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srl a3, a1, a2
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_lshr_fshr_commute:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    srl a3, a1, a2
+; RV32ZBP-NEXT:    srl a0, a0, a2
+; RV32ZBP-NEXT:    not a2, a2
+; RV32ZBP-NEXT:    slli a1, a1, 1
+; RV32ZBP-NEXT:    sll a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a1, a0
+; RV32ZBP-NEXT:    or a0, a3, a0
+; RV32ZBP-NEXT:    ret
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}
+
+define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_lshr_rotr_commute:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    srl a3, a1, a2
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_lshr_rotr_commute:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    srl a0, a0, a2
+; RV32ZBP-NEXT:    ror a1, a1, a2
+; RV32ZBP-NEXT:    or a0, a0, a1
+; RV32ZBP-NEXT:    ret
+  %shx = lshr i32 %x, %s
+  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %shx, %rot
+  ret i32 %or
+}
+
+define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_shl_fshl_simplify:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_shl_fshl_simplify:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    sll a1, a1, a2
+; RV32ZBP-NEXT:    not a2, a2
+; RV32ZBP-NEXT:    srli a0, a0, 1
+; RV32ZBP-NEXT:    srl a0, a0, a2
+; RV32ZBP-NEXT:    or a0, a1, a0
+; RV32ZBP-NEXT:    or a0, a0, a1
+; RV32ZBP-NEXT:    ret
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
+; RV32I-LABEL: or_lshr_fshr_simplify:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-LABEL: or_lshr_fshr_simplify:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    srl a1, a1, a2
+; RV32ZBP-NEXT:    not a2, a2
+; RV32ZBP-NEXT:    slli a0, a0, 1
+; RV32ZBP-NEXT:    sll a0, a0, a2
+; RV32ZBP-NEXT:    or a0, a0, a1
+; RV32ZBP-NEXT:    or a0, a1, a0
+; RV32ZBP-NEXT:    ret
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}

diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 2577c333c9287..49cf2684c7a82 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -1036,3 +1036,281 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
   ret void
 }
 declare dso_local void @_Z3foov()
+
+define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_shl_fshl:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movl %edx, %esi
+; X86-SSE2-NEXT:    shll %cl, %esi
+; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
+; X86-SSE2-NEXT:    orl %esi, %eax
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_shl_fshl:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shll %cl, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_shl_rotl:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    shll %cl, %edx
+; X86-SSE2-NEXT:    roll %cl, %eax
+; X86-SSE2-NEXT:    orl %edx, %eax
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_shl_rotl:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shll %cl, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    roll %cl, %eax
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shx = shl i32 %x, %s
+  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %rot, %shx
+  ret i32 %or
+}
+
+define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_shl_fshl_commute:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movl %edx, %esi
+; X86-SSE2-NEXT:    shll %cl, %esi
+; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
+; X86-SSE2-NEXT:    orl %esi, %eax
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_shl_fshl_commute:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shll %cl, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}
+
+define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_shl_rotl_commute:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    shll %cl, %edx
+; X86-SSE2-NEXT:    roll %cl, %eax
+; X86-SSE2-NEXT:    orl %edx, %eax
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_shl_rotl_commute:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shll %cl, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    roll %cl, %eax
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shx = shl i32 %x, %s
+  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %shx, %rot
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_lshr_fshr:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movl %edx, %esi
+; X86-SSE2-NEXT:    shrl %cl, %esi
+; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
+; X86-SSE2-NEXT:    orl %esi, %eax
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_lshr_fshr:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shrl %cl, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_lshr_rotr:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    shrl %cl, %edx
+; X86-SSE2-NEXT:    rorl %cl, %eax
+; X86-SSE2-NEXT:    orl %edx, %eax
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_lshr_rotr:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    rorl %cl, %eax
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shx = lshr i32 %x, %s
+  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %rot, %shx
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_lshr_fshr_commute:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movl %edx, %esi
+; X86-SSE2-NEXT:    shrl %cl, %esi
+; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
+; X86-SSE2-NEXT:    orl %esi, %eax
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_lshr_fshr_commute:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shrl %cl, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}
+
+define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_lshr_rotr_commute:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    shrl %cl, %edx
+; X86-SSE2-NEXT:    rorl %cl, %eax
+; X86-SSE2-NEXT:    orl %edx, %eax
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_lshr_rotr_commute:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    rorl %cl, %eax
+; X64-AVX2-NEXT:    orl %edi, %eax
+; X64-AVX2-NEXT:    retq
+  %shx = lshr i32 %x, %s
+  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
+  %or = or i32 %shx, %rot
+  ret i32 %or
+}
+
+define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_shl_fshl_simplify:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl %eax, %esi
+; X86-SSE2-NEXT:    shll %cl, %esi
+; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
+; X86-SSE2-NEXT:    orl %esi, %eax
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_shl_fshl_simplify:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shll %cl, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shldl %cl, %edi, %esi
+; X64-AVX2-NEXT:    orl %esi, %eax
+; X64-AVX2-NEXT:    retq
+  %shy = shl i32 %y, %s
+  %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
+  %or = or i32 %fun, %shy
+  ret i32 %or
+}
+
+define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
+; X86-SSE2-LABEL: or_lshr_fshr_simplify:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl %eax, %esi
+; X86-SSE2-NEXT:    shrl %cl, %esi
+; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
+; X86-SSE2-NEXT:    orl %esi, %eax
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: or_lshr_fshr_simplify:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
+; X64-AVX2-NEXT:    shrl %cl, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrdl %cl, %edi, %esi
+; X64-AVX2-NEXT:    orl %esi, %eax
+; X64-AVX2-NEXT:    retq
+  %shy = lshr i32 %y, %s
+  %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
+  %or = or i32 %shy, %fun
+  ret i32 %or
+}

More information about the llvm-commits mailing list