[llvm] 2ed1598 - [SDAG] try to reduce compare of funnel shift equal 0
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 11 04:50:32 PDT 2022
Author: Sanjay Patel
Date: 2022-04-11T07:44:58-04:00
New Revision: 2ed15984b49a1af87be37ec8bd6ee3ab7f724767
URL: https://github.com/llvm/llvm-project/commit/2ed15984b49a1af87be37ec8bd6ee3ab7f724767
DIFF: https://github.com/llvm/llvm-project/commit/2ed15984b49a1af87be37ec8bd6ee3ab7f724767.diff
LOG: [SDAG] try to reduce compare of funnel shift equal 0
fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
This is similar to an existing setcc-of-rotate fold, but the
matching requires more checks for the more general funnel op:
https://alive2.llvm.org/ce/z/Ab2jDd
We are effectively decomposing the funnel shift into logical
shifts, reassociating, and removing a shift.
This should get us the final improvements for x86-64 that were
originally shown in D111530
( https://github.com/llvm/llvm-project/issues/49541 );
x86-32 still shows some SHLD/SHRD, so the pattern is not
matching there yet.
Differential Revision: https://reviews.llvm.org/D122919
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/setcc-fsh.ll
llvm/test/CodeGen/X86/icmp-shift-opt.ll
llvm/test/CodeGen/X86/setcc-fsh.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 58f5ae44ed52a..d38a5a154d688 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3875,6 +3875,72 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
+/// Try to simplify an equality comparison of a funnel shift against zero when
+/// one funnel operand is an 'or' that contains the other funnel operand:
+///   fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
+///   fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
+/// FSHR is handled by converting its shift amount to the equivalent FSHL
+/// amount. Only SETEQ/SETNE against a zero constant (or splat) with a constant
+/// (or splat) shift amount in the range [1, BW-1] is transformed.
+/// \returns the replacement setcc, or a null SDValue if nothing matched.
+static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
+                                        ISD::CondCode Cond, const SDLoc &dl,
+                                        SelectionDAG &DAG) {
+  // If we are testing for all-bits-clear, we might be able to do that with
+  // less shifting since bit-order does not matter.
+  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
+    return SDValue();
+
+  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
+  if (!C1 || !C1->isZero())
+    return SDValue();
+
+  // The funnel shift is replaced, so it must not have other users.
+  if (!N0.hasOneUse() ||
+      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
+    return SDValue();
+
+  unsigned BitWidth = N0.getScalarValueSizeInBits();
+  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
+  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+    return SDValue();
+
+  // A zero shift amount must be rejected: the code below would otherwise
+  // create a logical shift by BitWidth (either 'BitWidth - 0' directly, or via
+  // the fshr canonicalization), which is an out-of-range shift and does not
+  // compute the same value as the original funnel shift.
+  if (ShAmtC->isZero())
+    return SDValue();
+
+  // Canonicalize fshr as fshl to reduce pattern-matching.
+  // ShAmt is in [1, BitWidth - 1] here, so 'BitWidth - ShAmt' is as well.
+  unsigned ShAmt = ShAmtC->getZExtValue();
+  if (N0.getOpcode() == ISD::FSHR)
+    ShAmt = BitWidth - ShAmt;
+
+  // Match an 'or' with a specific operand 'Other' in either commuted variant.
+  // On success, X is the operand equal to 'Other' and Y is the remaining one.
+  SDValue X, Y;
+  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
+    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
+      return false;
+    if (Or.getOperand(0) == Other) {
+      X = Or.getOperand(0);
+      Y = Or.getOperand(1);
+      return true;
+    }
+    if (Or.getOperand(1) == Other) {
+      X = Or.getOperand(1);
+      Y = Or.getOperand(0);
+      return true;
+    }
+    return false;
+  };
+
+  EVT OpVT = N0.getValueType();
+  EVT ShAmtVT = N0.getOperand(2).getValueType();
+  SDValue F0 = N0.getOperand(0);
+  SDValue F1 = N0.getOperand(1);
+  if (matchOr(F0, F1)) {
+    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
+    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
+    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
+    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+  }
+  if (matchOr(F1, F0)) {
+    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
+    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
+    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
+    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+  }
+
+  return SDValue();
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -3914,6 +3980,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
return V;
+ if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
+ return V;
+
if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
diff --git a/llvm/test/CodeGen/AArch64/setcc-fsh.ll b/llvm/test/CodeGen/AArch64/setcc-fsh.ll
index d70f0b42bbc66..aaf3a6c178135 100644
--- a/llvm/test/CodeGen/AArch64/setcc-fsh.ll
+++ b/llvm/test/CodeGen/AArch64/setcc-fsh.ll
@@ -9,8 +9,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define i1 @fshl_or_eq_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_eq_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #27
-; CHECK-NEXT: orr w8, w8, w1, lsl #5
+; CHECK-NEXT: orr w8, w0, w1, lsl #5
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -23,8 +22,7 @@ define i1 @fshl_or_eq_0(i32 %x, i32 %y) {
define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_commute_eq_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #27
-; CHECK-NEXT: orr w8, w8, w1, lsl #5
+; CHECK-NEXT: orr w8, w0, w1, lsl #5
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -37,10 +35,8 @@ define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) {
define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_eq_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.4s, v0.4s, #25
; CHECK-NEXT: ushr v1.4s, v1.4s, #7
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@@ -53,10 +49,8 @@ define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) {
define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_commute_eq_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: shl v0.4s, v0.4s, #25
; CHECK-NEXT: ushr v1.4s, v1.4s, #7
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@@ -69,9 +63,7 @@ define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) {
define i1 @fshr_or_eq_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or_eq_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, #16
-; CHECK-NEXT: orr w9, w0, w1
-; CHECK-NEXT: extr w8, w9, w8, #24
+; CHECK-NEXT: orr w8, w0, w1, lsl #8
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -84,9 +76,7 @@ define i1 @fshr_or_eq_0(i16 %x, i16 %y) {
define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or_commute_eq_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, #16
-; CHECK-NEXT: orr w9, w1, w0
-; CHECK-NEXT: extr w8, w9, w8, #24
+; CHECK-NEXT: orr w8, w0, w1, lsl #8
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -99,8 +89,7 @@ define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) {
define i1 @fshr_or2_eq_0(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_or2_eq_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror x8, x0, #3
-; CHECK-NEXT: orr x8, x8, x1, lsr #3
+; CHECK-NEXT: orr x8, x0, x1, lsr #3
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -113,8 +102,7 @@ define i1 @fshr_or2_eq_0(i64 %x, i64 %y) {
define i1 @fshl_or_ne_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #25
-; CHECK-NEXT: orr w8, w8, w1, lsl #7
+; CHECK-NEXT: orr w8, w0, w1, lsl #7
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -127,8 +115,7 @@ define i1 @fshl_or_ne_0(i32 %x, i32 %y) {
define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_commute_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #25
-; CHECK-NEXT: orr w8, w8, w1, lsl #7
+; CHECK-NEXT: orr w8, w0, w1, lsl #7
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -141,10 +128,8 @@ define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) {
define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.4s, v0.4s, #5
; CHECK-NEXT: ushr v1.4s, v1.4s, #27
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@@ -157,10 +142,8 @@ define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) {
define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_commute_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: shl v0.4s, v0.4s, #5
; CHECK-NEXT: ushr v1.4s, v1.4s, #27
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@@ -173,8 +156,7 @@ define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) {
define i1 @fshr_or_ne_0(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_or_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, w0, w1
-; CHECK-NEXT: extr x8, x8, x0, #1
+; CHECK-NEXT: orr x8, x0, x1, lsl #63
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -187,8 +169,7 @@ define i1 @fshr_or_ne_0(i64 %x, i64 %y) {
define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_or_commute_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, w1, w0
-; CHECK-NEXT: extr x8, x8, x0, #1
+; CHECK-NEXT: orr x8, x0, x1, lsl #63
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -201,9 +182,8 @@ define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) {
define i1 @fshr_or2_ne_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or2_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, w0, w1
-; CHECK-NEXT: lsl w8, w8, #16
-; CHECK-NEXT: extr w8, w0, w8, #18
+; CHECK-NEXT: and w8, w1, #0xfffc
+; CHECK-NEXT: orr w8, w0, w8, lsr #2
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -216,9 +196,8 @@ define i1 @fshr_or2_ne_0(i16 %x, i16 %y) {
define i1 @fshr_or2_commute_ne_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or2_commute_ne_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, w1, w0
-; CHECK-NEXT: lsl w8, w8, #16
-; CHECK-NEXT: extr w8, w0, w8, #18
+; CHECK-NEXT: and w8, w1, #0xfffc
+; CHECK-NEXT: orr w8, w0, w8, lsr #2
; CHECK-NEXT: tst w8, #0xffff
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
index 0afe8d38bf906..23524d23c3ba7 100644
--- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -53,8 +53,8 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X64-NEXT: addq $1, %rax
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: shrq $60, %rcx
; X64-NEXT: orq %rdx, %rcx
-; X64-NEXT: shrdq $60, %rdx, %rcx
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: retq
@@ -90,8 +90,8 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
;
; X64-LABEL: opt_setcc_srl_eq_zero:
; X64: # %bb.0:
+; X64-NEXT: shrq $17, %rdi
; X64-NEXT: orq %rsi, %rdi
-; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
%srl = lshr i128 %a, 17
@@ -119,8 +119,8 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
;
; X64-LABEL: opt_setcc_srl_ne_zero:
; X64: # %bb.0:
+; X64-NEXT: shrq $17, %rdi
; X64-NEXT: orq %rsi, %rdi
-; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: setne %al
; X64-NEXT: retq
%srl = lshr i128 %a, 17
@@ -148,8 +148,8 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
;
; X64-LABEL: opt_setcc_shl_eq_zero:
; X64: # %bb.0:
+; X64-NEXT: shlq $17, %rsi
; X64-NEXT: orq %rdi, %rsi
-; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: sete %al
; X64-NEXT: retq
%shl = shl i128 %a, 17
@@ -177,8 +177,8 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
;
; X64-LABEL: opt_setcc_shl_ne_zero:
; X64: # %bb.0:
+; X64-NEXT: shlq $17, %rsi
; X64-NEXT: orq %rdi, %rsi
-; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: setne %al
; X64-NEXT: retq
%shl = shl i128 %a, 17
@@ -255,8 +255,8 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X64: # %bb.0:
+; X64-NEXT: shlq $17, %rdi
; X64-NEXT: orq %rsi, %rdi
-; X64-NEXT: shldq $17, %rsi, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
%shl.a = shl i64 %a, 17
diff --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll
index 59319a761dcda..7ab63959f58b0 100644
--- a/llvm/test/CodeGen/X86/setcc-fsh.ll
+++ b/llvm/test/CodeGen/X86/setcc-fsh.ll
@@ -265,8 +265,8 @@ define <4 x i1> @or_rotl_ne_eq0(<4 x i32> %x, <4 x i32> %y) nounwind {
define i1 @fshl_or_eq_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_eq_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: shll $5, %esi
; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldl $5, %edi, %esi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%or = or i32 %x, %y
@@ -278,8 +278,8 @@ define i1 @fshl_or_eq_0(i32 %x, i32 %y) {
define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_commute_eq_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: shll $5, %esi
; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldl $5, %edi, %esi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%or = or i32 %y, %x
@@ -291,12 +291,10 @@ define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) {
define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_eq_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: psrld $7, %xmm1
-; CHECK-NEXT: pslld $25, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: retq
%or = or <4 x i32> %x, %y
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %or, <4 x i32> <i32 25, i32 25, i32 25, i32 25>)
@@ -307,12 +305,10 @@ define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) {
define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_commute_eq_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: psrld $7, %xmm1
-; CHECK-NEXT: pslld $25, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: retq
%or = or <4 x i32> %y, %x
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %or, <4 x i32> <i32 25, i32 25, i32 25, i32 25>)
@@ -323,8 +319,8 @@ define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) {
define i1 @fshr_or_eq_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or_eq_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldw $8, %di, %si
+; CHECK-NEXT: shll $8, %esi
+; CHECK-NEXT: orw %di, %si
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%or = or i16 %x, %y
@@ -336,8 +332,8 @@ define i1 @fshr_or_eq_0(i16 %x, i16 %y) {
define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or_commute_eq_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldw $8, %di, %si
+; CHECK-NEXT: shll $8, %esi
+; CHECK-NEXT: orw %di, %si
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%or = or i16 %y, %x
@@ -349,8 +345,8 @@ define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) {
define i1 @fshr_or2_eq_0(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_or2_eq_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: shrq $3, %rsi
; CHECK-NEXT: orq %rdi, %rsi
-; CHECK-NEXT: shrdq $3, %rdi, %rsi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%or = or i64 %x, %y
@@ -362,8 +358,8 @@ define i1 @fshr_or2_eq_0(i64 %x, i64 %y) {
define i1 @fshr_or2_commute_eq_0(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_or2_commute_eq_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: shrq $3, %rsi
; CHECK-NEXT: orq %rdi, %rsi
-; CHECK-NEXT: shrdq $3, %rdi, %rsi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%or = or i64 %y, %x
@@ -375,8 +371,8 @@ define i1 @fshr_or2_commute_eq_0(i64 %x, i64 %y) {
define i1 @fshl_or_ne_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_ne_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: shll $7, %esi
; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldl $7, %edi, %esi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%or = or i32 %x, %y
@@ -388,8 +384,8 @@ define i1 @fshl_or_ne_0(i32 %x, i32 %y) {
define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_commute_ne_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: shll $7, %esi
; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldl $7, %edi, %esi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%or = or i32 %y, %x
@@ -401,12 +397,10 @@ define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) {
define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_ne_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: psrld $27, %xmm1
-; CHECK-NEXT: pslld $5, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -419,12 +413,10 @@ define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) {
define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_or2_commute_ne_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: psrld $27, %xmm1
-; CHECK-NEXT: pslld $5, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -437,8 +429,8 @@ define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) {
define i1 @fshr_or_ne_0(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_or_ne_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldq $63, %rdi, %rsi
+; CHECK-NEXT: shlq $63, %rsi
+; CHECK-NEXT: orq %rdi, %rsi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%or = or i64 %x, %y
@@ -450,8 +442,8 @@ define i1 @fshr_or_ne_0(i64 %x, i64 %y) {
define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_or_commute_ne_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shldq $63, %rdi, %rsi
+; CHECK-NEXT: shlq $63, %rsi
+; CHECK-NEXT: orq %rdi, %rsi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%or = or i64 %y, %x
@@ -463,8 +455,9 @@ define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) {
define i1 @fshr_or2_ne_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or2_ne_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shrdw $2, %di, %si
+; CHECK-NEXT: movzwl %si, %eax
+; CHECK-NEXT: shrl $2, %eax
+; CHECK-NEXT: orw %di, %ax
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%or = or i16 %x, %y
@@ -476,8 +469,9 @@ define i1 @fshr_or2_ne_0(i16 %x, i16 %y) {
define i1 @fshr_or2_commute_ne_0(i16 %x, i16 %y) {
; CHECK-LABEL: fshr_or2_commute_ne_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %edi, %esi
-; CHECK-NEXT: shrdw $2, %di, %si
+; CHECK-NEXT: movzwl %si, %eax
+; CHECK-NEXT: shrl $2, %eax
+; CHECK-NEXT: orw %di, %ax
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%or = or i16 %y, %x
More information about the llvm-commits
mailing list