[llvm] Recommit "[DAGCombiner] Transform (icmp eq/ne (and X,C0),(shift X,C1)) to use rotate or to get better constants." (2nd Try) (PR #71729)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 8 12:02:29 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: None (goldsteinn)
Changes:
- [X86] Add more tests for transform `(icmp eq/ne (and X,C0),(shift X,C1))`; PR71598
- Recommit "[DAGCombiner] Transform `(icmp eq/ne (and X,C0),(shift X,C1))` to use rotate or to get better constants." (2nd Try)
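
For context, the equivalences this combine relies on are easy to check in isolation. The standalone C++ snippet below is not part of the patch; it is only a sketch illustrating the `(x64 & UINT32_MAX) == (x64 >> 32)` example from the new comments, plus the shl/srl interchange exercised by the i64 shift-by-44 test. The `rotl64` helper is written here just for the illustration.

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Plain 64-bit rotate-left, used only for this illustration.
static uint64_t rotl64(uint64_t X, unsigned Amt) {
  Amt &= 63;
  return Amt == 0 ? X : (X << Amt) | (X >> (64 - Amt));
}

int main() {
  for (uint64_t X : {0x0123456789abcdefULL, 0xdeadbeefdeadbeefULL,
                     0xffffffff00000000ULL, 0x00000000ffffffffULL}) {
    // When the shift amount (32) is a power of 2 and the mask covers the
    // remaining low bits, the and+shift compare and the rotate compare agree.
    bool AndShift = (X & UINT32_MAX) == (X >> 32);
    bool Rotate = rotl64(X, 32) == X;
    assert(AndShift == Rotate);

    // The shl+and and srl+and forms of the same check are also equivalent,
    // which is what lets the combiner pick whichever form has the cheaper
    // immediates (compare the i64 shift-by-44 test in the diff below).
    bool ShlForm = (X << 44) == (X & 0xFFFFF00000000000ULL);
    bool SrlForm = (X >> 44) == (X & 0xFFFFFULL);
    assert(ShlForm == SrlForm);
  }
  return 0;
}
```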
---
Patch is 26.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71729.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+18)
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+120-15)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+67)
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+5)
- (modified) llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll (+306-71)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 58aad70c4bb36e6..c87537291e3b161 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -832,6 +832,24 @@ class TargetLoweringBase {
return N->getOpcode() == ISD::FDIV;
}
+ // Given:
+ // (icmp eq/ne (and X, C0), (shift X, C1))
+ // or
+ // (icmp eq/ne X, (rotate X, CPow2))
+
+ // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
+ // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
+ // this returns whether we prefer the shift to be shift-right, shift-left, or rotate.
+ // Note: It's only valid to convert the rotate version to the shift version
+ // iff the shift amount (`C1`) is a power of 2 (including 0).
+ // If ShiftOpc (the current opcode) is returned, do nothing.
+ virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
+ EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
+ const APInt &ShiftOrRotateAmt,
+ const std::optional<APInt> &AndMask) const {
+ return ShiftOpc;
+ }
+
/// These two forms are equivalent:
/// sub %y, (xor %x, -1)
/// add (add %x, 1), %y
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a867d88f76c0cf6..3fe7be2821e4b15 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12456,27 +12456,132 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
- SDLoc(N), !PreferSetCC);
-
- if (!Combined)
- return SDValue();
+ SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
- // If we prefer to have a setcc, and we don't, we'll try our best to
- // recreate one using rebuildSetCC.
- if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
- SDValue NewSetCC = rebuildSetCC(Combined);
+ if (Combined) {
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
- // We don't have anything interesting to combine to.
- if (NewSetCC.getNode() == N)
- return SDValue();
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
- if (NewSetCC)
- return NewSetCC;
+ if (NewSetCC)
+ return NewSetCC;
+ }
+ return Combined;
}
- return Combined;
+ // Optimize
+ // 1) (icmp eq/ne (and X, C0), (shift X, C1))
+ // or
+ // 2) (icmp eq/ne X, (rotate X, C1))
+ // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
+ // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
+ // Then:
+ // If C1 is a power of 2, then the rotate and shift+and versions are
+ // equivalent, so we can interchange them depending on target preference.
+ // Otherwise, if we have the shift+and version, we can interchange srl/shl,
+ // which in turn affects the constant C0. We can use this to get better
+ // constants, again determined by target preference.
+ if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
+ auto IsAndWithShift = [](SDValue A, SDValue B) {
+ return A.getOpcode() == ISD::AND &&
+ (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
+ A.getOperand(0) == B.getOperand(0);
+ };
+ auto IsRotateWithOp = [](SDValue A, SDValue B) {
+ return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
+ B.getOperand(0) == A;
+ };
+ SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
+ bool IsRotate = false;
+
+ // Find either shift+and or rotate pattern.
+ if (IsAndWithShift(N0, N1)) {
+ AndOrOp = N0;
+ ShiftOrRotate = N1;
+ } else if (IsAndWithShift(N1, N0)) {
+ AndOrOp = N1;
+ ShiftOrRotate = N0;
+ } else if (IsRotateWithOp(N0, N1)) {
+ IsRotate = true;
+ AndOrOp = N0;
+ ShiftOrRotate = N1;
+ } else if (IsRotateWithOp(N1, N0)) {
+ IsRotate = true;
+ AndOrOp = N1;
+ ShiftOrRotate = N0;
+ }
+
+ if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
+ (IsRotate || AndOrOp.hasOneUse())) {
+ EVT OpVT = N0.getValueType();
+ // Get the constant shift/rotate amount and possibly the mask (if it's the
+ // shift+and variant).
+ auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
+ ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
+ /*AllowTrunc*/ false);
+ if (CNode == nullptr)
+ return std::nullopt;
+ return CNode->getAPIntValue();
+ };
+ std::optional<APInt> AndCMask =
+ IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
+ std::optional<APInt> ShiftCAmt =
+ GetAPIntValue(ShiftOrRotate.getOperand(1));
+ unsigned NumBits = OpVT.getScalarSizeInBits();
+
+ // We found constants.
+ if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
+ unsigned ShiftOpc = ShiftOrRotate.getOpcode();
+ // Check that the constants meet the constraints.
+ bool CanTransform = IsRotate;
+ if (!CanTransform) {
+ // Check that the mask and shift complement each other
+ CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
+ // Check that we are comparing all bits
+ CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
+ // Check that the and mask is correct for the shift
+ CanTransform &=
+ ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
+ }
+
+ // See if target prefers another shift/rotate opcode.
+ unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
+ OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
+ // Transform is valid and we have a new preference.
+ if (CanTransform && NewShiftOpc != ShiftOpc) {
+ SDLoc DL(N);
+ SDValue NewShiftOrRotate =
+ DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
+ ShiftOrRotate.getOperand(1));
+ SDValue NewAndOrOp = SDValue();
+
+ if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
+ APInt NewMask =
+ NewShiftOpc == ISD::SHL
+ ? APInt::getHighBitsSet(NumBits,
+ NumBits - ShiftCAmt->getZExtValue())
+ : APInt::getLowBitsSet(NumBits,
+ NumBits - ShiftCAmt->getZExtValue());
+ NewAndOrOp =
+ DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
+ DAG.getConstant(NewMask, DL, OpVT));
+ } else {
+ NewAndOrOp = ShiftOrRotate.getOperand(0);
+ }
+
+ return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
+ }
+ }
+ }
+ }
+ return SDValue();
}
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e6045a4de51ebe2..8002aa0141910c3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3257,6 +3257,73 @@ bool X86TargetLowering::
return NewShiftOpcode == ISD::SHL;
}
+unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
+ EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
+ const APInt &ShiftOrRotateAmt, const std::optional<APInt> &AndMask) const {
+ if (!VT.isInteger())
+ return ShiftOpc;
+
+ bool PreferRotate = false;
+ if (VT.isVector()) {
+ // For vectors, if we have rotate instruction support, then it's definitely
+ // best. Otherwise it's not clear what's best, so just don't make changes.
+ PreferRotate = Subtarget.hasAVX512() && (VT.getScalarType() == MVT::i32 ||
+ VT.getScalarType() == MVT::i64);
+ } else {
+ // For scalars, if we have BMI2, prefer rotate for rorx. Otherwise prefer
+ // rotate unless we have a zext mask+shr.
+ PreferRotate = Subtarget.hasBMI2();
+ if (!PreferRotate) {
+ unsigned MaskBits =
+ VT.getScalarSizeInBits() - ShiftOrRotateAmt.getZExtValue();
+ PreferRotate = (MaskBits != 8) && (MaskBits != 16) && (MaskBits != 32);
+ }
+ }
+
+ if (ShiftOpc == ISD::SHL || ShiftOpc == ISD::SRL) {
+ assert(AndMask.has_value() && "Null andmask when querying about shift+and");
+
+ if (PreferRotate && MayTransformRotate)
+ return ISD::ROTL;
+
+ // If vector, we don't really get much benefit from swapping around
+ // constants. Maybe in the future we could check if the DAG already has
+ // the flipped node.
+ if (VT.isVector())
+ return ShiftOpc;
+
+ // See if it's beneficial to swap the shift type.
+ if (ShiftOpc == ISD::SHL) {
+ // If the current setup has an imm64 mask, then the inverse will have
+ // at least an imm32 mask (or be a zext i32 -> i64).
+ if (VT == MVT::i64)
+ return AndMask->getSignificantBits() > 32 ? (unsigned)ISD::SRL
+ : ShiftOpc;
+
+ // We can only benefit if we need at least 7 bits for the mask. We
+ // don't want to replace a shl by 1, 2, or 3, as those can be implemented
+ // with lea/add.
+ return ShiftOrRotateAmt.uge(7) ? (unsigned)ISD::SRL : ShiftOpc;
+ }
+
+ if (VT == MVT::i64)
+ // Keep an exact 32-bit imm64; this is a zext i32 -> i64, which is
+ // extremely efficient.
+ return AndMask->getSignificantBits() > 33 ? (unsigned)ISD::SHL : ShiftOpc;
+
+ // Keep small shifts as shl so we can generate add/lea.
+ return ShiftOrRotateAmt.ult(7) ? (unsigned)ISD::SHL : ShiftOpc;
+ }
+
+ // We prefer rotate for vectors, or if we won't get a zext mask with SRL
+ // (PreferRotate will be set in the latter case).
+ if (PreferRotate || VT.isVector())
+ return ShiftOpc;
+
+ // Non-vector type and we have a zext mask with SRL.
+ return ISD::SRL;
+}
+
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8046f42736951cd..3b1b2603fd8fc61 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1138,6 +1138,11 @@ namespace llvm {
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
+ unsigned preferedOpcodeForCmpEqPiecesOfOperand(
+ EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
+ const APInt &ShiftOrRotateAmt,
+ const std::optional<APInt> &AndMask) const override;
+
bool preferScalarizeSplat(SDNode *N) const override;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
index 8ec142acb71d4ce..7996454a0158eac 100644
--- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -20,9 +20,8 @@ define i1 @shr_to_shl_eq_i8_s2(i8 %x) {
; CHECK-LABEL: shr_to_shl_eq_i8_s2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: andb $63, %al
-; CHECK-NEXT: shrb $2, %dil
-; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: rolb $2, %al
+; CHECK-NEXT: cmpb %al, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%and = and i8 %x, 63
@@ -35,9 +34,9 @@ define i1 @shl_to_shr_ne_i8_s7(i8 %x) {
; CHECK-LABEL: shl_to_shr_ne_i8_s7:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: shlb $7, %al
-; CHECK-NEXT: andb $-128, %dil
-; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: shrb $7, %al
+; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: cmpb %al, %dil
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%shl = shl i8 %x, 7
@@ -63,9 +62,8 @@ define i1 @shr_to_shl_eq_i8_s1(i8 %x) {
; CHECK-LABEL: shr_to_shl_eq_i8_s1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: andb $127, %al
-; CHECK-NEXT: shrb %dil
-; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: rolb %al
+; CHECK-NEXT: cmpb %al, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%and = and i8 %x, 127
@@ -77,10 +75,10 @@ define i1 @shr_to_shl_eq_i8_s1(i8 %x) {
define i1 @shr_to_shl_eq_i32_s3(i32 %x) {
; CHECK-LABEL: shr_to_shl_eq_i32_s3:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
-; CHECK-NEXT: shrl $3, %edi
-; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal (,%rdi,8), %eax
+; CHECK-NEXT: andl $-8, %edi
+; CHECK-NEXT: cmpl %eax, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%and = and i32 %x, 536870911
@@ -105,14 +103,20 @@ define i1 @shl_to_shr_eq_i32_s3_fail(i32 %x) {
}
define i1 @shl_to_shr_ne_i32_s16(i32 %x) {
-; CHECK-LABEL: shl_to_shr_ne_i32_s16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: andl $-65536, %edi # imm = 0xFFFF0000
-; CHECK-NEXT: cmpl %edi, %eax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
+; CHECK-NOBMI-LABEL: shl_to_shr_ne_i32_s16:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movzwl %di, %eax
+; CHECK-NOBMI-NEXT: shrl $16, %edi
+; CHECK-NOBMI-NEXT: cmpl %edi, %eax
+; CHECK-NOBMI-NEXT: setne %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: shl_to_shr_ne_i32_s16:
+; CHECK-BMI2: # %bb.0:
+; CHECK-BMI2-NEXT: rorxl $16, %edi, %eax
+; CHECK-BMI2-NEXT: cmpl %eax, %edi
+; CHECK-BMI2-NEXT: setne %al
+; CHECK-BMI2-NEXT: retq
%shl = shl i32 %x, 16
%and = and i32 %x, 4294901760
%r = icmp ne i32 %shl, %and
@@ -137,9 +141,8 @@ define i1 @shl_to_shr_ne_i32_s16_fail(i32 %x) {
define i1 @shr_to_shl_eq_i16_s1(i16 %x) {
; CHECK-LABEL: shr_to_shl_eq_i16_s1:
; CHECK: # %bb.0:
-; CHECK-NEXT: movzwl %di, %eax
-; CHECK-NEXT: andl $32767, %edi # imm = 0x7FFF
-; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: rolw %ax
; CHECK-NEXT: cmpw %ax, %di
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
@@ -167,9 +170,9 @@ define i1 @shr_to_shl_eq_i16_s1_fail(i16 %x) {
define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
; CHECK-LABEL: shl_to_shr_eq_i64_s44:
; CHECK: # %bb.0:
-; CHECK-NEXT: movabsq $-17592186044416, %rax # imm = 0xFFFFF00000000000
-; CHECK-NEXT: andq %rdi, %rax
-; CHECK-NEXT: shlq $44, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shrq $44, %rax
+; CHECK-NEXT: andl $1048575, %edi # imm = 0xFFFFF
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
@@ -180,13 +183,20 @@ define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
}
define i1 @shr_to_shl_ne_i64_s32(i64 %x) {
-; CHECK-LABEL: shr_to_shl_ne_i64_s32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: shrq $32, %rdi
-; CHECK-NEXT: cmpq %rdi, %rax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
+; CHECK-NOBMI-LABEL: shr_to_shl_ne_i64_s32:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: shrq $32, %rdi
+; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
+; CHECK-NOBMI-NEXT: setne %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: shr_to_shl_ne_i64_s32:
+; CHECK-BMI2: # %bb.0:
+; CHECK-BMI2-NEXT: rorxq $32, %rdi, %rax
+; CHECK-BMI2-NEXT: cmpq %rax, %rdi
+; CHECK-BMI2-NEXT: setne %al
+; CHECK-BMI2-NEXT: retq
%and = and i64 %x, 4294967295
%shr = lshr i64 %x, 32
%r = icmp ne i64 %and, %shr
@@ -230,9 +240,9 @@ define i1 @ashr_to_shl_ne_i64_s32_fail(i64 %x) {
define i1 @shl_to_shr_eq_i64_s63(i64 %x) {
; CHECK-LABEL: shl_to_shr_eq_i64_s63:
; CHECK: # %bb.0:
-; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; CHECK-NEXT: andq %rdi, %rax
-; CHECK-NEXT: shlq $63, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shrq $63, %rax
+; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
@@ -258,23 +268,14 @@ define i1 @shl_to_shr_eq_i64_s63_fail(i64 %x) {
}
define i1 @shr_to_shl_eq_i64_s7(i64 %x) {
-; CHECK-NOBMI-LABEL: shr_to_shl_eq_i64_s7:
-; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: movabsq $144115188075855871, %rax # imm = 0x1FFFFFFFFFFFFFF
-; CHECK-NOBMI-NEXT: andq %rdi, %rax
-; CHECK-NOBMI-NEXT: shrq $7, %rdi
-; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
-; CHECK-NOBMI-NEXT: sete %al
-; CHECK-NOBMI-NEXT: retq
-;
-; CHECK-BMI2-LABEL: shr_to_shl_eq_i64_s7:
-; CHECK-BMI2: # %bb.0:
-; CHECK-BMI2-NEXT: movb $57, %al
-; CHECK-BMI2-NEXT: bzhiq %rax, %rdi, %rax
-; CHECK-BMI2-NEXT: shrq $7, %rdi
-; CHECK-BMI2-NEXT: cmpq %rdi, %rax
-; CHECK-BMI2-NEXT: sete %al
-; CHECK-BMI2-NEXT: retq
+; CHECK-LABEL: shr_to_shl_eq_i64_s7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shlq $7, %rax
+; CHECK-NEXT: andq $-128, %rdi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
%and = and i64 %x, 144115188075855871
%shr = lshr i64 %x, 7
%r = icmp eq i64 %and, %shr
@@ -284,9 +285,8 @@ define i1 @shr_to_shl_eq_i64_s7(i64 %x) {
define i1 @shl_to_shr_ne_i32_s24(i32 %x) {
; CHECK-LABEL: shl_to_shr_ne_i32_s24:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: andl $-16777216, %edi # imm = 0xFF000000
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: shrl $24, %edi
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
@@ -312,14 +312,20 @@ define i1 @shr_to_shl_ne_i32_s24_fail(i32 %x) {
}
define i1 @shr_to_shl_ne_i32_s8(i32 %x) {
-; CHECK-LABEL: shr_to_shl_ne_i32_s8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: andl $16777215, %eax # imm = 0xFFFFFF
-; CHECK-NEXT: shrl $8, %edi
-; CHECK-NEXT: cmpl %edi, %eax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
+; CHECK-NOBMI-LABEL: shr_to_shl_ne_i32_s8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: roll $8, %eax
+; CHECK-NOBMI-NEXT: cmpl %eax, %edi
+; CHECK-NOBMI-NEXT: setne %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: shr_to_shl_ne_i32_s8:
+; CHECK-BMI2: # %bb.0:
+; CHECK-BMI2-NEXT: rorxl $24, %edi, %eax
+; CHECK-BMI2-NEXT: cmpl %eax, %edi
+; CHECK-BMI2-NEXT: setne %al
+; CHECK-BMI2-NEXT: retq
%and = and i32 %x, 16777215
%shr = lshr i32 %x, 8
%r = icmp ne i32 %and, %shr
@@ -359,9 +365,8 @@ define <4 x i1> @shr_to_ror_eq_4xi32_s4(<4 x i32> %x) {
;
; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4:
; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpsrld $4, %xmm0, %xmm1
-; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vprold $4, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
@@ -402,9 +407,8 @@ define <4 x i1> @shl_to_ror_eq_4xi32_s8(<4 x i32> %x) {
;
; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s8:
; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpslld $8, %xmm0, %xmm1
-; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vprold $8, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%shr = shl <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>
@@ -570,6 +574,237 @@ define <16 x i1> @shl_to_ror_eq_16xi16_s8_fail_preserve_i16(<16 x i16> %x) {
ret <16 x i1> %r
}
+define i1 @shr_to_shl_eq_i3...
[truncated]
``````````
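
As a usage note (not taken from the patch itself): a backend opts into the new combine behavior by overriding the hook added in TargetLowering.h above; the default implementation returns `ShiftOpc` unchanged. The sketch below shows the shape of such an override for a hypothetical target. The class name `MyTargetLowering` and the preference policy are invented for illustration; only the hook's signature matches the patch.

```cpp
// Hypothetical override of the hook added in TargetLowering.h above. Only the
// signature comes from the patch; the policy below is made up for the sketch.
unsigned MyTargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
    EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
    const APInt &ShiftOrRotateAmt, const std::optional<APInt> &AndMask) const {
  // Leave vectors and non-integer types alone in this sketch.
  if (!VT.isInteger() || VT.isVector())
    return ShiftOpc;

  // If the and+shift form may legally become a rotate (i.e. the shift amount
  // is a power of 2), a target with cheap rotate-by-immediate could ask the
  // combiner for ROTL here.
  if ((ShiftOpc == ISD::SHL || ShiftOpc == ISD::SRL) && MayTransformRotate)
    return ISD::ROTL;

  // Otherwise keep whatever opcode the DAG already has.
  return ShiftOpc;
}
```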
https://github.com/llvm/llvm-project/pull/71729