[libcxx-commits] [libcxx] [flang] [libc] [clang-tools-extra] [clang] [llvm] [compiler-rt] [libunwind] [lld] [lldb] [X86] Use RORX over SHR imm (PR #77964)
Simon Pilgrim via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Jan 28 08:55:03 PST 2024
================
@@ -4216,6 +4217,95 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
return CNode;
}
+// When the consumer of a right shift (arithmetic or logical) wouldn't notice
+// the difference if the instruction were a rotate right instead (because the
+// bits shifted in are truncated away), the shift can be replaced by the RORX
+// instruction from BMI2. RORX doesn't set flags and can write its result to a
+// different register. However, it increases code size in most cases and
+// doesn't leave the high bits in a useful state. There may be other situations
+// where this transformation is profitable despite those drawbacks, but
+// currently it is only made when it likely avoids spilling flags.
+bool X86DAGToDAGISel::rightShiftUnclobberFlags(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Target has to have BMI2 for RORX
+ if (!Subtarget->hasBMI2())
+ return false;
+
+ // Only handle scalar shifts.
+ if (VT.isVector())
+ return false;
+
+ unsigned OpSize;
+ if (VT == MVT::i64)
+ OpSize = 64;
+ else if (VT == MVT::i32)
+ OpSize = 32;
+ else if (VT == MVT::i16)
+ OpSize = 16;
+ else if (VT == MVT::i8)
+ return false; // i8 shift can't be truncated.
+ else
+ llvm_unreachable("Unexpected shift size");
+
+ unsigned TruncateSize = 0;
+ // This only works when the result is truncated.
+ for (const SDNode *User : N->uses()) {
+ if (!User->isMachineOpcode() ||
+ User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
+ return false;
+ EVT TuncateType = User->getValueType(0);
+ if (TuncateType == MVT::i32)
+ TruncateSize = std::max(TruncateSize, 32U);
+ else if (TuncateType == MVT::i16)
+ TruncateSize = std::max(TruncateSize, 16U);
+ else if (TuncateType == MVT::i8)
+ TruncateSize = std::max(TruncateSize, 8U);
+ else
+ return false;
+ }
+ if (TruncateSize >= OpSize)
+ return false;
+
+ // The shift must be by an immediate that wouldn't expose the zero or sign
+ // extended result.
+ auto *ShiftAmount = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShiftAmount || ShiftAmount->getZExtValue() > OpSize - TruncateSize)
+ return false;
+
+ // If the shift argument has non-dead EFLAGS, then this shift probably
+ // clobbers those flags making the transformation to RORX useful. This may
+ // have false negatives or positives so ideally this transformation is made
+ // later on.
+ bool ArgProducesFlags = false;
+ SDNode *Input = N->getOperand(0).getNode();
+ for (auto Use : Input->uses()) {
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ auto *RegisterNode =
+ dyn_cast<RegisterSDNode>(Use->getOperand(1).getNode());
----------------
RKSimon wrote:
```dyn_cast<RegisterSDNode>(Use->getOperand(1))```
https://github.com/llvm/llvm-project/pull/77964
More information about the libcxx-commits
mailing list