[llvm] [X86][CodeGen] Support long instruction fixup for APX NDD instructions (PR #83578)

Fri Mar 1 22:54:19 PST 2024

================
@@ -613,6 +613,87 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case X86::CALL64m_RVMARKER:
     expandCALL_RVMARKER(MBB, MBBI);
     return true;
+  case X86::ADD32mi_ND:
+  case X86::ADD64mi32_ND:
+  case X86::SUB32mi_ND:
+  case X86::SUB64mi32_ND:
+  case X86::AND32mi_ND:
+  case X86::AND64mi32_ND:
+  case X86::OR32mi_ND:
+  case X86::OR64mi32_ND:
+  case X86::XOR32mi_ND:
+  case X86::XOR64mi32_ND:
+  case X86::ADC32mi_ND:
+  case X86::ADC64mi32_ND:
+  case X86::SBB32mi_ND:
+  case X86::SBB64mi32_ND: {
+    // It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
+    // instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
+    // byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
+    // immediate = 15 bytes in total, e.g.
+    //
+    //  addq    $184, -96, %rax
+    //
+    // In such a case, no additional segment override prefix can be used. To
+    // resolve the issue, we split the “long” instruction into 2 instructions:
+    //
+    //  subq    $184, %fs:257(%rbx, %rcx), %rax
+    //
+    //  ->
+    //
+    //  movq %fs:257(%rbx, %rcx)，%rax
+    //  subq $184, %rax
+    int MemOpNo = X86::getFirstAddrOperandIdx(MI);
+    Register Segment = MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
+    if (Segment == X86::NoRegister)
+      return false;
+    const MachineOperand &ImmOp =
+        MI.getOperand(MI.getNumExplicitOperands() - 1);
+    // If the immediate is a expr, conservatively estimate 4 bytes.
+    if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
+      return false;
+    Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
+    Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
+    if (!X86II::needSIB(Base, Index, /*In64BitMode=*/true))
+      return false;
+    const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
+    // If the displacement is a expr, conservatively estimate 4 bytes.
+    if (DispOp.isImm() && isInt<8>(DispOp.getImm()))
+      return false;
+    unsigned Opc, LoadOpc;
+    switch (Opcode) {
+#define MI_TO_RI(OP)                                                           \
+  case X86::OP##32mi_ND:                                                       \
+    Opc = X86::OP##32ri;                                                       \
+    LoadOpc = X86::MOV32rm;                                                    \
+    break;                                                                     \
+  case X86::OP##64mi32_ND:                                                     \
+    Opc = X86::OP##64ri32;                                                     \
+    LoadOpc = X86::MOV64rm;                                                    \
+    break;
+
+    default:
+      llvm_unreachable("Unexpected Opcode");
+      MI_TO_RI(ADD);
+      MI_TO_RI(SUB);
+      MI_TO_RI(AND);
+      MI_TO_RI(OR);
+      MI_TO_RI(XOR);
+      MI_TO_RI(ADC);
+      MI_TO_RI(SBB);
+#undef MI_TO_RI
+    }
+    // Insert OPri.
+    Register DestReg = MI.getOperand(0).getReg();
+    BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
+        .addReg(DestReg)
+        .add(ImmOp);
+    // Change OPmi_ND to MOVrm.
+    for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
+      MI.removeOperand(MI.getNumOperands() - 1);
----------------
KanRobert wrote:

When we build a new instruction with `BuildMI`, the implicit operands will be added automatically. 
OPmi_ND has a memory operand while OPri does not. Replacing it with OPri requires us to drop memory operands, which is verbose. It's also verbose to build a new MI with memory operands, so I reuse the existing one.

https://github.com/llvm/llvm-project/pull/83578