[llvm] 37293e6 - [X86][CodeGen] Support long instruction fixup for APX NDD instructions (#83578)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 2 21:03:39 PST 2024
Author: Shengchen Kan
Date: 2024-03-03T13:03:35+08:00
New Revision: 37293e69e6362e3559c1a4e1ac62b53f2b0edb0a
URL: https://github.com/llvm/llvm-project/commit/37293e69e6362e3559c1a4e1ac62b53f2b0edb0a
DIFF: https://github.com/llvm/llvm-project/commit/37293e69e6362e3559c1a4e1ac62b53f2b0edb0a.diff
LOG: [X86][CodeGen] Support long instruction fixup for APX NDD instructions (#83578)
RFC:
https://discourse.llvm.org/t/rfc-support-long-instruction-fixup-for-x86/76539
Added:
llvm/test/CodeGen/X86/apx/long-instruction-fixup-x32.ll
llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
Modified:
llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
llvm/lib/Target/X86/X86ExpandPseudo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 4442b80861b61a..28a067d525e010 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -1315,6 +1315,33 @@ inline bool isKMasked(uint64_t TSFlags) {
inline bool isKMergeMasked(uint64_t TSFlags) {
return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
}
+
+/// \returns true if the instruction needs a SIB.
+inline bool needSIB(unsigned BaseReg, unsigned IndexReg, bool In64BitMode) {
+ // The SIB byte must be used if there is an index register.
+ if (IndexReg)
+ return true;
+
+ // The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
+ // which encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ switch (BaseReg) {
+ default:
+ // If there is no base register and we're in 64-bit mode, we need a SIB
+ // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
+ return In64BitMode && !BaseReg;
+ case X86::ESP:
+ case X86::RSP:
+ case X86::R12:
+ case X86::R12D:
+ case X86::R20:
+ case X86::R20D:
+ case X86::R28:
+ case X86::R28D:
+ return true;
+ }
+}
+
} // namespace X86II
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index f7c361393fea62..fdb11d1a408bb6 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -753,17 +753,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
bool AllowDisp8 = !UseDisp32;
// Determine whether a SIB byte is needed.
- if ( // The SIB byte must be used if there is an index register or the
- // encoding requires a SIB byte.
- !ForceSIB && IndexReg.getReg() == 0 &&
- // The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
- // which encode to an R/M value of 4, which indicates that a SIB byte is
- // present.
- BaseRegNo != N86::ESP &&
- // If there is no base register and we're in 64-bit mode, we need a SIB
- // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
- (!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) {
-
+ if (!ForceSIB && !X86II::needSIB(BaseReg, IndexReg.getReg(),
+ STI.hasFeature(X86::Is64Bit))) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
emitByte(modRMByte(0, RegOpcodeField, 5), CB);
emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, CB, Fixups);
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index b9fb3fdb239eef..f4c67f115c9f3d 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -613,6 +613,91 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::CALL64m_RVMARKER:
expandCALL_RVMARKER(MBB, MBBI);
return true;
+ case X86::ADD32mi_ND:
+ case X86::ADD64mi32_ND:
+ case X86::SUB32mi_ND:
+ case X86::SUB64mi32_ND:
+ case X86::AND32mi_ND:
+ case X86::AND64mi32_ND:
+ case X86::OR32mi_ND:
+ case X86::OR64mi32_ND:
+ case X86::XOR32mi_ND:
+ case X86::XOR64mi32_ND:
+ case X86::ADC32mi_ND:
+ case X86::ADC64mi32_ND:
+ case X86::SBB32mi_ND:
+ case X86::SBB64mi32_ND: {
+ // It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
+ // instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
+ // byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
+ // immediate = 15 bytes in total, e.g.
+ //
+ // subq $184, %fs:257(%rbx, %rcx), %rax
+ //
+ // In such a case, no additional (ADSIZE or segment override) prefix can be
+ // used. To resolve the issue, we split the “long” instruction into 2
+ // instructions:
+ //
+ // movq %fs:257(%rbx, %rcx),%rax
+ // subq $184, %rax
+ //
+ // Therefore we consider the OPmi_ND to be a pseudo instruction to some
+ // extent.
+ const MachineOperand &ImmOp =
+ MI.getOperand(MI.getNumExplicitOperands() - 1);
+ // If the immediate is an expr, conservatively estimate 4 bytes.
+ if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
+ return false;
+ int MemOpNo = X86::getFirstAddrOperandIdx(MI);
+ const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
+ Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
+ // If the displacement is an expr, conservatively estimate 4 bytes.
+ if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
+ return false;
+ // There can only be one of three: SIB, segment override register, ADSIZE
+ Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
+ unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
+ if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
+ ++Count;
+ if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
+ ++Count;
+ if (Count < 2)
+ return false;
+ unsigned Opc, LoadOpc;
+ switch (Opcode) {
+#define MI_TO_RI(OP) \
+ case X86::OP##32mi_ND: \
+ Opc = X86::OP##32ri; \
+ LoadOpc = X86::MOV32rm; \
+ break; \
+ case X86::OP##64mi32_ND: \
+ Opc = X86::OP##64ri32; \
+ LoadOpc = X86::MOV64rm; \
+ break;
+
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ MI_TO_RI(ADD);
+ MI_TO_RI(SUB);
+ MI_TO_RI(AND);
+ MI_TO_RI(OR);
+ MI_TO_RI(XOR);
+ MI_TO_RI(ADC);
+ MI_TO_RI(SBB);
+#undef MI_TO_RI
+ }
+ // Insert OPri.
+ Register DestReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
+ .addReg(DestReg)
+ .add(ImmOp);
+ // Change OPmi_ND to MOVrm.
+ for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
+ MI.removeOperand(MI.getNumOperands() - 1);
+ MI.setDesc(TII->get(LoadOpc));
+ return true;
+ }
}
llvm_unreachable("Previous switch has a fallthrough?");
}
diff --git a/llvm/test/CodeGen/X86/apx/long-instruction-fixup-x32.ll b/llvm/test/CodeGen/X86/apx/long-instruction-fixup-x32.ll
new file mode 100644
index 00000000000000..fb24704fe0491a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/long-instruction-fixup-x32.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -mattr=+ndd -verify-machineinstrs | FileCheck %s
+
+
+define i32 @add32mi_SIB_ADSIZE(ptr nocapture noundef readonly %a, i32 noundef %b) {
+; CHECK-LABEL: add32mi_SIB_ADSIZE:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl 1164(%edi,%esi,4), %eax
+; CHECK-NEXT: addl $4660, %eax # imm = 0x1234
+; CHECK-NEXT: retq
+entry:
+ %add.ptr = getelementptr inbounds i32, ptr %a, i32 %b
+ %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i32 1164
+ %0 = load i32, ptr %add.ptr1
+ %add = add nsw i32 %0, 4660
+ ret i32 %add
+}
+
+declare ptr @llvm.thread.pointer()
+
+define i32 @add32mi_FS_ADSIZE(i32 %i) {
+; CHECK-LABEL: add32mi_FS_ADSIZE:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %fs:0, %eax
+; CHECK-NEXT: addl $4660, (%eax,%edi,4), %eax # imm = 0x1234
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call ptr @llvm.thread.pointer()
+ %arrayidx = getelementptr inbounds i32, ptr %0, i32 %i
+ %1 = load i32, ptr %arrayidx
+ %add = add nsw i32 %1, 4660
+ ret i32 %add
+}
+
+define i32 @add32mi_FS_SIB(i32 %i) {
+; CHECK-LABEL: add32mi_FS_SIB:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %fs:0, %eax
+; CHECK-NEXT: addl $4660, %eax # imm = 0x1234
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call ptr @llvm.thread.pointer()
+ %arrayidx = getelementptr inbounds i32, ptr %0, i32 0
+ %1 = load i32, ptr %arrayidx
+ %add = add nsw i32 %1, 4660
+ ret i32 %add
+}
+
+define i32 @add32mi_GS_ADSIZE(ptr addrspace(256) %a) {
+; CHECK-LABEL: add32mi_GS_ADSIZE:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %gs:4936(%edi), %eax
+; CHECK-NEXT: addl $123456, %eax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %arrayidx = getelementptr inbounds i32, ptr addrspace(256) %a, i32 1234
+ %t = load i32, ptr addrspace(256) %arrayidx
+ %add = add i32 %t, 123456
+ ret i32 %add
+}
diff --git a/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
new file mode 100644
index 00000000000000..30c485836797f5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/long-instruction-fixup.ll
@@ -0,0 +1,254 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s
+
+define i32 @add32mi_GS() {
+; CHECK-LABEL: add32mi_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %gs:255, %eax
+; CHECK-NEXT: addl $123456, %eax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %add = add i32 %t, 123456
+ ret i32 %add
+}
+
+define i64 @add64mi_FS() {
+; CHECK-LABEL: add64mi_FS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %fs:255, %rax
+; CHECK-NEXT: addq $123456, %rax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i64 255 to ptr addrspace(257)
+ %t = load i64, ptr addrspace(257) %a
+ %add = add i64 %t, 123456
+ ret i64 %add
+}
+
+define i32 @sub32mi_GS() {
+; CHECK-LABEL: sub32mi_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %gs:255, %eax
+; CHECK-NEXT: addl $129, %eax
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %sub = sub i32 %t, -129
+ ret i32 %sub
+}
+
+define i64 @sub64mi_FS() {
+; CHECK-LABEL: sub64mi_FS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %fs:255, %rax
+; CHECK-NEXT: subq $-2147483648, %rax # imm = 0x80000000
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i64 255 to ptr addrspace(257)
+ %t = load i64, ptr addrspace(257) %a
+ %sub = sub i64 %t, -2147483648
+ ret i64 %sub
+}
+
+define i32 @and32mi_GS() {
+; CHECK-LABEL: and32mi_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %gs:255, %eax
+; CHECK-NEXT: andl $-129, %eax
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %and = and i32 %t, -129
+ ret i32 %and
+}
+
+define i64 @and64mi_FS() {
+; CHECK-LABEL: and64mi_FS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %fs:255, %rax
+; CHECK-NEXT: andq $-2147483648, %rax # imm = 0x80000000
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i64 255 to ptr addrspace(257)
+ %t = load i64, ptr addrspace(257) %a
+ %and = and i64 %t, -2147483648
+ ret i64 %and
+}
+
+define i32 @or32mi_GS() {
+; CHECK-LABEL: or32mi_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %gs:255, %eax
+; CHECK-NEXT: orl $-129, %eax
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %or = or i32 %t, -129
+ ret i32 %or
+}
+
+define i64 @or64mi_FS() {
+; CHECK-LABEL: or64mi_FS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %fs:255, %rax
+; CHECK-NEXT: orq $-2147483648, %rax # imm = 0x80000000
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i64 255 to ptr addrspace(257)
+ %t = load i64, ptr addrspace(257) %a
+ %or = or i64 %t, -2147483648
+ ret i64 %or
+}
+
+define i32 @xor32mi_GS() {
+; CHECK-LABEL: xor32mi_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %gs:255, %eax
+; CHECK-NEXT: xorl $-129, %eax
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %xor = xor i32 %t, -129
+ ret i32 %xor
+}
+
+define i64 @xor64mi_FS() {
+; CHECK-LABEL: xor64mi_FS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %fs:255, %rax
+; CHECK-NEXT: xorq $-2147483648, %rax # imm = 0x80000000
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i64 255 to ptr addrspace(257)
+ %t = load i64, ptr addrspace(257) %a
+ %xor = xor i64 %t, -2147483648
+ ret i64 %xor
+}
+
+define i32 @adc32mi_GS(i32 %x, i32 %y) {
+; CHECK-LABEL: adc32mi_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: movl %gs:255, %eax
+; CHECK-NEXT: adcl $123456, %eax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %a = inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %s = add i32 %t, 123456
+ %k = icmp ugt i32 %x, %y
+ %z = zext i1 %k to i32
+ %r = add i32 %s, %z
+ ret i32 %r
+}
+
+define i64 @adc64mi_FS(i64 %x, i64 %y) {
+; CHECK-LABEL: adc64mi_FS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpq %rdi, %rsi
+; CHECK-NEXT: movq %fs:255, %rax
+; CHECK-NEXT: adcq $123456, %rax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %a = inttoptr i64 255 to ptr addrspace(257)
+ %t = load i64, ptr addrspace(257) %a
+ %s = add i64 %t, 123456
+ %k = icmp ugt i64 %x, %y
+ %z = zext i1 %k to i64
+ %r = add i64 %s, %z
+ ret i64 %r
+}
+
+define i32 @sbb32mi_GS(i32 %x, i32 %y) {
+; CHECK-LABEL: sbb32mi_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %edi, %esi
+; CHECK-NEXT: sbbl $0, %gs:255, %eax
+; CHECK-NEXT: addl $-123456, %eax # imm = 0xFFFE1DC0
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %s = sub i32 %t, 123456
+ %k = icmp ugt i32 %x, %y
+ %z = zext i1 %k to i32
+ %r = sub i32 %s, %z
+ ret i32 %r
+}
+
+define i64 @sbb64mi_FS(i64 %x, i64 %y) {
+; CHECK-LABEL: sbb64mi_FS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpq %rdi, %rsi
+; CHECK-NEXT: sbbq $0, %fs:255, %rax
+; CHECK-NEXT: addq $-123456, %rax # imm = 0xFFFE1DC0
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i64 255 to ptr addrspace(257)
+ %t = load i64, ptr addrspace(257) %a
+ %s = sub i64 %t, 123456
+ %k = icmp ugt i64 %x, %y
+ %z = zext i1 %k to i64
+ %r = sub i64 %s, %z
+ ret i64 %r
+}
+
+define i32 @add32mi8_GS() {
+; CHECK-LABEL: add32mi8_GS:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addl $127, %gs:255, %eax
+; CHECK-NEXT: retq
+entry:
+ %a= inttoptr i32 255 to ptr addrspace(256)
+ %t = load i32, ptr addrspace(256) %a
+ %add = add i32 %t, 127
+ ret i32 %add
+}
+
+define i32 @add32mi_GS_Disp0(ptr addrspace(256) %a, i32 %i) {
+; CHECK-LABEL: add32mi_GS_Disp0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movslq %esi, %rax
+; CHECK-NEXT: addl $123456, %gs:(%rdi,%rax,4), %eax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %arrayidx = getelementptr inbounds i32, ptr addrspace(256) %a, i32 %i
+ %t = load i32, ptr addrspace(256) %arrayidx
+ %add = add i32 %t, 123456
+ ret i32 %add
+}
+
+define i32 @add32mi_GS_Disp8(ptr addrspace(256) %a, i32 %i) {
+; CHECK-LABEL: add32mi_GS_Disp8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movslq %esi, %rax
+; CHECK-NEXT: addl $123456, %gs:123(%rdi,%rax,4), %eax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %arrayidx = getelementptr inbounds i32, ptr addrspace(256) %a, i32 %i
+ %add.ptr = getelementptr inbounds i8, ptr addrspace(256) %arrayidx, i32 123
+ %t = load i32, ptr addrspace(256) %add.ptr
+ %add = add i32 %t, 123456
+ ret i32 %add
+}
+
+define i32 @add32mi_GS_Disp32(ptr addrspace(256) %a, i32 %i) {
+; CHECK-LABEL: add32mi_GS_Disp32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movslq %esi, %rax
+; CHECK-NEXT: movl %gs:1234(%rdi,%rax,4), %eax
+; CHECK-NEXT: addl $123456, %eax # imm = 0x1E240
+; CHECK-NEXT: retq
+entry:
+ %arrayidx = getelementptr inbounds i32, ptr addrspace(256) %a, i32 %i
+ %add.ptr = getelementptr inbounds i8, ptr addrspace(256) %arrayidx, i32 1234
+ %t = load i32, ptr addrspace(256) %add.ptr
+ %add = add i32 %t, 123456
+ ret i32 %add
+}
More information about the llvm-commits
mailing list