[llvm] r287875 - [x86] Fixing PR28755 by precomputing the address used in CMPXCHG8B
Nikolai Bozhenov via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 24 05:23:36 PST 2016
Author: n.bozhenov
Date: Thu Nov 24 07:23:35 2016
New Revision: 287875
URL: http://llvm.org/viewvc/llvm-project?rev=287875&view=rev
Log:
[x86] Fixing PR28755 by precomputing the address used in CMPXCHG8B
The bug arises during register allocation on i686 for the
CMPXCHG8B instruction when a base pointer is needed. CMPXCHG8B
needs 4 implicit registers (EAX, EBX, ECX, EDX) and a memory address,
plus ESI is reserved as the base pointer. With such constraints the only
way the register allocator can do its job successfully is when the addressing
mode of the instruction requires only one register. If that is not the case,
we emit an additional LEA instruction to compute the address.
It fixes PR28755.
Patch by Alexander Ivchenko <alexander.ivchenko at intel.com>
Differential Revision: https://reviews.llvm.org/D25088
Added:
llvm/trunk/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrBuilder.h
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=287875&r1=287874&r2=287875&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Nov 24 07:23:35 2016
@@ -25153,6 +25153,57 @@ X86TargetLowering::EmitInstrWithCustomIn
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+ case X86::LCMPXCHG8B: {
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ // In addition to the 4 E[ABCD] registers implied by its encoding, CMPXCHG8B
+ // requires a memory operand. If the current architecture is i686 and the
+ // current function needs a base pointer
+ // - which is ESI for i686 - the register allocator would not be able to
+ // allocate registers for an address of the form X(%reg, %reg, Y)
+ // - there would never be enough unreserved registers during regalloc
+ // (without the need for a base ptr the only option would be X(%edi, %esi, Y)).
+ // We give the register allocator a hand by precomputing the address in
+ // a new vreg using LEA.
+
+ // If it is not i686 or there is no base pointer - nothing to do here.
+ if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
+ return BB;
+
+ // Even though this code does not necessarily need the base pointer to
+ // be ESI, we check for that. The reason: if this assert fails, something
+ // has changed in the compiler's base pointer handling, which most
+ // probably has to be addressed somehow here.
+ assert(TRI->getBaseRegister() == X86::ESI &&
+ "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
+ "base pointer in mind");
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MVT SPTy = getPointerTy(MF->getDataLayout());
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+ unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
+
+ X86AddressMode AM = getAddressFromInstr(&MI, 0);
+ // Regalloc does not need any help when the memory operand of CMPXCHG8B
+ // does not use an index register.
+ if (AM.IndexReg == X86::NoRegister)
+ return BB;
+
+ // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its
+ // four operand definitions that are E[ABCD] registers. We step back over
+ // them and insert the LEA. NOTE(review): the walk never checks BB->begin().
+ MachineBasicBlock::iterator MBBI(MI);
+ while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
+ MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
+ --MBBI;
+ addFullAddress(
+ BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
+
+ setDirectAddressInInstr(&MI, 0, computedAddrVReg);
+
+ return BB;
+ }
+ case X86::LCMPXCHG16B:
+ return BB;
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =
Modified: llvm/trunk/lib/Target/X86/X86InstrBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrBuilder.h?rev=287875&r1=287874&r2=287875&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrBuilder.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrBuilder.h Thu Nov 24 07:23:35 2016
@@ -128,6 +128,17 @@ addDirectMem(const MachineInstrBuilder &
return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
}
+/// Rewrite the five address operands of \p MI that begin at index \p Operand
+/// into a direct memory reference through \p Reg.
+static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand,
+ unsigned Reg) {
+ // A direct memory address has the form: Reg, 1 (Scale), NoReg, 0, NoReg.
+ MI->getOperand(Operand).setReg(Reg);
+ MI->getOperand(Operand + 1).setImm(1);
+ MI->getOperand(Operand + 2).setReg(0);
+ MI->getOperand(Operand + 3).setImm(0);
+ MI->getOperand(Operand + 4).setReg(0);
+}
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, int Offset) {
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=287875&r1=287874&r2=287875&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Thu Nov 24 07:23:35 2016
@@ -723,7 +723,7 @@ defm LOCK_DEC : LOCK_ArithUnOp<0xFE,
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
!strconcat(mnemonic, "\t$ptr"),
[(frag addr:$ptr)], itin>, TB, LOCK;
Added: llvm/trunk/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll?rev=287875&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll (added)
+++ llvm/trunk/test/CodeGen/X86/cmpxchg8b_alloca_regalloc_handling.ll Thu Nov 24 07:23:35 2016
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 -stackrealign -O2 | FileCheck %s
+; PR28755
+
+; Check that the register allocator is able to handle this
+; a-lot-of-fixed-and-reserved-registers case. We do that by
+; emitting an lea before the 4 cmpxchg8b operand generators.
+
+define void @foo_alloca(i64* %a, i32 %off, i32 %n) {
+ %dummy = alloca i32, i32 %n
+ %addr = getelementptr inbounds i64, i64* %a, i32 %off
+
+ %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
+ ret void
+}
+
+; CHECK-LABEL: foo_alloca
+; CHECK: leal {{\(%e..,%e..,.*\)}}, [[REGISTER:%e.i]]
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movl $1, %ebx
+; CHECK-NEXT: lock cmpxchg8b ([[REGISTER]])
+
+; If we don't use index register in the address mode -
+; check that we did not generate the lea.
+define void @foo_alloca_direct_address(i64* %addr, i32 %n) {
+ %dummy = alloca i32, i32 %n
+
+ %res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
+ ret void
+}
+
+; CHECK-LABEL: foo_alloca_direct_address
+; CHECK-NOT: leal {{\(%e.*\)}}, [[REGISTER:%e.i]]
+; CHECK: lock cmpxchg8b ([[REGISTER]])
More information about the llvm-commits
mailing list