[llvm] 68e1a8d - [X86] Defer the creation of LCMPXCHG16B_SAVE_RBX until finalize-isel

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 7 17:01:02 PDT 2020


Author: Craig Topper
Date: 2020-10-07T17:00:43-07:00
New Revision: 68e1a8d20795802077987529e1268c184d749564

URL: https://github.com/llvm/llvm-project/commit/68e1a8d20795802077987529e1268c184d749564
DIFF: https://github.com/llvm/llvm-project/commit/68e1a8d20795802077987529e1268c184d749564.diff

LOG: [X86] Defer the creation of LCMPXCHG16B_SAVE_RBX until finalize-isel

We need to use LCMPXCHG16B_SAVE_RBX if RBX/EBX is being used as
the frame pointer. We previously checked for this during type
legalization, but that's too early to know for sure if the base
pointer is needed.

This patch adds a new pseudo instruction to emit from isel that
uses a virtual register for the RBX input. Then we use the custom
inserter hook to emit LCMPXCHG16B if RBX isn't needed as a base
pointer or LCMPXCHG16B_SAVE_RBX if it is.

Fixes PR42064.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D88808

Added: 
    llvm/test/CodeGen/X86/pr42064.ll

Modified: 
    llvm/lib/Target/X86/X86ExpandPseudo.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/lib/Target/X86/X86InstrCompiler.td
    llvm/lib/Target/X86/X86InstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index a5a1a4ff93e6..d9c0964e9ed8 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -338,9 +338,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     // Perform the following transformation.
     // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
     // =>
-    // [E|R]BX = InArg
+    // RBX = InArg
     // actualcmpxchg Addr
-    // [E|R]BX = SaveRbx
+    // RBX = SaveRbx
     const MachineOperand &InArg = MBBI->getOperand(6);
     Register SaveRbx = MBBI->getOperand(7).getReg();
 

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 66986a1b9c10..b320df3fec90 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30481,38 +30481,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     swapInH =
         DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX,
                          swapInH, cpInH.getValue(1));
-    // If the current function needs the base pointer, RBX,
-    // we shouldn't use cmpxchg directly.
-    // Indeed the lowering of that instruction will clobber
-    // that register and since RBX will be a reserved register
-    // the register allocator will not make sure its value will
-    // be properly saved and restored around this live-range.
-    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+    // In 64-bit mode we might need the base pointer in RBX, but we can't know
+    // until later. So we keep the RBX input in a vreg and use a custom
+    // inserter.
+    // Since RBX will be a reserved register the register allocator will not
+    // make sure its value will be properly saved and restored around this
+    // live-range.
     SDValue Result;
     SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-    Register BasePtr = TRI->getBaseRegister();
     MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
-    if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
-        (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
-      assert(Regs64bit && "RBX/EBX base pointer only expected for i128 CAS");
-      SDValue RBXSave = DAG.getCopyFromReg(swapInH.getValue(0), dl,
-                                           X86::RBX,
-                                           HalfT, swapInH.getValue(1));
-      SDValue Ops[] = {/*Chain*/ RBXSave.getValue(1), N->getOperand(1), swapInL,
-                       RBXSave,
-                       /*Glue*/ RBXSave.getValue(2)};
-      Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_SAVE_RBX_DAG, dl, Tys,
-                                       Ops, T, MMO);
+    if (Regs64bit) {
+      SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL,
+                       swapInH.getValue(1)};
+      Result =
+          DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_DAG, dl, Tys, Ops, T, MMO);
     } else {
-      unsigned Opcode =
-          Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG;
-      swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl,
-                                 Regs64bit ? X86::RBX : X86::EBX, swapInL,
+      swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, X86::EBX, swapInL,
                                  swapInH.getValue(1));
       SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1),
                        swapInL.getValue(1)};
-      Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
+      Result =
+          DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, T, MMO);
     }
+
     SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
                                         Regs64bit ? X86::RAX : X86::EAX,
                                         HalfT, Result.getValue(1));
@@ -30811,7 +30803,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(LCMPXCHG_DAG)
   NODE_NAME_CASE(LCMPXCHG8_DAG)
   NODE_NAME_CASE(LCMPXCHG16_DAG)
-  NODE_NAME_CASE(LCMPXCHG8_SAVE_EBX_DAG)
   NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
   NODE_NAME_CASE(LADD)
   NODE_NAME_CASE(LSUB)
@@ -33770,11 +33761,31 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
 
     return BB;
   }
-  case X86::LCMPXCHG16B:
-    return BB;
-  case X86::LCMPXCHG16B_SAVE_RBX: {
-    if (!BB->isLiveIn(X86::RBX))
-      BB->addLiveIn(X86::RBX);
+  case X86::LCMPXCHG16B_NO_RBX: {
+    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+    Register BasePtr = TRI->getBaseRegister();
+    X86AddressMode AM = getAddressFromInstr(&MI, 0);
+    if (TRI->hasBasePointer(*MF) &&
+        (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
+      if (!BB->isLiveIn(BasePtr))
+        BB->addLiveIn(BasePtr);
+      // Save RBX into a virtual register.
+      Register SaveRBX =
+          MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
+          .addReg(X86::RBX);
+      Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
+      addFullAddress(
+          BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst), AM)
+          .add(MI.getOperand(X86::AddrNumOperands))
+          .addReg(SaveRBX);
+    } else {
+      // Simple case, just copy the virtual register to RBX.
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX)
+          .add(MI.getOperand(X86::AddrNumOperands));
+      addFullAddress(BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B)), AM);
+    }
+    MI.eraseFromParent();
     return BB;
   }
   case X86::MWAITX: {

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index f9bf6fb988eb..24b2c8fd31c2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -756,7 +756,6 @@ namespace llvm {
     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
     LCMPXCHG8_DAG,
     LCMPXCHG16_DAG,
-    LCMPXCHG8_SAVE_EBX_DAG,
     LCMPXCHG16_SAVE_RBX_DAG,
 
     /// LOCK-prefixed arithmetic read-modify-write instructions.

diff  --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 195ea8b1b127..b0e4bd1ee761 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -809,15 +809,6 @@ let Predicates = [UseIncDec] in {
 }
 
 // Atomic compare and swap.
-multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
-                         SDPatternOperator frag, X86MemOperand x86memop> {
-let isCodeGenOnly = 1, usesCustomInserter = 1 in {
-  def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
-               !strconcat(mnemonic, "\t$ptr"),
-               [(frag addr:$ptr)]>, TB, LOCK;
-}
-}
-
 multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                           string mnemonic, SDPatternOperator frag> {
 let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
@@ -841,14 +832,19 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
-    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW] in {
-defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
+    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
+    isCodeGenOnly = 1, usesCustomInserter = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+                   "cmpxchg8b\t$ptr",
+                   [(X86cas8 addr:$ptr)]>, TB, LOCK;
 }
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
-    Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW] in {
-defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
-                                 X86cas16, i128mem>, REX_W;
+    Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+    isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
+def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
+                     "cmpxchg16b\t$ptr",
+                     []>, TB, LOCK;
 }
 
 // This pseudo must be used when the frame uses RBX as
@@ -872,14 +868,24 @@ defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
 // the value of RBX.
 let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
     Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
-    isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
-    usesCustomInserter = 1 in {
+    isCodeGenOnly = 1, isPseudo = 1,
+    mayLoad = 1, mayStore = 1, hasSideEffects = 0,
+    Constraints = "$rbx_save = $dst" in {
 def LCMPXCHG16B_SAVE_RBX :
     I<0, Pseudo, (outs GR64:$dst),
-      (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save),
-      !strconcat("cmpxchg16b", "\t$ptr"),
-      [(set GR64:$dst, (X86cas16save_rbx addr:$ptr, GR64:$rbx_input,
-                                                    GR64:$rbx_save))]>;
+      (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save), "", []>;
+}
+
+// Pseudo instruction that doesn't read/write RBX. Will be turned into either
+// LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter.
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX],
+    Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+    isCodeGenOnly = 1, isPseudo = 1,
+    mayLoad = 1, mayStore = 1, hasSideEffects = 0,
+    usesCustomInserter = 1 in {
+def LCMPXCHG16B_NO_RBX :
+    I<0, Pseudo, (outs), (ins i128mem:$ptr, GR64:$rbx_input), "",
+      [(X86cas16 addr:$ptr, GR64:$rbx_input)]>;
 }
 
 // This pseudo must be used when the frame uses RBX/EBX as

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index ada5c2ffdc0b..5251998b2b5a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -69,10 +69,8 @@ def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
 
 def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i8>]>;
-def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def SDTX86caspairSaveRbx16 : SDTypeProfile<1, 3,
-                                           [SDTCisVT<0, i64>, SDTCisPtrTy<1>,
-                                           SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def SDTX86cas8pair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86cas16pair : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i64>]>;
 
 def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
                                                        SDTCisPtrTy<1>,
@@ -171,16 +169,12 @@ def X86wrpkru : SDNode<"X86ISD::WRPKRU",    SDTX86wrpkru,
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                         [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
-def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair,
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8pair,
                         [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
-def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
+def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair,
                         [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
-def X86cas16save_rbx : SDNode<"X86ISD::LCMPXCHG16_SAVE_RBX_DAG",
-                                SDTX86caspairSaveRbx16,
-                                [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
-                                SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 
 def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

diff  --git a/llvm/test/CodeGen/X86/pr42064.ll b/llvm/test/CodeGen/X86/pr42064.ll
new file mode 100644
index 000000000000..6269a59ff055
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr42064.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s
+
+%struct.TestStruct = type { %union.Int128 }
+%union.Int128 = type { i128 }
+%struct.SomeArrays = type { %struct.SillyArray, %struct.SillyArray, %struct.SillyArray }
+%struct.SillyArray = type { i8*, i32, i32 }
+
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+
+define void @foo(%struct.TestStruct* %arg) align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+; Check that %rbx is being used for a frame pointer
+; CHECK-LABEL: foo:
+; CHECK:         movq %rsp, %rbx
+
+; Check that %rbx is saved and restored around both lock cmpxchg16b.
+; CHECK:         movq %rbx, %r9
+; CHECK-NEXT:    movabsq $1393743441367457520, %rcx # imm = 0x135792468ABCDEF0
+; CHECK-NEXT:    movq %rcx, %rax
+; CHECK-NEXT:    movq %rcx, %rdx
+; CHECK-NEXT:    movq %rcx, %rbx
+; CHECK-NEXT:    lock cmpxchg16b (%r8)
+; CHECK-NEXT:    movq %r9, %rbx
+
+; CHECK:         movq %rbx, %r9
+; CHECK-NEXT:    movq %rcx, %rax
+; CHECK-NEXT:    movq %rcx, %rdx
+; CHECK-NEXT:    movq %rcx, %rbx
+; CHECK-NEXT:    lock cmpxchg16b (%r8)
+; CHECK-NEXT:    movq %r9, %rbx
+bb:
+  %i = alloca %struct.SomeArrays, align 8
+  %i1 = alloca %struct.SomeArrays, align 8
+  %i2 = getelementptr inbounds %struct.TestStruct, %struct.TestStruct* %arg, i64 0, i32 0, i32 0
+  %i3 = cmpxchg i128* %i2, i128 25710028567316702934644703134494809840, i128 25710028567316702934644703134494809840 seq_cst seq_cst
+  %i4 = extractvalue { i128, i1 } %i3, 0
+  %i5 = trunc i128 %i4 to i64
+  %i6 = icmp eq i64 %i5, 0
+  br i1 %i6, label %bb9, label %bb7
+
+bb7:                                              ; preds = %bb
+  %i8 = cmpxchg i128* %i2, i128 25710028567316702934644703134494809840, i128 25710028567316702934644703134494809840 seq_cst seq_cst
+  br label %bb9
+
+bb9:                                              ; preds = %bb7, %bb
+  %i10 = bitcast %struct.SomeArrays* %i to i8*
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %i10)
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %i10, i8 0, i64 48, i1 false)
+  %i11 = bitcast %struct.SomeArrays* %i1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 48, i8* nonnull %i11)
+  %i12 = bitcast %struct.SomeArrays* %i1 to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %i12, i8 0, i64 48, i1 false)
+  %i13 = invoke nonnull align 8 dereferenceable(48) %struct.SomeArrays* @"??4SomeArrays@@QEAAAEAU0@$$QEAU0@@Z"(%struct.SomeArrays* nonnull %i, %struct.SomeArrays* nonnull align 8 dereferenceable(48) %i1)
+          to label %bb14 unwind label %bb45
+
+bb14:                                             ; preds = %bb9
+  call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %i10)
+  ret void
+
+bb45:                                             ; preds = %bb9
+  %i46 = cleanuppad within none []
+  %i47 = getelementptr inbounds %struct.SomeArrays, %struct.SomeArrays* %i1, i64 0, i32 2, i32 0
+  %i48 = load i8*, i8** %i47, align 8
+  invoke void @"?free@@YAXPEAX at Z"(i8* %i48) [ "funclet"(token %i46) ]
+          to label %bb51 unwind label %bb49
+
+bb49:                                             ; preds = %bb45
+  %i50 = cleanuppad within %i46 []
+  call void @__std_terminate() [ "funclet"(token %i50) ]
+  unreachable
+
+bb51:                                             ; preds = %bb45
+  %i52 = getelementptr inbounds %struct.SomeArrays, %struct.SomeArrays* %i1, i64 0, i32 1, i32 0
+  %i53 = load i8*, i8** %i52, align 8
+  invoke void @"?free@@YAXPEAX at Z"(i8* %i53) [ "funclet"(token %i46) ]
+          to label %bb56 unwind label %bb54
+
+bb54:                                             ; preds = %bb51
+  %i55 = cleanuppad within %i46 []
+  call void @__std_terminate() [ "funclet"(token %i55) ]
+  unreachable
+
+bb56:                                             ; preds = %bb51
+  call void @llvm.lifetime.end.p0i8(i64 48, i8* nonnull %i10)
+  cleanupret from %i46 unwind to caller
+}
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
+
+declare dso_local i32 @__CxxFrameHandler3(...)
+
+declare nonnull align 8 dereferenceable(48) %struct.SomeArrays* @"??4SomeArrays@@QEAAAEAU0@$$QEAU0@@Z"(%struct.SomeArrays*, %struct.SomeArrays* nonnull align 8 dereferenceable(48)) align 2
+
+declare void @"?free@@YAXPEAX at Z"(i8*)
+
+declare void @__std_terminate()


        


More information about the llvm-commits mailing list