[llvm] [x86] Enable indirect tail calls with more arguments (PR #137643)

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 30 06:13:21 PDT 2025


https://github.com/zmodem updated https://github.com/llvm/llvm-project/pull/137643

>From 0d00d2b6d446ec4d0191bdaa5b86fbf7aa0f3b93 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Fri, 25 Apr 2025 17:08:52 +0200
Subject: [PATCH 1/8] (WORK IN PROGRESS) try to fold tail call address
 computation w/ more than two args

this folds more stuff, but also finds new breakages

Fixes #136848
---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 35 ++++++++++++++++---------
 llvm/test/CodeGen/X86/cfguard-checks.ll |  3 +--
 llvm/test/CodeGen/X86/fold-call-4.ll    | 16 +++++++++++
 3 files changed, 39 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/fold-call-4.ll

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 01118beb9cf5e..be9ca87b781ef 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -897,20 +897,29 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
     Chain = Chain.getOperand(0);
   }
 
-  if (!Chain.getNumOperands())
-    return false;
-  // Since we are not checking for AA here, conservatively abort if the chain
-  // writes to memory. It's not safe to move the callee (a load) across a store.
-  if (isa<MemSDNode>(Chain.getNode()) &&
-      cast<MemSDNode>(Chain.getNode())->writeMem())
+  while (true) {
+    if (!Chain.getNumOperands())
+      return false;
+    // Since we are not checking for AA here, conservatively abort if the chain
+    // writes to memory. It's not safe to move the callee (a load) across a store.
+    if (isa<MemSDNode>(Chain.getNode()) &&
+        cast<MemSDNode>(Chain.getNode())->writeMem())
+      return false;
+
+    if (Chain.getOperand(0).getNode() == Callee.getNode())
+      return true;
+    if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+        Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
+        Callee.getValue(1).hasOneUse())
+      return true;
+
+    // Look past CopyToReg's.
+    if (Chain.getOperand(0).getOpcode() == ISD::CopyToReg) {
+      Chain = Chain.getOperand(0);
+      continue;
+    }
     return false;
-  if (Chain.getOperand(0).getNode() == Callee.getNode())
-    return true;
-  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
-      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
-      Callee.getValue(1).hasOneUse())
-    return true;
-  return false;
+  }
 }
 
 static bool isEndbrImm64(uint64_t Imm) {
diff --git a/llvm/test/CodeGen/X86/cfguard-checks.ll b/llvm/test/CodeGen/X86/cfguard-checks.ll
index a727bbbfdcbe3..db19efaf910a3 100644
--- a/llvm/test/CodeGen/X86/cfguard-checks.ll
+++ b/llvm/test/CodeGen/X86/cfguard-checks.ll
@@ -210,8 +210,7 @@ entry:
   ; X64-LABEL: vmptr_thunk:
   ; X64:            movq (%rcx), %rax
   ; X64-NEXT:       movq 8(%rax), %rax
-  ; X64-NEXT:       movq __guard_dispatch_icall_fptr(%rip), %rdx
-  ; X64-NEXT:       rex64 jmpq *%rdx            # TAILCALL
+  ; X64-NEXT:       rex64 jmpq *__guard_dispatch_icall_fptr(%rip) # TAILCALL
   ; X64-NOT:   callq
 }
 
diff --git a/llvm/test/CodeGen/X86/fold-call-4.ll b/llvm/test/CodeGen/X86/fold-call-4.ll
new file mode 100644
index 0000000000000..d22dfc1759613
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-call-4.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; The callee address computation should get folded into the call.
+; CHECK-LABEL: f:
+; CHECK-NOT: mov
+; CHECK: jmpq *(%rdi,%rsi,8)
+
+define void @f(ptr %table, i64 %idx) {
+entry:
+  %arrayidx = getelementptr inbounds ptr, ptr %table, i64 %idx
+  %funcptr = load ptr, ptr %arrayidx, align 8
+  tail call void %funcptr(ptr %table, i64 %idx)
+  ret void
+}

>From 4913eea23129249ad20cd24dfe28e271d41e8391 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Mon, 28 Apr 2025 16:03:48 +0200
Subject: [PATCH 2/8] fixes

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 28 +++++++++++++++++++++++--
 llvm/test/CodeGen/X86/fold-call-4.ll    |  9 +++-----
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index be9ca87b781ef..7d6359f701368 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -890,6 +890,12 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
       LD->getExtensionType() != ISD::NON_EXTLOAD)
     return false;
 
+  // If the load's outgoing chain has more than one use, we can't (currently)
+  // move the load since we'd most likely create a loop. TODO: Maybe it could
+  // work if moveBelowOrigChain() updated *all* the chain users.
+  if (!Callee.getValue(1).hasOneUse())
+    return false;
+
   // Now let's find the callseq_start.
   while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
     if (!Chain.hasOneUse())
@@ -913,11 +919,13 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
         Callee.getValue(1).hasOneUse())
       return true;
 
-    // Look past CopyToReg's.
-    if (Chain.getOperand(0).getOpcode() == ISD::CopyToReg) {
+    // Look past CopyToRegs. We only walk one path, so the chain mustn't branch.
+    if (Chain.getOperand(0).getOpcode() == ISD::CopyToReg &&
+        Chain.getOperand(0).getValue(0).hasOneUse()) {
       Chain = Chain.getOperand(0);
       continue;
     }
+
     return false;
   }
 }
@@ -1362,6 +1370,22 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
          (N->getOpcode() == X86ISD::TC_RETURN &&
           (Subtarget->is64Bit() ||
            !getTargetMachine().isPositionIndependent())))) {
+
+      if (N->getOpcode() == X86ISD::TC_RETURN) {
+        // There needs to be enough non-callee-saved GPRs available to compute
+        // the load address if folded into the tailcall. See how the
+        // X86tcret_6regs and X86tcret_1reg classes are used and defined.
+        unsigned NumRegs = 0;
+        for (unsigned I = 3, E = N->getNumOperands(); I != E; ++I) {
+          if (isa<RegisterSDNode>(N->getOperand(I)))
+            ++NumRegs;
+        }
+        if (!Subtarget->is64Bit() && NumRegs > 1)
+          continue;
+        if (NumRegs > 6)
+          continue;
+      }
+
       /// Also try moving call address load from outside callseq_start to just
       /// before the call to allow it to be folded.
       ///
diff --git a/llvm/test/CodeGen/X86/fold-call-4.ll b/llvm/test/CodeGen/X86/fold-call-4.ll
index d22dfc1759613..708e05a0bfff0 100644
--- a/llvm/test/CodeGen/X86/fold-call-4.ll
+++ b/llvm/test/CodeGen/X86/fold-call-4.ll
@@ -1,16 +1,13 @@
-; RUN: llc < %s | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 
 ; The callee address computation should get folded into the call.
 ; CHECK-LABEL: f:
 ; CHECK-NOT: mov
 ; CHECK: jmpq *(%rdi,%rsi,8)
-
-define void @f(ptr %table, i64 %idx) {
+define void @f(ptr %table, i64 %idx, i64 %aux1, i64 %aux2, i64 %aux3) {
 entry:
   %arrayidx = getelementptr inbounds ptr, ptr %table, i64 %idx
   %funcptr = load ptr, ptr %arrayidx, align 8
-  tail call void %funcptr(ptr %table, i64 %idx)
+  tail call void %funcptr(ptr %table, i64 %idx, i64 %aux1, i64 %aux2, i64 %aux3)
   ret void
 }

>From 4695fe8bd02849f1d8f84cfd91c04a2e912d5e77 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Tue, 29 Apr 2025 11:27:36 +0200
Subject: [PATCH 3/8] format

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7d6359f701368..e81b17e7399dc 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -907,7 +907,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
     if (!Chain.getNumOperands())
       return false;
     // Since we are not checking for AA here, conservatively abort if the chain
-    // writes to memory. It's not safe to move the callee (a load) across a store.
+    // writes to memory. It's not safe to move the callee (a load) across a
+    // store.
     if (isa<MemSDNode>(Chain.getNode()) &&
         cast<MemSDNode>(Chain.getNode())->writeMem())
       return false;

>From 9d820881b4a18441a9737aa83d7f9157742296b0 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Tue, 29 Apr 2025 13:28:21 +0200
Subject: [PATCH 4/8] don't allow TokenFactor with more than one use (found in
 chromium win64 build)

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp |  1 +
 llvm/test/CodeGen/X86/fold-call-4.ll    | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index e81b17e7399dc..831ca0560cc77 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -916,6 +916,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
     if (Chain.getOperand(0).getNode() == Callee.getNode())
       return true;
     if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+        Chain.getOperand(0).getValue(0).hasOneUse() &&
         Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
         Callee.getValue(1).hasOneUse())
       return true;
diff --git a/llvm/test/CodeGen/X86/fold-call-4.ll b/llvm/test/CodeGen/X86/fold-call-4.ll
index 708e05a0bfff0..2c99f2cb62641 100644
--- a/llvm/test/CodeGen/X86/fold-call-4.ll
+++ b/llvm/test/CodeGen/X86/fold-call-4.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=LIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=WIN
 
 ; The callee address computation should get folded into the call.
 ; CHECK-LABEL: f:
 ; CHECK-NOT: mov
-; CHECK: jmpq *(%rdi,%rsi,8)
+; LIN: jmpq *(%rdi,%rsi,8)
+; WIN: rex64 jmpq *(%rcx,%rdx,8)
 define void @f(ptr %table, i64 %idx, i64 %aux1, i64 %aux2, i64 %aux3) {
 entry:
   %arrayidx = getelementptr inbounds ptr, ptr %table, i64 %idx
@@ -11,3 +13,14 @@ entry:
   tail call void %funcptr(ptr %table, i64 %idx, i64 %aux1, i64 %aux2, i64 %aux3)
   ret void
 }
+
+; Check that we don't assert here. On Win64 this has a TokenFactor with
+; multiple uses, which we can't currently fold.
+define void @thunk(ptr %this, ...) {
+entry:
+  %vtable = load ptr, ptr %this, align 8
+  %vfn = getelementptr inbounds nuw i8, ptr %vtable, i64 8
+  %0 = load ptr, ptr %vfn, align 8
+  musttail call void (ptr, ...) %0(ptr %this, ...)
+  ret void
+}

>From 50851ca4cf4556caa185e171cd799a214998469e Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Tue, 29 Apr 2025 14:35:02 +0200
Subject: [PATCH 5/8] don't allow moving the load across inline asm

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp |  4 ++++
 llvm/test/CodeGen/X86/fold-call.ll      | 12 ++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 831ca0560cc77..e142d7c9ef9a2 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -912,6 +912,10 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
     if (isa<MemSDNode>(Chain.getNode()) &&
         cast<MemSDNode>(Chain.getNode())->writeMem())
       return false;
+    // Moving across inline asm is not safe: it could do anything.
+    if (Chain.getNode()->getOpcode() == ISD::INLINEASM ||
+        Chain.getNode()->getOpcode() == ISD::INLINEASM_BR)
+      return false;
 
     if (Chain.getOperand(0).getNode() == Callee.getNode())
       return true;
diff --git a/llvm/test/CodeGen/X86/fold-call.ll b/llvm/test/CodeGen/X86/fold-call.ll
index 8be817618cd92..25b4df778768f 100644
--- a/llvm/test/CodeGen/X86/fold-call.ll
+++ b/llvm/test/CodeGen/X86/fold-call.ll
@@ -24,3 +24,15 @@ entry:
   tail call void %0()
   ret void
 }
+
+; Don't fold the load+call if there's inline asm in between.
+; CHECK: test3
+; CHECK: mov{{.*}}
+; CHECK: jmp{{.*}}
+define void @test3(ptr nocapture %x) {
+entry:
+  %0 = load ptr, ptr %x
+  call void asm sideeffect "", ""()  ; It could do anything.
+  tail call void %0()
+  ret void
+}

>From 4b997528430b66fb35dfab307874ba7909cdd80f Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 30 Apr 2025 10:49:37 +0200
Subject: [PATCH 6/8] only allow moving the load across specific node types

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index e142d7c9ef9a2..1d2f25d38d3d2 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -906,16 +906,18 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
   while (true) {
     if (!Chain.getNumOperands())
       return false;
-    // Since we are not checking for AA here, conservatively abort if the chain
-    // writes to memory. It's not safe to move the callee (a load) across a
-    // store.
-    if (isa<MemSDNode>(Chain.getNode()) &&
-        cast<MemSDNode>(Chain.getNode())->writeMem())
-      return false;
-    // Moving across inline asm is not safe: it could do anything.
-    if (Chain.getNode()->getOpcode() == ISD::INLINEASM ||
-        Chain.getNode()->getOpcode() == ISD::INLINEASM_BR)
-      return false;
+
+    // It's not safe to move the callee (a load) across e.g. a store.
+    // Conservatively abort if the chain contains a node other than the ones
+    // below.
+    switch (Chain.getNode()->getOpcode()) {
+      case ISD::CALLSEQ_START:
+      case ISD::CopyToReg:
+      case ISD::LOAD:
+        break;
+      default:
+        return false;
+    }
 
     if (Chain.getOperand(0).getNode() == Callee.getNode())
       return true;

>From 27716e1fbf755896c4aa581fb8d8bf39576d4ff0 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 30 Apr 2025 12:09:38 +0200
Subject: [PATCH 7/8] only count GPRs, and compare against the size of
 getGPRsForTailCall()

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1d2f25d38d3d2..9dbe81fdce857 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1380,17 +1380,20 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
            !getTargetMachine().isPositionIndependent())))) {
 
       if (N->getOpcode() == X86ISD::TC_RETURN) {
-        // There needs to be enough non-callee-saved GPRs available to compute
-        // the load address if folded into the tailcall. See how the
+        // There needs to be two non-callee-saved GPRs available to compute the
+        // callee address if the load is folded into the tailcall. See how the
         // X86tcret_6regs and X86tcret_1reg classes are used and defined.
-        unsigned NumRegs = 0;
+        const X86RegisterInfo *RI = Subtarget->getRegisterInfo();
+        unsigned UsedGPRs = 0;
+        // X86tcret args: (*chain, ptr, imm, regs..., glue)
         for (unsigned I = 3, E = N->getNumOperands(); I != E; ++I) {
-          if (isa<RegisterSDNode>(N->getOperand(I)))
-            ++NumRegs;
+          if (const auto *RN = dyn_cast<RegisterSDNode>(N->getOperand(I))) {
+            if (RI->isGeneralPurposeRegister(*MF, RN->getReg()))
+              ++UsedGPRs;
+          }
         }
-        if (!Subtarget->is64Bit() && NumRegs > 1)
-          continue;
-        if (NumRegs > 6)
+        unsigned NumTailCallGPRs = RI->getGPRsForTailCall(*MF)->getNumRegs();
+        if (UsedGPRs + 2 > NumTailCallGPRs)
           continue;
       }
 

>From b60bc943143ddb02873f084177dfd8d865d4cd12 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 30 Apr 2025 15:09:53 +0200
Subject: [PATCH 8/8] extract the checks in X86tcret_6regs/1reg to a function
 and use it

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp  | 63 +++++++++++++++++-------
 llvm/lib/Target/X86/X86InstrFragments.td | 19 +------
 2 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9dbe81fdce857..b5848e982ed69 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -618,6 +618,7 @@ namespace {
     bool onlyUsesZeroFlag(SDValue Flags) const;
     bool hasNoSignFlagUses(SDValue Flags) const;
     bool hasNoCarryFlagUses(SDValue Flags) const;
+    bool checkTCRetRegUsage(SDNode *N, LoadSDNode *Load) const;
   };
 
   class X86DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
@@ -1379,23 +1380,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
           (Subtarget->is64Bit() ||
            !getTargetMachine().isPositionIndependent())))) {
 
-      if (N->getOpcode() == X86ISD::TC_RETURN) {
-        // There needs to be two non-callee-saved GPRs available to compute the
-        // callee address if the load is folded into the tailcall. See how the
-        // X86tcret_6regs and X86tcret_1reg classes are used and defined.
-        const X86RegisterInfo *RI = Subtarget->getRegisterInfo();
-        unsigned UsedGPRs = 0;
-        // X86tcret args: (*chain, ptr, imm, regs..., glue)
-        for (unsigned I = 3, E = N->getNumOperands(); I != E; ++I) {
-          if (const auto *RN = dyn_cast<RegisterSDNode>(N->getOperand(I))) {
-            if (RI->isGeneralPurposeRegister(*MF, RN->getReg()))
-              ++UsedGPRs;
-          }
-        }
-        unsigned NumTailCallGPRs = RI->getGPRsForTailCall(*MF)->getNumRegs();
-        if (UsedGPRs + 2 > NumTailCallGPRs)
-          continue;
-      }
+      if (N->getOpcode() == X86ISD::TC_RETURN &&
+          !checkTCRetRegUsage(N, nullptr))
+        continue;
 
       /// Also try moving call address load from outside callseq_start to just
       /// before the call to allow it to be folded.
@@ -3533,6 +3520,48 @@ static bool mayUseCarryFlag(X86::CondCode CC) {
   return true;
 }
 
+bool X86DAGToDAGISel::checkTCRetRegUsage(SDNode *N, LoadSDNode *Load) const {
+  const X86RegisterInfo *RI = Subtarget->getRegisterInfo();
+  const TargetRegisterClass *TailCallGPRs = RI->getGPRsForTailCall(*MF);
+  unsigned MaxGPRs = TailCallGPRs->getNumRegs();
+  if (Subtarget->is64Bit()) {
+    assert(TailCallGPRs->contains(X86::RSP));
+    assert(TailCallGPRs->contains(X86::RIP));
+    MaxGPRs -= 2; // Can't use RSP or RIP for the address in general.
+  } else {
+    assert(TailCallGPRs->contains(X86::ESP));
+    MaxGPRs -= 1; // Can't use ESP for the address in general.
+  }
+
+  // The load's base and index potentially need two registers.
+  unsigned LoadGPRs = 2;
+
+  if (Load) {
+    // But not if it's loading from a frame slot or global.
+    // XXX: Couldn't we be indexing off of the global though?
+    const SDValue& BasePtr = Load->getBasePtr();
+    if (isa<FrameIndexSDNode>(BasePtr)) {
+      LoadGPRs = 0;
+    } else if (BasePtr->getNumOperands() &&
+        isa<GlobalAddressSDNode>(BasePtr->getOperand(0)))
+      LoadGPRs = 0;
+  }
+
+  unsigned TCGPRs = 0;
+  // X86tcret args: (*chain, ptr, imm, regs..., glue)
+  for (unsigned I = 3, E = N->getNumOperands(); I != E; ++I) {
+    if (const auto *RN = dyn_cast<RegisterSDNode>(N->getOperand(I))) {
+      if (!RI->isGeneralPurposeRegister(*MF, RN->getReg()))
+        continue;
+      if (++TCGPRs + LoadGPRs > MaxGPRs)
+        return false;
+    }
+  }
+
+  return true;
+}
+
+
 /// Check whether or not the chain ending in StoreNode is suitable for doing
 /// the {load; op; store} to modify transformation.
 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index f9d70d1bb5d85..b8573662b1bcd 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -675,27 +675,12 @@ def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
 
 def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
                              (X86tcret node:$ptr, node:$off), [{
-  // X86tcret args: (*chain, ptr, imm, regs..., glue)
-  unsigned NumRegs = 0;
-  for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
-    if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
-      return false;
-  return true;
+  return checkTCRetRegUsage(N, nullptr);
 }]>;
 
 def X86tcret_1reg : PatFrag<(ops node:$ptr, node:$off),
                              (X86tcret node:$ptr, node:$off), [{
-  // X86tcret args: (*chain, ptr, imm, regs..., glue)
-  unsigned NumRegs = 1;
-  const SDValue& BasePtr = cast<LoadSDNode>(N->getOperand(1))->getBasePtr();
-  if (isa<FrameIndexSDNode>(BasePtr))
-    NumRegs = 3;
-  else if (BasePtr->getNumOperands() && isa<GlobalAddressSDNode>(BasePtr->getOperand(0)))
-    NumRegs = 3;
-  for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
-    if (isa<RegisterSDNode>(N->getOperand(i)) && ( NumRegs-- == 0))
-      return false;
-  return true;
+  return checkTCRetRegUsage(N, cast<LoadSDNode>(N->getOperand(1)));
 }]>;
 
 // If this is an anyext of the remainder of an 8-bit sdivrem, use a MOVSX



More information about the llvm-commits mailing list