[llvm-branch-commits] CodeGenPrepare, X86: Support sinking phi nodes. (PR #141716)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 27 21:55:50 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Peter Collingbourne (pcc)
Changes:
isProfitableToSinkOperands() may now report a phi node as a sinkable operand.
When it does, CodeGenPrepare checks that all of the phi's incoming values are
identical and, if so, replaces the phi use with a clone of that incoming
value. This mechanism is used to sink phi node operands of shifts whose
incoming values are identical casts of a constant.
This could be improved a bit by changing the isProfitableToSinkOperands()
API so that hasIdenticalValue() does not need to be called again in the
caller, but hopefully a phi operand feeding a shift is rare enough that
doing the check more than once does not matter.
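
For illustration only, here is a minimal sketch of the kind of IR this
handles; the global, value and block names are invented, but the shape
follows the f4 test added below:

```llvm
; Before CodeGenPrepare: the shift amount only reaches the shift through a
; phi, but both incoming values are identical zexts of a constant expression.
bb1:
  %z1 = zext i8 ptrtoint (ptr @align to i8) to i64
  br label %join

bb2:
  %z2 = zext i8 ptrtoint (ptr @align to i8) to i64
  br label %join

join:
  %amt = phi i64 [ %z1, %bb1 ], [ %z2, %bb2 ]
  %r = lshr i64 %x, %amt

; After sinking: the identical incoming value is cloned next to the shift and
; the phi use is replaced, so the constant shift amount is visible to
; instruction selection again.
join:
  %amt.sunk = zext i8 ptrtoint (ptr @align to i8) to i64
  %r = lshr i64 %x, %amt.sunk
```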
---
Full diff: https://github.com/llvm/llvm-project/pull/141716.diff
5 Files Affected:
- (modified) llvm/include/llvm/IR/Instructions.h (+5)
- (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+13-4)
- (modified) llvm/lib/IR/Instructions.cpp (+35)
- (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+10-3)
- (added) llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll (+92)
``````````diff
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index c164f76eb335b..e8229036b51db 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2822,6 +2822,11 @@ class PHINode : public Instruction {
/// non-undef value.
bool hasConstantOrUndefValue() const;
+ /// If the specified PHI node (possibly via other PHI nodes) merges together
+ /// the same or identical (i.e. Instruction::isIdenticalTo() returns true)
+ /// values, return one of the values, otherwise return null.
+ Value *hasIdenticalValue();
+
/// If the PHI node is complete which means all of its parent's predecessors
/// have incoming value in this PHI, return true, otherwise return false.
bool isComplete() const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 52263026d6cea..422916a28669f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (isa<PHINode>(UI))
- continue;
- if (UI->getParent() == TargetBB) {
+ if (auto *PN = dyn_cast<PHINode>(UI)) {
+ auto *I0 = dyn_cast<Instruction>(PN->hasIdenticalValue());
+ if (!I0)
+ continue;
+ if (I0->getParent() == TargetBB &&
+ InstOrdering[I0] < InstOrdering[InsertPoint])
+ InsertPoint = I0;
+ } else if (UI->getParent() == TargetBB) {
if (InstOrdering[UI] < InstOrdering[InsertPoint])
InsertPoint = UI;
continue;
@@ -7753,7 +7758,11 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
DenseMap<Instruction *, Instruction *> NewInstructions;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
- Instruction *NI = UI->clone();
+ Instruction *NI;
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ NI = cast<Instruction>(PN->hasIdenticalValue())->clone();
+ else
+ NI = UI->clone();
if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index f404e11b9c0f0..9c59378213991 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -48,6 +48,7 @@
#include <cassert>
#include <cstdint>
#include <optional>
+#include <set>
#include <vector>
using namespace llvm;
@@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const {
return true;
}
+/// If the specified PHI node (possibly via other PHI nodes) merges together the
+/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values,
+/// return one of the values, otherwise return null.
+Value *PHINode::hasIdenticalValue() {
+ std::vector<PHINode *> Worklist;
+ std::set<PHINode *> Seen;
+ Value *Result = nullptr;
+ Worklist.push_back(this);
+ while (!Worklist.empty()) {
+ PHINode *PN = Worklist.back();
+ Worklist.pop_back();
+ if (!Seen.insert(PN).second)
+ continue;
+ for (Value *V : PN->incoming_values()) {
+ if (auto *PN = dyn_cast<PHINode>(V)) {
+ Worklist.push_back(PN);
+ continue;
+ }
+ if (!Result) {
+ Result = V;
+ continue;
+ }
+ if (V == Result)
+ continue;
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *ResultI = dyn_cast<Instruction>(Result))
+ if (I->isIdenticalTo(ResultI))
+ continue;
+ return nullptr;
+ }
+ }
+ return Result;
+}
+
//===----------------------------------------------------------------------===//
// LandingPadInst Implementation
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 072705bd31b1a..5baf5e3001baa 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7167,7 +7167,14 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
}
if (ShiftAmountOpNum == -1)
return false;
- auto *ShiftAmount = &I->getOperandUse(ShiftAmountOpNum);
+ auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum);
+
+ Value *ShiftAmount = ShiftAmountUse->get();
+ if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
+ ShiftAmount = PN->hasIdenticalValue();
+ if (!ShiftAmount)
+ return false;
+ }
// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
@@ -7175,7 +7182,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
isVectorShiftByScalarCheap(I->getType())) {
- Ops.push_back(ShiftAmount);
+ Ops.push_back(ShiftAmountUse);
return true;
}
@@ -7186,7 +7193,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
// instruction's immediate operand.
if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
if (isa<ConstantExpr>(CI->getOperand(0))) {
- Ops.push_back(ShiftAmount);
+ Ops.push_back(ShiftAmountUse);
return true;
}
}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
new file mode 100644
index 0000000000000..98f71cdce913d
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll
@@ -0,0 +1,92 @@
+; Make sure that if a phi with identical inputs gets created it gets undone by CodeGenPrepare.
+
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
+@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
+@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+
+; Check that we recover the third pair of zexts from the phi.
+
+; CHECK: define void @f4
+define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) #1 {
+ br i1 %0, label %5, label %18
+
+5:
+ %6 = load ptr, ptr %1, align 8
+ %7 = ptrtoint ptr %6 to i64
+ %8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %10 = lshr i64 %8, %9
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %12 = shl i64 %8, %11
+ %13 = or i64 %10, %12
+ %14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %14, label %15, label %16
+
+15:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+16:
+ %17 = load ptr, ptr %6, align 8
+ tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1)
+ br label %31
+
+18:
+ %19 = load ptr, ptr %2, align 8
+ %20 = ptrtoint ptr %19 to i64
+ %21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
+ %23 = lshr i64 %21, %22
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
+ %25 = shl i64 %21, %24
+ %26 = or i64 %23, %25
+ %27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %27, label %28, label %29
+
+28:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+29:
+ %30 = load ptr, ptr %19, align 8
+ tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2)
+ br label %31
+
+31:
+ %32 = phi i64 [ %24, %29 ], [ %11, %16 ]
+ %33 = phi i64 [ %22, %29 ], [ %9, %16 ]
+ %34 = load ptr, ptr %3, align 8
+ %35 = ptrtoint ptr %34 to i64
+ %36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %37 = lshr i64 %36, %33
+ ; CHECK: zext {{.*}} @__typeid__ZTS1S_align
+ %38 = shl i64 %36, %32
+ %39 = or i64 %37, %38
+ %40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
+ br i1 %40, label %41, label %42
+
+41:
+ tail call void @llvm.ubsantrap(i8 2) #5
+ unreachable
+
+42:
+ %43 = load ptr, ptr %34, align 8
+ tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3)
+ ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+!0 = !{i64 0, i64 256}
+!1 = !{i64 0, i64 128}
``````````
https://github.com/llvm/llvm-project/pull/141716