[llvm] [InstCombine] Don't fold select to or if value argument is user of invalid addrspacecast inst (PR #144686)

Wenju He via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 18 19:53:01 PDT 2025


https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/144686

>From b5c9a4834accbac18664f1431acc5204c0527fa8 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Wed, 18 Jun 2025 14:21:09 +0200
Subject: [PATCH 1/2] [InstCombine] Don't fold select to or if value argument
 is user of invalid addrspacecast inst

In our downstream GPU target, the following IR is valid before instcombine,
although the second addrspacecast causes UB.
  define i1 @test(ptr addrspace(1) noundef %v) {
    %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
    %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
    %2 = icmp eq i32 %1, 0
    %3 = addrspacecast ptr addrspace(4) %0 to ptr addrspace(3)
    %4 = select i1 %2, ptr addrspace(3) null, ptr addrspace(3) %3
    %5 = icmp eq ptr addrspace(3) %4, null
    ret i1 %5
  }
We have a custom optimization that replaces an invalid addrspacecast with
poison, and the IR is still valid since `select` stops poison propagation.

However, the instcombine pass optimizes the `select` to an `or`:
    %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
    %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
    %2 = icmp eq i32 %1, 0
    %3 = addrspacecast ptr addrspace(1) %v to ptr addrspace(3)
    %4 = icmp eq ptr addrspace(3) %3, null
    %5 = or i1 %2, %4
    ret i1 %5
The transform is invalid for our target.
---
 .../InstCombine/InstCombineSelect.cpp         | 35 +++++++++++++++----
 .../InstCombine/AMDGPU/addrspacecast.ll       | 23 ++++++++++++
 2 files changed, 52 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 73ba0f78e8053..a2335640f917b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3194,8 +3194,23 @@ static Instruction *foldNestedSelects(SelectInst &OuterSelVal,
 /// Return true if V is poison or \p Expected given that ValAssumedPoison is
 /// already poison. For example, if ValAssumedPoison is `icmp samesign X, 10`
 /// and V is `icmp ne X, 5`, impliesPoisonOrCond returns true.
-static bool impliesPoisonOrCond(const Value *ValAssumedPoison, const Value *V,
-                                bool Expected) {
+static bool impliesPoisonOrCond(
+    const Value *ValAssumedPoison, const Value *V, bool Expected,
+    llvm::function_ref<bool(unsigned, unsigned)> isValidAddrSpaceCast) {
+  // Handle the case that ValAssumedPoison is `icmp eq ptr addrspace(3) X, null`
+  // and X is `addrspacecast ptr addrspace(1) Y to ptr addrspace(3)`. Target can
+  // replace X with poison if the addrspacecast is invalid. However, `V` might
+  // not be poison.
+  if (auto *ICmp = dyn_cast<ICmpInst>(ValAssumedPoison)) {
+    auto CanCreatePoison = [&](Value *Op) {
+      auto *ASC = dyn_cast<AddrSpaceCastInst>(Op);
+      return ASC && !isValidAddrSpaceCast(ASC->getDestAddressSpace(),
+                                          ASC->getSrcAddressSpace());
+    };
+    if (llvm::any_of(ICmp->operands(), CanCreatePoison))
+      return false;
+  }
+
   if (impliesPoison(ValAssumedPoison, V))
     return true;
 
@@ -3241,17 +3256,23 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
   auto *Zero = ConstantInt::getFalse(SelType);
   Value *A, *B, *C, *D;
 
+  auto IsValidAddrSpaceCast = [&](unsigned FromAS, unsigned ToAS) {
+    return isValidAddrSpaceCast(FromAS, ToAS);
+  };
+
   // Folding select to and/or i1 isn't poison safe in general. impliesPoison
   // checks whether folding it does not convert a well-defined value into
   // poison.
   if (match(TrueVal, m_One())) {
-    if (impliesPoisonOrCond(FalseVal, CondVal, /*Expected=*/false)) {
+    if (impliesPoisonOrCond(FalseVal, CondVal, /*Expected=*/false,
+                            IsValidAddrSpaceCast)) {
       // Change: A = select B, true, C --> A = or B, C
       return BinaryOperator::CreateOr(CondVal, FalseVal);
     }
 
     if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_One(), m_Value(B)))) &&
-        impliesPoisonOrCond(FalseVal, B, /*Expected=*/false)) {
+        impliesPoisonOrCond(FalseVal, B, /*Expected=*/false,
+                            IsValidAddrSpaceCast)) {
       // (A || B) || C --> A || (B | C)
       return replaceInstUsesWith(
           SI, Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal)));
@@ -3287,13 +3308,15 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
   }
 
   if (match(FalseVal, m_Zero())) {
-    if (impliesPoisonOrCond(TrueVal, CondVal, /*Expected=*/true)) {
+    if (impliesPoisonOrCond(TrueVal, CondVal, /*Expected=*/true,
+                            IsValidAddrSpaceCast)) {
       // Change: A = select B, C, false --> A = and B, C
       return BinaryOperator::CreateAnd(CondVal, TrueVal);
     }
 
     if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_Value(B), m_Zero()))) &&
-        impliesPoisonOrCond(TrueVal, B, /*Expected=*/true)) {
+        impliesPoisonOrCond(TrueVal, B, /*Expected=*/true,
+                            IsValidAddrSpaceCast)) {
       // (A && B) && C --> A && (B & C)
       return replaceInstUsesWith(
           SI, Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal)));
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
new file mode 100644
index 0000000000000..4791d2c434884
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s
+
+; Check that `select B, true, C` isn't optimized to `or B, C`.
+define i1 @not_fold_select(ptr addrspace(1) noundef %x) {
+; CHECK-LABEL: define i1 @not_fold_select(
+; CHECK-SAME: ptr addrspace(1) noundef [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP3]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  entry:
+  %0 = addrspacecast ptr addrspace(1) %x to ptr
+  %1 = tail call i1 @llvm.amdgcn.is.shared(ptr %0)
+  %2 = addrspacecast ptr %0 to ptr addrspace(3)
+  %3 = select i1 %1, ptr addrspace(3) null, ptr addrspace(3) %2
+  %4 = icmp eq ptr addrspace(3) %3, null
+  ret i1 %4
+}

>From ffff2c3624239c1290e71abcce44f3317aab597a Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Thu, 19 Jun 2025 04:39:06 +0200
Subject: [PATCH 2/2] update LangRef, refine test

---
 llvm/docs/LangRef.rst                             |  3 +++
 .../Transforms/InstCombine/InstCombineSelect.cpp  |  2 +-
 .../InstCombine/AMDGPU/addrspacecast.ll           | 15 ++++++++-------
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index cc72a37f68599..51d28d6215f3a 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12621,6 +12621,9 @@ have no side effects, and must not capture the value of the pointer.
 If the source is :ref:`poison <poisonvalues>`, the result is
 :ref:`poison <poisonvalues>`.
 
+If the source is not :ref:`poison <poisonvalues>`, and the result pointer is
+non-dereferenceable, the result is :ref:`poison <poisonvalues>`.
+
 If the source is not :ref:`poison <poisonvalues>`, and both source and
 destination are :ref:`integral pointers <nointptrtype>`, and the
 result pointer is dereferenceable, the cast is assumed to be
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a2335640f917b..c470362ba75d4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3196,7 +3196,7 @@ static Instruction *foldNestedSelects(SelectInst &OuterSelVal,
 /// and V is `icmp ne X, 5`, impliesPoisonOrCond returns true.
 static bool impliesPoisonOrCond(
     const Value *ValAssumedPoison, const Value *V, bool Expected,
-    llvm::function_ref<bool(unsigned, unsigned)> isValidAddrSpaceCast) {
+    function_ref<bool(unsigned, unsigned)> isValidAddrSpaceCast) {
   // Handle the case that ValAssumedPoison is `icmp eq ptr addrspace(3) X, null`
   // and X is `addrspacecast ptr addrspace(1) Y to ptr addrspace(3)`. Target can
   // replace X with poison if the addrspacecast is invalid. However, `V` might
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
index 4791d2c434884..2c2dbc796a31d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
@@ -10,14 +10,15 @@ define i1 @not_fold_select(ptr addrspace(1) noundef %x) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null
-; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP3]]
+; CHECK-NEXT:    [[NOT_IS_SHARED:%.*]] = xor i1 [[TMP1]], true
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[NOT_IS_SHARED]], i1 true, i1 [[TMP3]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   entry:
-  %0 = addrspacecast ptr addrspace(1) %x to ptr
-  %1 = tail call i1 @llvm.amdgcn.is.shared(ptr %0)
-  %2 = addrspacecast ptr %0 to ptr addrspace(3)
-  %3 = select i1 %1, ptr addrspace(3) null, ptr addrspace(3) %2
-  %4 = icmp eq ptr addrspace(3) %3, null
-  ret i1 %4
+  %asc.flat = addrspacecast ptr addrspace(1) %x to ptr
+  %is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat)
+  %asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3)
+  %shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null
+  %result = icmp eq ptr addrspace(3) %shared.addr, null
+  ret i1 %result
 }



More information about the llvm-commits mailing list