[llvm] [NVPTX, InstCombine] instcombine known pointer AS checks. (PR #114325)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 30 16:06:14 PDT 2024


https://github.com/Artem-B created https://github.com/llvm/llvm-project/pull/114325

The change improves the code in general and, as a side effect, avoids crashing
on impossible address space casts guarded by `__isGlobal/__isShared`, which
partially fixes https://github.com/llvm/llvm-project/issues/112760.
It's still possible to trigger the issue by using explicit AS casts w/o
AS checks, but LLVM should no longer crash on valid code.

This is #112964 plus a small fix for the crash on unintended argument access,
which was the reason the earlier version of the patch was reverted.

>From 673d66954933fc6749fb7932d8d7304b80e0e831 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Thu, 17 Oct 2024 15:33:00 -0700
Subject: [PATCH 1/7] [NVPTX] instcombine known pointer AS checks.

This avoids crashing on impossible address space casts guarded by `__isGlobal/__isShared`.
---
 llvm/include/llvm/Support/NVPTXAddrSpace.h    |  33 +++
 llvm/lib/Analysis/InstructionSimplify.cpp     |  30 ++
 .../Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h |  12 +-
 .../Transforms/InstCombine/NVPTX/isspacep.ll  | 261 ++++++++++++++++++
 4 files changed, 326 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/Support/NVPTXAddrSpace.h
 create mode 100644 llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll

diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h
new file mode 100644
index 00000000000000..063d2aaffdc57d
--- /dev/null
+++ b/llvm/include/llvm/Support/NVPTXAddrSpace.h
@@ -0,0 +1,33 @@
+//===---------------- AMDGPUAddrSpace.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDGPU address space definition
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_NVPTXADDRSPACE_H
+#define LLVM_SUPPORT_NVPTXADDRSPACE_H
+
+namespace llvm {
+namespace NVPTXAS {
+enum AddressSpace : unsigned {
+  ADDRESS_SPACE_GENERIC = 0,
+  ADDRESS_SPACE_GLOBAL = 1,
+  ADDRESS_SPACE_SHARED = 3,
+  ADDRESS_SPACE_CONST = 4,
+  ADDRESS_SPACE_LOCAL = 5,
+
+  ADDRESS_SPACE_PARAM = 101,
+};
+} // end namespace NVPTXAS
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_NVPTXADDRSPACE_H
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index d08be1e55c853e..b525bc27d72b8b 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -38,10 +38,12 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
 #include <algorithm>
 #include <optional>
 using namespace llvm;
@@ -6365,6 +6367,34 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
 
     break;
   }
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_const: {
+    auto *Ty = F->getReturnType();
+    unsigned AS = Op0->getType()->getPointerAddressSpace();
+    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) {
+      if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Op0))
+        AS = ASC->getSrcAddressSpace();
+      else if (auto *CE = dyn_cast<ConstantExpr>(Op0)) {
+        if (CE->getOpcode() == Instruction::AddrSpaceCast)
+          AS = CE->getOperand(0)->getType()->getPointerAddressSpace();
+      }
+    }
+    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+        AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+      return nullptr; // Got to check at run-time.
+    bool ASMatches = (AS == NVPTXAS::ADDRESS_SPACE_GLOBAL &&
+                      IID == Intrinsic::nvvm_isspacep_global) ||
+                     (AS == NVPTXAS::ADDRESS_SPACE_LOCAL &&
+                      IID == Intrinsic::nvvm_isspacep_local) ||
+                     (AS == NVPTXAS::ADDRESS_SPACE_SHARED &&
+                      IID == Intrinsic::nvvm_isspacep_shared) ||
+                     (AS == NVPTXAS::ADDRESS_SPACE_CONST &&
+                      IID == Intrinsic::nvvm_isspacep_const);
+    return ConstantInt::get(Ty, ASMatches);
+    break;
+  }
   default:
     break;
   }
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 815b600fe93a9f..d06e2c00ec3f96 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -16,18 +16,10 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
 #define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
 
+#include "llvm/Support/NVPTXAddrSpace.h"
 namespace llvm {
 
-enum AddressSpace {
-  ADDRESS_SPACE_GENERIC = 0,
-  ADDRESS_SPACE_GLOBAL = 1,
-  ADDRESS_SPACE_SHARED = 3,
-  ADDRESS_SPACE_CONST = 4,
-  ADDRESS_SPACE_LOCAL = 5,
-
-  // NVVM Internal
-  ADDRESS_SPACE_PARAM = 101
-};
+using namespace NVPTXAS;
 
 namespace NVPTXII {
 enum {
diff --git a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll
new file mode 100644
index 00000000000000..f53ec0120cfb3e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Source data in different AS.
+ at shared_data = dso_local addrspace(3) global i32 undef, align 4
+ at global_data = dso_local addrspace(1) externally_initialized global i32 0, align 4
+ at const_data = dso_local addrspace(4) externally_initialized constant i32 3, align 4
+
+; Results get stored here.
+ at gen = dso_local addrspace(1) externally_initialized global i8 0, align 1
+ at g1 = dso_local addrspace(1) externally_initialized global i8 0, align 1
+ at g2 = dso_local addrspace(1) externally_initialized global i8 0, align 1
+ at s1 = dso_local addrspace(1) externally_initialized global i8 0, align 1
+ at s2 = dso_local addrspace(1) externally_initialized global i8 0, align 1
+ at c1 = dso_local addrspace(1) externally_initialized global i8 0, align 1
+ at c2 = dso_local addrspace(1) externally_initialized global i8 0, align 1
+ at l = dso_local addrspace(1) externally_initialized global i8 0, align 1
+
+declare i1 @llvm.nvvm.isspacep.global(ptr nocapture)
+declare i1 @llvm.nvvm.isspacep.shared(ptr nocapture)
+declare i1 @llvm.nvvm.isspacep.const(ptr nocapture)
+declare i1 @llvm.nvvm.isspacep.local(ptr nocapture)
+
+define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @check_global(
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
+; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  ; No constant folding for generic pointers of unknown origin.
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %generic_data)
+  %storedv = zext i1 %gen0 to i8
+  store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+
+  %isg1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(1) @global_data to ptr))
+  %isg18 = zext i1 %isg1 to i8
+  store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+
+  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %global_data_asc)
+  %isg28 = zext i1 %isg2 to i8
+  store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+
+  %iss1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr))
+  %iss18 = zext i1 %iss1 to i8
+  store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+
+  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %shared_data_asc)
+  %iss28 = zext i1 %iss2 to i8
+  store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+
+  %isc1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(4) @const_data to ptr))
+  %isc18 = zext i1 %isc1 to i8
+  store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+
+  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %const_data_asc)
+  %isc28 = zext i1 %isc2 to i8
+  store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+
+  ; Local data can't ihave a constant address, so we can't have a constant ASC expression
+  ; We can only use an ASC instruction.
+  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %local_data_asc)
+  %isl8 = zext i1 %isl to i8
+  store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+
+  ret void
+}
+
+define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @check_shared(
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
+; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  ; No constant folding for generic pointers of unknown origin.
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %generic_data)
+  %storedv = zext i1 %gen0 to i8
+  store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+
+  %isg1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(1) @global_data to ptr))
+  %isg18 = zext i1 %isg1 to i8
+  store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+
+  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %global_data_asc)
+  %isg28 = zext i1 %isg2 to i8
+  store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+
+  %iss1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr))
+  %iss18 = zext i1 %iss1 to i8
+  store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+
+  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %shared_data_asc)
+  %iss28 = zext i1 %iss2 to i8
+  store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+
+  %isc1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(4) @const_data to ptr))
+  %isc18 = zext i1 %isc1 to i8
+  store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+
+  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %const_data_asc)
+  %isc28 = zext i1 %isc2 to i8
+  store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+
+  ; Local data can't have a constant address, so we can't have a constant ASC expression
+  ; We can only use an ASC instruction.
+  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %local_data_asc)
+  %isl8 = zext i1 %isl to i8
+  store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+
+  ret void
+}
+
+define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @check_const(
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
+; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  ; No constant folding for generic pointers of unknown origin.
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %generic_data)
+  %storedv = zext i1 %gen0 to i8
+  store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+
+  %isg1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(1) @global_data to ptr))
+  %isg18 = zext i1 %isg1 to i8
+  store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+
+  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %global_data_asc)
+  %isg28 = zext i1 %isg2 to i8
+  store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+
+  %iss1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr))
+  %iss18 = zext i1 %iss1 to i8
+  store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+
+  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %shared_data_asc)
+  %iss28 = zext i1 %iss2 to i8
+  store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+
+  %isc1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(4) @const_data to ptr))
+  %isc18 = zext i1 %isc1 to i8
+  store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+
+  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %const_data_asc)
+  %isc28 = zext i1 %isc2 to i8
+  store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+
+  ; Local data can't have a constant address, so we can't have a constant ASC expression
+  ; We can only use an ASC instruction.
+  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %local_data_asc)
+  %isl8 = zext i1 %isl to i8
+  store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+
+  ret void
+}
+
+define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @check_local(
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
+; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  ; No constant folding for generic pointers of unknown origin.
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %generic_data)
+  %storedv = zext i1 %gen0 to i8
+  store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
+
+  %isg1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(1) @global_data to ptr))
+  %isg18 = zext i1 %isg1 to i8
+  store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
+
+  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %global_data_asc)
+  %isg28 = zext i1 %isg2 to i8
+  store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
+
+  %iss1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr))
+  %iss18 = zext i1 %iss1 to i8
+  store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
+
+  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %shared_data_asc)
+  %iss28 = zext i1 %iss2 to i8
+  store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
+
+  %isc1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(4) @const_data to ptr))
+  %isc18 = zext i1 %isc1 to i8
+  store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
+
+  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %const_data_asc)
+  %isc28 = zext i1 %isc2 to i8
+  store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
+
+  ; Local data can't have a constant address, so we can't have a constant ASC expression
+  ; We can only use an ASC instruction.
+  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %local_data_asc)
+  %isl8 = zext i1 %isl to i8
+  store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
+
+  ret void
+}
+

>From 48b3e6f69b64a82ce0ff16ed8a69cb87a7ac1424 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Fri, 18 Oct 2024 12:57:25 -0700
Subject: [PATCH 2/7] Clean up names in comments.

---
 llvm/include/llvm/Support/NVPTXAddrSpace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h
index 063d2aaffdc57d..93eae39e3d2305 100644
--- a/llvm/include/llvm/Support/NVPTXAddrSpace.h
+++ b/llvm/include/llvm/Support/NVPTXAddrSpace.h
@@ -1,4 +1,4 @@
-//===---------------- AMDGPUAddrSpace.h -------------------------*- C++ -*-===//
+//===---------------- NVPTXAddrSpace.h -------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// AMDGPU address space definition
+/// NVPTX address space definition
 ///
 //
 //===----------------------------------------------------------------------===//

>From 401e4f3437e7aeb6a028448b66a07b514900c95b Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Fri, 25 Oct 2024 11:09:22 -0700
Subject: [PATCH 3/7] Cast directly to AddrSpaceCastOperator

---
 llvm/lib/Analysis/InstructionSimplify.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index b525bc27d72b8b..5090e09f20701f 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6376,10 +6376,8 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
     if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) {
       if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Op0))
         AS = ASC->getSrcAddressSpace();
-      else if (auto *CE = dyn_cast<ConstantExpr>(Op0)) {
-        if (CE->getOpcode() == Instruction::AddrSpaceCast)
-          AS = CE->getOperand(0)->getType()->getPointerAddressSpace();
-      }
+      else if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
+        AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
     }
     if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
         AS == NVPTXAS::ADDRESS_SPACE_PARAM)

>From fffb81ed92a28c6f43d34bc6e63fe1953ac2bdbc Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Mon, 28 Oct 2024 14:08:32 -0700
Subject: [PATCH 4/7] Use AS-specific pointers for ASC instruction testing

If we use a pointer to a constant, instcombine collapses it into an ASC operator.
---
 .../Transforms/InstCombine/NVPTX/isspacep.ll  | 112 ++++++++++--------
 1 file changed, 64 insertions(+), 48 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll
index f53ec0120cfb3e..dedd85e1a8cda8 100644
--- a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll
+++ b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll
@@ -23,11 +23,11 @@ declare i1 @llvm.nvvm.isspacep.shared(ptr nocapture)
 declare i1 @llvm.nvvm.isspacep.const(ptr nocapture)
 declare i1 @llvm.nvvm.isspacep.local(ptr nocapture)
 
-define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp,
 ; CHECK-LABEL: define dso_local void @check_global(
-; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENP]])
 ; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
 ; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 ; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
@@ -39,9 +39,13 @@ define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr noc
 ; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 ; CHECK-NEXT:    ret void
 ;
+  ptr addrspace(1) %gp,
+  ptr addrspace(3) %sp,
+  ptr addrspace(4) %cp,
+  ptr addrspace(5) %lp) local_unnamed_addr {
 entry:
   ; No constant folding for generic pointers of unknown origin.
-  %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %generic_data)
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %genp)
   %storedv = zext i1 %gen0 to i8
   store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 
@@ -49,8 +53,8 @@ entry:
   %isg18 = zext i1 %isg1 to i8
   store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
 
-  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
-  %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %global_data_asc)
+  %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %gp_asc)
   %isg28 = zext i1 %isg2 to i8
   store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
 
@@ -58,8 +62,8 @@ entry:
   %iss18 = zext i1 %iss1 to i8
   store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
 
-  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
-  %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %shared_data_asc)
+  %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %sp_asc)
   %iss28 = zext i1 %iss2 to i8
   store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
 
@@ -67,26 +71,26 @@ entry:
   %isc18 = zext i1 %isc1 to i8
   store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
 
-  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
-  %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %const_data_asc)
+  %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %cp_asc)
   %isc28 = zext i1 %isc2 to i8
   store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
 
   ; Local data can't ihave a constant address, so we can't have a constant ASC expression
   ; We can only use an ASC instruction.
-  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
-  %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %local_data_asc)
+  %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %lp_asc)
   %isl8 = zext i1 %isl to i8
   store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 
   ret void
 }
 
-define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp,
 ; CHECK-LABEL: define dso_local void @check_shared(
-; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENP]])
 ; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
 ; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 ; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
@@ -98,9 +102,13 @@ define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr noc
 ; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 ; CHECK-NEXT:    ret void
 ;
+  ptr addrspace(1) %gp,
+  ptr addrspace(3) %sp,
+  ptr addrspace(4) %cp,
+  ptr addrspace(5) %lp) local_unnamed_addr {
 entry:
   ; No constant folding for generic pointers of unknown origin.
-  %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %generic_data)
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %genp)
   %storedv = zext i1 %gen0 to i8
   store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 
@@ -108,8 +116,8 @@ entry:
   %isg18 = zext i1 %isg1 to i8
   store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
 
-  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
-  %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %global_data_asc)
+  %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %gp_asc)
   %isg28 = zext i1 %isg2 to i8
   store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
 
@@ -117,8 +125,8 @@ entry:
   %iss18 = zext i1 %iss1 to i8
   store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
 
-  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
-  %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %shared_data_asc)
+  %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %sp_asc)
   %iss28 = zext i1 %iss2 to i8
   store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
 
@@ -126,26 +134,26 @@ entry:
   %isc18 = zext i1 %isc1 to i8
   store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
 
-  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
-  %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %const_data_asc)
+  %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %cp_asc)
   %isc28 = zext i1 %isc2 to i8
   store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
 
   ; Local data can't have a constant address, so we can't have a constant ASC expression
   ; We can only use an ASC instruction.
-  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
-  %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %local_data_asc)
+  %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %lp_asc)
   %isl8 = zext i1 %isl to i8
   store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 
   ret void
 }
 
-define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp,
 ; CHECK-LABEL: define dso_local void @check_const(
-; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENP]])
 ; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
 ; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 ; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
@@ -157,9 +165,13 @@ define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr noca
 ; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 ; CHECK-NEXT:    ret void
 ;
+  ptr addrspace(1) %gp,
+  ptr addrspace(3) %sp,
+  ptr addrspace(4) %cp,
+  ptr addrspace(5) %lp) local_unnamed_addr {
 entry:
   ; No constant folding for generic pointers of unknown origin.
-  %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %generic_data)
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %genp)
   %storedv = zext i1 %gen0 to i8
   store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 
@@ -167,8 +179,8 @@ entry:
   %isg18 = zext i1 %isg1 to i8
   store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
 
-  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
-  %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %global_data_asc)
+  %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %gp_asc)
   %isg28 = zext i1 %isg2 to i8
   store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
 
@@ -176,8 +188,8 @@ entry:
   %iss18 = zext i1 %iss1 to i8
   store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
 
-  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
-  %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %shared_data_asc)
+  %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %sp_asc)
   %iss28 = zext i1 %iss2 to i8
   store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
 
@@ -185,26 +197,26 @@ entry:
   %isc18 = zext i1 %isc1 to i8
   store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
 
-  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
-  %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %const_data_asc)
+  %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %cp_asc)
   %isc28 = zext i1 %isc2 to i8
   store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
 
   ; Local data can't have a constant address, so we can't have a constant ASC expression
   ; We can only use an ASC instruction.
-  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
-  %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %local_data_asc)
+  %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %lp_asc)
   %isl8 = zext i1 %isl to i8
   store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 
   ret void
 }
 
-define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr {
+define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp,
 ; CHECK-LABEL: define dso_local void @check_local(
-; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr {
+; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENERIC_DATA]])
+; CHECK-NEXT:    [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENP]])
 ; CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8
 ; CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 ; CHECK-NEXT:    store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
@@ -216,9 +228,13 @@ define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr noca
 ; CHECK-NEXT:    store i8 1, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 ; CHECK-NEXT:    ret void
 ;
+  ptr addrspace(1) %gp,
+  ptr addrspace(3) %sp,
+  ptr addrspace(4) %cp,
+  ptr addrspace(5) %lp) local_unnamed_addr {
 entry:
   ; No constant folding for generic pointers of unknown origin.
-  %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %generic_data)
+  %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %genp)
   %storedv = zext i1 %gen0 to i8
   store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1
 
@@ -226,8 +242,8 @@ entry:
   %isg18 = zext i1 %isg1 to i8
   store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1
 
-  %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr
-  %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %global_data_asc)
+  %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr
+  %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %gp_asc)
   %isg28 = zext i1 %isg2 to i8
   store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1
 
@@ -235,8 +251,8 @@ entry:
   %iss18 = zext i1 %iss1 to i8
   store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1
 
-  %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr
-  %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %shared_data_asc)
+  %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr
+  %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %sp_asc)
   %iss28 = zext i1 %iss2 to i8
   store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1
 
@@ -244,15 +260,15 @@ entry:
   %isc18 = zext i1 %isc1 to i8
   store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1
 
-  %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr
-  %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %const_data_asc)
+  %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr
+  %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %cp_asc)
   %isc28 = zext i1 %isc2 to i8
   store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1
 
   ; Local data can't have a constant address, so we can't have a constant ASC expression
   ; We can only use an ASC instruction.
-  %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr
-  %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %local_data_asc)
+  %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr
+  %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %lp_asc)
   %isl8 = zext i1 %isl to i8
   store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1
 

>From cc934b31d49b320cc3c55ed39b8ee6896daf0aaa Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Mon, 28 Oct 2024 14:05:18 -0700
Subject: [PATCH 5/7] Move isspacep intrinsics processing to NVPTX backend

---
 llvm/lib/Analysis/InstructionSimplify.cpp     | 26 --------
 .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 63 ++++++++++++++++++-
 2 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5090e09f20701f..90a92b9781badd 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6367,32 +6367,6 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
 
     break;
   }
-  case Intrinsic::nvvm_isspacep_global:
-  case Intrinsic::nvvm_isspacep_local:
-  case Intrinsic::nvvm_isspacep_shared:
-  case Intrinsic::nvvm_isspacep_const: {
-    auto *Ty = F->getReturnType();
-    unsigned AS = Op0->getType()->getPointerAddressSpace();
-    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) {
-      if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Op0))
-        AS = ASC->getSrcAddressSpace();
-      else if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
-        AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
-    }
-    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
-        AS == NVPTXAS::ADDRESS_SPACE_PARAM)
-      return nullptr; // Got to check at run-time.
-    bool ASMatches = (AS == NVPTXAS::ADDRESS_SPACE_GLOBAL &&
-                      IID == Intrinsic::nvvm_isspacep_global) ||
-                     (AS == NVPTXAS::ADDRESS_SPACE_LOCAL &&
-                      IID == Intrinsic::nvvm_isspacep_local) ||
-                     (AS == NVPTXAS::ADDRESS_SPACE_SHARED &&
-                      IID == Intrinsic::nvvm_isspacep_shared) ||
-                     (AS == NVPTXAS::ADDRESS_SPACE_CONST &&
-                      IID == Intrinsic::nvvm_isspacep_const);
-    return ConstantInt::get(Ty, ASMatches);
-    break;
-  }
   default:
     break;
   }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index e35ba25b47880f..31087a0054e9fa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -15,10 +15,12 @@
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include <optional>
 using namespace llvm;
@@ -117,7 +119,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
 }
 
 // Convert NVVM intrinsics to target-generic LLVM code where possible.
-static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
+static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
+                                               IntrinsicInst *II) {
   // Each NVVM intrinsic we can simplify can be replaced with one of:
   //
   //  * an LLVM intrinsic,
@@ -413,11 +416,65 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
   llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }
 
+// Returns an instruction pointer (may be nullptr if we do not know the answer).
+// Returns nullopt if `II` is not one of the `isspacep` intrinsics.
+static std::optional<Instruction *>
+handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
+  Value *Op0 = II.getArgOperand(0);
+  // Returns true/false when we know the answer, nullopt otherwise.
+  auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
+    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+        AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+      return std::nullopt; // Got to check at run-time.
+    switch (IID) {
+    case Intrinsic::nvvm_isspacep_global:
+      return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
+    case Intrinsic::nvvm_isspacep_local:
+      return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
+    case Intrinsic::nvvm_isspacep_shared:
+      return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
+    case Intrinsic::nvvm_isspacep_shared_cluster:
+      // We can't tell shared from shared_cluster at compile time from AS alone,
+      // but it can't be either if AS is not shared.
+      return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
+                                                 : std::optional{false};
+    case Intrinsic::nvvm_isspacep_const:
+      return AS == NVPTXAS::ADDRESS_SPACE_CONST;
+    default:
+      llvm_unreachable("Unexpected intrinsic");
+    }
+  };
+
+  switch (auto IID = II.getIntrinsicID()) {
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster:
+  case Intrinsic::nvvm_isspacep_const: {
+    auto *Ty = II.getType();
+    unsigned AS = Op0->getType()->getPointerAddressSpace();
+    // Peek through ASC to generic AS.
+    // TODO: we could dig deeper through both ASCs and GEPs.
+    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC)
+      if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
+        AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
+
+    if (std::optional<bool> Answer = CheckASMatch(IID, AS))
+      return IC.replaceInstUsesWith(II, ConstantInt::get(Ty, *Answer));
+    return nullptr; // Don't know the answer, got to check at run time.
+  }
+  default:
+    return std::nullopt;
+  }
+}
+
 std::optional<Instruction *>
 NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
-  if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
+  if (std::optional<Instruction *> I = handleSpaceCheckIntrinsics(IC, II))
+    return *I;
+  if (Instruction *I = convertNvvmIntrinsicToLlvm(IC, &II))
     return I;
-  }
+
   return std::nullopt;
 }
 

>From fddfffe0db22f8d5ba5b1f009cdb606ab975b5c7 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Wed, 30 Oct 2024 14:31:17 -0700
Subject: [PATCH 6/7] revert changes to InstructionSimplify.cpp

---
 llvm/lib/Analysis/InstructionSimplify.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 90a92b9781badd..d08be1e55c853e 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -38,12 +38,10 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/Support/KnownBits.h"
-#include "llvm/Support/NVPTXAddrSpace.h"
 #include <algorithm>
 #include <optional>
 using namespace llvm;

>From 34e800b7b6eceacc6e32c942d145c5ff6aa5192c Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Wed, 30 Oct 2024 16:01:05 -0700
Subject: [PATCH 7/7] Do not touch intrinsic operands until we've checked
 intrinsic ID.

Fixes the crash in MLIR and CUDA compilation that occurred when instcombine processed some other NVVM intrinsic that takes no arguments.
---
 llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 31087a0054e9fa..3507573df1869f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -420,7 +420,6 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
 // Returns nullopt if `II` is not one of the `isspacep` intrinsics.
 static std::optional<Instruction *>
 handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
-  Value *Op0 = II.getArgOperand(0);
   // Returns true/false when we know the answer, nullopt otherwise.
   auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
     if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
@@ -451,7 +450,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
   case Intrinsic::nvvm_isspacep_shared:
   case Intrinsic::nvvm_isspacep_shared_cluster:
   case Intrinsic::nvvm_isspacep_const: {
-    auto *Ty = II.getType();
+    Value *Op0 = II.getArgOperand(0);
     unsigned AS = Op0->getType()->getPointerAddressSpace();
     // Peek through ASC to generic AS.
     // TODO: we could dig deeper through both ASCs and GEPs.
@@ -460,7 +459,8 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
         AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
 
     if (std::optional<bool> Answer = CheckASMatch(IID, AS))
-      return IC.replaceInstUsesWith(II, ConstantInt::get(Ty, *Answer));
+      return IC.replaceInstUsesWith(II,
+                                    ConstantInt::get(II.getType(), *Answer));
     return nullptr; // Don't know the answer, got to check at run time.
   }
   default:



More information about the llvm-commits mailing list