[llvm] [AMDGPU] Utilities to asan instrument memory instructions. (PR #98863)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 09:35:03 PDT 2024


https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/98863

>From a22e111b8034a8c99f9c84ab9fe93d25a8201912 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 15 Jul 2024 11:44:01 +0530
Subject: [PATCH 1/4] [AMDGPU] Utilities to asan instrument memory
 instructions.

---
 .../Utils/AMDGPUAsanInstrumentation.cpp       | 282 ++++++++++++++++++
 .../AMDGPU/Utils/AMDGPUAsanInstrumentation.h  |  60 ++++
 llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt   |   1 +
 3 files changed, 343 insertions(+)
 create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
 create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
new file mode 100644
index 0000000000000..09d674f42d67b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
@@ -0,0 +1,282 @@
+#include "AMDGPUAsanInstrumentation.h"
+
+#define DEBUG_TYPE "amdgpu-asan-instrumentation"
+
+using namespace llvm;
+
+namespace llvm {
+namespace AMDGPU {
+
+const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
+const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
+const char kAMDGPULDSKernelId[] = "llvm.amdgcn.lds.kernel.id";
+
+static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF;
+static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
+
+static uint64_t getRedzoneSizeForScale(int AsanScale) {
+  // Redzone used for stack and globals is at least 32 bytes.
+  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+  return std::max(32U, 1U << AsanScale);
+}
+
+static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
+  return getRedzoneSizeForScale(AsanScale);
+}
+
+uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
+  constexpr uint64_t kMaxRZ = 1 << 18;
+  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);
+
+  uint64_t RZ = 0;
+  if (SizeInBytes <= MinRZ / 2) {
+    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
+    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
+    // half of MinRZ.
+    RZ = MinRZ - SizeInBytes;
+  } else {
+    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
+    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
+
+    // Round up to multiple of MinRZ.
+    if (SizeInBytes % MinRZ)
+      RZ += MinRZ - (SizeInBytes % MinRZ);
+  }
+
+  assert((RZ + SizeInBytes) % MinRZ == 0);
+
+  return RZ;
+}
+
+static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
+  size_t Res = llvm::countr_zero(TypeSize / 8);
+  return Res;
+}
+
+static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
+                                         Value *Cond, bool Recover) {
+  Value *ReportCond = Cond;
+  if (!Recover) {
+    auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(),
+                                        IRB.getInt1Ty());
+    ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond}));
+  }
+
+  auto *Trm = SplitBlockAndInsertIfThen(
+      ReportCond, &*IRB.GetInsertPoint(), false,
+      MDBuilder(M.getContext()).createBranchWeights(1, 100000));
+  Trm->getParent()->setName("asan.report");
+
+  if (Recover)
+    return Trm;
+
+  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
+  IRB.SetInsertPoint(Trm);
+  return IRB.CreateCall(
+      M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {});
+}
+
+static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Value *AddrLong,
+                                Value *ShadowValue, uint32_t TypeStoreSize,
+                                int AsanScale) {
+
+  unsigned int LongSize = M.getDataLayout().getPointerSizeInBits();
+  IntegerType *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  size_t Granularity = static_cast<size_t>(1) << AsanScale;
+  // Addr & (Granularity - 1)
+  Value *LastAccessedByte =
+      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+  // (Addr & (Granularity - 1)) + size - 1
+  if (TypeStoreSize / 8 > 1)
+    LastAccessedByte = IRB.CreateAdd(
+        LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
+  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+  LastAccessedByte =
+      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
+  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
+}
+
+static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
+                                      Instruction *InsertBefore, Value *Addr,
+                                      bool IsWrite, size_t AccessSizeIndex,
+                                      Value *SizeArgument, bool Recover) {
+  IRB.SetInsertPoint(InsertBefore);
+  CallInst *Call = nullptr;
+  int LongSize = M.getDataLayout().getPointerSizeInBits();
+  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  const char kAsanReportErrorTemplate[] = "__asan_report_";
+  const std::string TypeStr = IsWrite ? "store" : "load";
+  const std::string EndingStr = Recover ? "_noabort" : "";
+  SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
+  AttributeList AL2;
+  FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
+      kAsanReportErrorTemplate + TypeStr + "_n" + EndingStr,
+      FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
+  const std::string Suffix = TypeStr + llvm::itostr(1ULL << AccessSizeIndex);
+  SmallVector<Type *, 2> Args1{1, IntptrTy};
+  AttributeList AL1;
+  FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
+      kAsanReportErrorTemplate + Suffix + EndingStr,
+      FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
+  if (SizeArgument) {
+    Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
+  } else {
+    Call = IRB.CreateCall(AsanErrorCallback, Addr);
+  }
+
+  Call->setCannotMerge();
+  return Call;
+}
+
+static Value *memToShadow(Module &M, IRBuilder<> &IRB, Value *Shadow,
+                          int AsanScale, uint32_t AsanOffset) {
+  int LongSize = M.getDataLayout().getPointerSizeInBits();
+  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  // Shadow >> scale
+  Shadow = IRB.CreateLShr(Shadow, AsanScale);
+  if (AsanOffset == 0)
+    return Shadow;
+  // (Shadow >> scale) | offset
+  Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
+  return IRB.CreateAdd(Shadow, ShadowBase);
+}
+
+void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
+                       Instruction *InsertBefore, Value *Addr,
+                       MaybeAlign Alignment, uint32_t TypeStoreSize,
+                       bool IsWrite, Value *SizeArgument, bool UseCalls,
+                       bool Recover, int AsanScale, int AsanOffset) {
+  int LongSize = M.getDataLayout().getPointerSizeInBits();
+  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  IRB.SetInsertPoint(InsertBefore);
+  size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
+  Type *ShadowTy = IntegerType::get(M.getContext(),
+                                    std::max(8U, TypeStoreSize >> AsanScale));
+  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  Value *ShadowPtr = memToShadow(M, IRB, AddrLong, AsanScale, AsanOffset);
+  const uint64_t ShadowAlign =
+      std::max<uint64_t>(Alignment.valueOrOne().value() >> AsanScale, 1);
+  Value *ShadowValue = IRB.CreateAlignedLoad(
+      ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
+  Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
+  auto *Cmp2 = createSlowPathCmp(M, IRB, AddrLong, ShadowValue, TypeStoreSize,
+                                 AsanScale);
+  Cmp = IRB.CreateAnd(Cmp, Cmp2);
+  Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
+  Instruction *Crash =
+      generateCrashCode(M, IRB, CrashTerm, AddrLong, IsWrite, AccessSizeIndex,
+                        SizeArgument, Recover);
+  if (OrigIns->getDebugLoc())
+    Crash->setDebugLoc(OrigIns->getDebugLoc());
+  return;
+}
+
+void getInterestingMemoryOperands(
+    Module &M, Instruction *I,
+    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
+  const DataLayout &DL = M.getDataLayout();
+  unsigned int LongSize = M.getDataLayout().getPointerSizeInBits();
+  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
+                             LI->getType(), LI->getAlign());
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
+                             SI->getValueOperand()->getType(), SI->getAlign());
+  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
+                             RMW->getValOperand()->getType(), std::nullopt);
+  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
+                             XCHG->getCompareOperand()->getType(),
+                             std::nullopt);
+  } else if (auto CI = dyn_cast<CallInst>(I)) {
+    switch (CI->getIntrinsicID()) {
+    case Intrinsic::masked_load:
+    case Intrinsic::masked_store:
+    case Intrinsic::masked_gather:
+    case Intrinsic::masked_scatter: {
+      bool IsWrite = CI->getType()->isVoidTy();
+      // Masked store has an initial operand for the value.
+      unsigned OpOffset = IsWrite ? 1 : 0;
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = Align(1);
+      // Otherwise no alignment guarantees. We probably got Undef.
+      if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+        Alignment = Op->getMaybeAlignValue();
+      Value *Mask = CI->getOperand(2 + OpOffset);
+      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
+      break;
+    }
+    case Intrinsic::masked_expandload:
+    case Intrinsic::masked_compressstore: {
+      bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
+      unsigned OpOffset = IsWrite ? 1 : 0;
+      auto BasePtr = CI->getOperand(OpOffset);
+      MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      IRBuilder<> IB(I);
+      Value *Mask = CI->getOperand(1 + OpOffset);
+      // Use the popcount of Mask as the effective vector length.
+      Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
+      Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
+      Value *EVL = IB.CreateAddReduce(ExtMask);
+      Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
+      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
+                               EVL);
+      break;
+    }
+    case Intrinsic::vp_load:
+    case Intrinsic::vp_store:
+    case Intrinsic::experimental_vp_strided_load:
+    case Intrinsic::experimental_vp_strided_store: {
+      auto *VPI = cast<VPIntrinsic>(CI);
+      unsigned IID = CI->getIntrinsicID();
+      bool IsWrite = CI->getType()->isVoidTy();
+      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
+      Value *Stride = nullptr;
+      if (IID == Intrinsic::experimental_vp_strided_store ||
+          IID == Intrinsic::experimental_vp_strided_load) {
+        Stride = VPI->getOperand(PtrOpNo + 1);
+        // Use the pointer alignment as the element alignment if the stride is a
+        // multiple of the pointer alignment. Otherwise, the element alignment
+        // should be Align(1).
+        unsigned PointerAlign = Alignment.valueOrOne().value();
+        if (!isa<ConstantInt>(Stride) ||
+            cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
+          Alignment = Align(1);
+      }
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+                               VPI->getMaskParam(), VPI->getVectorLengthParam(),
+                               Stride);
+      break;
+    }
+    case Intrinsic::vp_gather:
+    case Intrinsic::vp_scatter: {
+      auto *VPI = cast<VPIntrinsic>(CI);
+      unsigned IID = CI->getIntrinsicID();
+      bool IsWrite = IID == Intrinsic::vp_scatter;
+      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = VPI->getPointerAlignment();
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+                               VPI->getMaskParam(),
+                               VPI->getVectorLengthParam());
+      break;
+    }
+    default:
+      for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
+        if (!CI->isByValArgument(ArgNo))
+          continue;
+        Type *Ty = CI->getParamByValType(ArgNo);
+        Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+      }
+    }
+  }
+}
+} // end namespace AMDGPU
+} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
new file mode 100644
index 0000000000000..fa417bf97205b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
@@ -0,0 +1,60 @@
+//===- AMDGPUAsanInstrumentation.h - Address Sanitizer related helper functions
+//-*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
+
+#include "AMDGPU.h"
+#include "AMDGPUBaseInfo.h"
+#include "AMDGPUMemoryUtils.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/OptimizedStructLayout.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+namespace llvm {
+namespace AMDGPU {
+
+/// Given SizeInBytes of the Value to be instrumented,
+/// Returns the redzone size corresponding to it.
+uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes);
+
+/// Instrument the memory operand Addr.
+/// Generates report blocks that catch the addressing errors.
+void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
+                       Instruction *InsertBefore, Value *Addr,
+                       MaybeAlign Alignment, uint32_t TypeStoreSize,
+                       bool IsWrite, Value *SizeArgument, bool UseCalls,
+                       bool Recover, int Scale, int Offset);
+
+/// Get all the memory operands from the instruction
+/// that need to be instrumented.
+void getInterestingMemoryOperands(
+    Module &M, Instruction *I,
+    SmallVectorImpl<InterestingMemoryOperand> &Interesting);
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
index 09b8da9f5dd48..4d69fb67db860 100644
--- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMAMDGPUUtils
+  AMDGPUAsanInstrumentation.cpp
   AMDGPUAsmUtils.cpp
   AMDGPUBaseInfo.cpp
   AMDGPUDelayedMCExpr.cpp

>From e6de5a9281e9a25e4b281ca15a20b225de547c68 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 16 Jul 2024 22:15:10 +0530
Subject: [PATCH 2/4] [AMDGPU] Update PR as per review feedback:1

---
 .../Utils/AMDGPUAsanInstrumentation.cpp       | 109 ++++++++++--------
 .../AMDGPU/Utils/AMDGPUAsanInstrumentation.h  |   6 +-
 2 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
index 09d674f42d67b..f25eed26ba01b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
@@ -1,3 +1,11 @@
+//===AMDGPUAsanInstrumentation.cpp - ASAN related helper functions===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-------------------------------------------------------------===//
+
 #include "AMDGPUAsanInstrumentation.h"
 
 #define DEBUG_TYPE "amdgpu-asan-instrumentation"
@@ -7,13 +15,6 @@ using namespace llvm;
 namespace llvm {
 namespace AMDGPU {
 
-const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
-const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
-const char kAMDGPULDSKernelId[] = "llvm.amdgcn.lds.kernel.id";
-
-static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF;
-static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
-
 static uint64_t getRedzoneSizeForScale(int AsanScale) {
   // Redzone used for stack and globals is at least 32 bytes.
   // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
@@ -57,14 +58,14 @@ static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
                                          Value *Cond, bool Recover) {
   Value *ReportCond = Cond;
   if (!Recover) {
-    auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(),
-                                        IRB.getInt1Ty());
-    ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond}));
+    auto *Ballot =
+        IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
+    ReportCond = IRB.CreateIsNotNull(Ballot);
   }
 
   auto *Trm = SplitBlockAndInsertIfThen(
       ReportCond, &*IRB.GetInsertPoint(), false,
-      MDBuilder(M.getContext()).createBranchWeights(1, 100000));
+      MDBuilder(M.getContext()).createUnlikelyBranchWeights());
   Trm->getParent()->setName("asan.report");
 
   if (Recover)
@@ -72,17 +73,13 @@ static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
 
   Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
   IRB.SetInsertPoint(Trm);
-  return IRB.CreateCall(
-      M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {});
+  return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {}, {});
 }
 
-static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Value *AddrLong,
-                                Value *ShadowValue, uint32_t TypeStoreSize,
-                                int AsanScale) {
-
-  unsigned int LongSize = M.getDataLayout().getPointerSizeInBits();
-  IntegerType *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
-  size_t Granularity = static_cast<size_t>(1) << AsanScale;
+static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
+                                Value *AddrLong, Value *ShadowValue,
+                                uint32_t TypeStoreSize, int AsanScale) {
+  uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
   // Addr & (Granularity - 1)
   Value *LastAccessedByte =
       IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
@@ -98,26 +95,36 @@ static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Value *AddrLong,
 }
 
 static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
-                                      Instruction *InsertBefore, Value *Addr,
-                                      bool IsWrite, size_t AccessSizeIndex,
+                                      Type *IntptrTy, Instruction *InsertBefore,
+                                      Value *Addr, bool IsWrite,
+                                      size_t AccessSizeIndex,
                                       Value *SizeArgument, bool Recover) {
   IRB.SetInsertPoint(InsertBefore);
   CallInst *Call = nullptr;
-  int LongSize = M.getDataLayout().getPointerSizeInBits();
-  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
-  const char kAsanReportErrorTemplate[] = "__asan_report_";
-  const std::string TypeStr = IsWrite ? "store" : "load";
-  const std::string EndingStr = Recover ? "_noabort" : "";
+  SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
+  SmallString<64> TypeStr{IsWrite ? "store" : "load"};
+  SmallString<64> EndingStr{Recover ? "_noabort" : ""};
+
+  SmallString<128> AsanErrorCallbackSizedString;
+  raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
+  AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
+                           << EndingStr;
+
   SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
   AttributeList AL2;
   FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
-      kAsanReportErrorTemplate + TypeStr + "_n" + EndingStr,
+      AsanErrorCallbackSizedOS.str(),
       FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
-  const std::string Suffix = TypeStr + llvm::itostr(1ULL << AccessSizeIndex);
   SmallVector<Type *, 2> Args1{1, IntptrTy};
   AttributeList AL1;
+
+  SmallString<128> AsanErrorCallbackString;
+  raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
+  AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
+                      << llvm::itostr(1ULL << AccessSizeIndex) << EndingStr;
+
   FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
-      kAsanReportErrorTemplate + Suffix + EndingStr,
+      AsanErrorCallbackOS.str(),
       FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
   if (SizeArgument) {
     Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
@@ -129,10 +136,8 @@ static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
   return Call;
 }
 
-static Value *memToShadow(Module &M, IRBuilder<> &IRB, Value *Shadow,
-                          int AsanScale, uint32_t AsanOffset) {
-  int LongSize = M.getDataLayout().getPointerSizeInBits();
-  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
+                          Value *Shadow, int AsanScale, uint32_t AsanOffset) {
   // Shadow >> scale
   Shadow = IRB.CreateLShr(Shadow, AsanScale);
   if (AsanOffset == 0)
@@ -147,29 +152,31 @@ void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
                        MaybeAlign Alignment, uint32_t TypeStoreSize,
                        bool IsWrite, Value *SizeArgument, bool UseCalls,
                        bool Recover, int AsanScale, int AsanOffset) {
-  int LongSize = M.getDataLayout().getPointerSizeInBits();
-  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  Type *AddrTy = Addr->getType();
+  assert(AddrTy->isPointerTy() && "Address should be pointer type.");
+  Type *IntptrTy = M.getDataLayout().getIntPtrType(
+      M.getContext(), AddrTy->getPointerAddressSpace());
   IRB.SetInsertPoint(InsertBefore);
   size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
   Type *ShadowTy = IntegerType::get(M.getContext(),
                                     std::max(8U, TypeStoreSize >> AsanScale));
   Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
-  Value *ShadowPtr = memToShadow(M, IRB, AddrLong, AsanScale, AsanOffset);
+  Value *ShadowPtr =
+      memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
   const uint64_t ShadowAlign =
       std::max<uint64_t>(Alignment.valueOrOne().value() >> AsanScale, 1);
   Value *ShadowValue = IRB.CreateAlignedLoad(
       ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
   Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
-  auto *Cmp2 = createSlowPathCmp(M, IRB, AddrLong, ShadowValue, TypeStoreSize,
-                                 AsanScale);
+  auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
+                                 TypeStoreSize, AsanScale);
   Cmp = IRB.CreateAnd(Cmp, Cmp2);
   Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
   Instruction *Crash =
-      generateCrashCode(M, IRB, CrashTerm, AddrLong, IsWrite, AccessSizeIndex,
-                        SizeArgument, Recover);
-  if (OrigIns->getDebugLoc())
-    Crash->setDebugLoc(OrigIns->getDebugLoc());
+      generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
+                        AccessSizeIndex, SizeArgument, Recover);
+  Crash->setDebugLoc(OrigIns->getDebugLoc());
   return;
 }
 
@@ -177,8 +184,6 @@ void getInterestingMemoryOperands(
     Module &M, Instruction *I,
     SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
   const DataLayout &DL = M.getDataLayout();
-  unsigned int LongSize = M.getDataLayout().getPointerSizeInBits();
-  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                              LI->getType(), LI->getAlign());
@@ -219,6 +224,8 @@ void getInterestingMemoryOperands(
       Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
       IRBuilder<> IB(I);
       Value *Mask = CI->getOperand(1 + OpOffset);
+      Type *IntptrTy = M.getDataLayout().getIntPtrType(
+          M.getContext(), BasePtr->getType()->getPointerAddressSpace());
       // Use the popcount of Mask as the effective vector length.
       Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
       Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
@@ -270,10 +277,14 @@ void getInterestingMemoryOperands(
     }
     default:
       for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
-        if (!CI->isByValArgument(ArgNo))
-          continue;
-        Type *Ty = CI->getParamByValType(ArgNo);
-        Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+        Value *Arg = CI->getArgOperand(ArgNo);
+        if (Arg->getType()->isPointerTy()) {
+          Type *Ty = Arg->getType();
+          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+        } else if (CI->isByValArgument(ArgNo)) {
+          Type *Ty = CI->getParamByValType(ArgNo);
+          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+        }
       }
     }
   }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
index fa417bf97205b..b438004aa6ce7 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
@@ -1,11 +1,10 @@
-//===- AMDGPUAsanInstrumentation.h - Address Sanitizer related helper functions
-//-*- C++ -*----===//
+//===- AMDGPUAsanInstrumentation.h - ASAN helper functions ------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===----------------------------------------------------------------------------------------===//
+//===--------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
@@ -21,6 +20,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"

>From 84990caf41bdfccef8dc2ebd0d0effe218d0462d Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 17 Jul 2024 17:14:34 +0530
Subject: [PATCH 3/4] [AMDGPU] Update PR as per review feedback:2

---
 .../Utils/AMDGPUAsanInstrumentation.cpp       | 45 ++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
index f25eed26ba01b..71fe5ac022e23 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
@@ -161,7 +161,7 @@ void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
   Type *ShadowTy = IntegerType::get(M.getContext(),
                                     std::max(8U, TypeStoreSize >> AsanScale));
   Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
-  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
   Value *ShadowPtr =
       memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
   const uint64_t ShadowAlign =
@@ -275,6 +275,49 @@ void getInterestingMemoryOperands(
                                VPI->getVectorLengthParam());
       break;
     }
+    case Intrinsic::amdgcn_raw_buffer_load:
+    case Intrinsic::amdgcn_raw_ptr_buffer_load:
+    case Intrinsic::amdgcn_raw_buffer_load_format:
+    case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
+    case Intrinsic::amdgcn_raw_tbuffer_load:
+    case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
+    case Intrinsic::amdgcn_struct_buffer_load:
+    case Intrinsic::amdgcn_struct_ptr_buffer_load:
+    case Intrinsic::amdgcn_struct_buffer_load_format:
+    case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
+    case Intrinsic::amdgcn_struct_tbuffer_load:
+    case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
+    case Intrinsic::amdgcn_s_buffer_load:
+    case Intrinsic::amdgcn_global_load_tr_b64:
+    case Intrinsic::amdgcn_global_load_tr_b128: {
+      unsigned PtrOpNo = 0;
+      bool IsWrite = false;
+      Type *Ty = CI->getType();
+      Value *Ptr = CI->getArgOperand(PtrOpNo);
+      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
+      break;
+    }
+    case Intrinsic::amdgcn_raw_tbuffer_store:
+    case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
+    case Intrinsic::amdgcn_raw_buffer_store:
+    case Intrinsic::amdgcn_raw_ptr_buffer_store:
+    case Intrinsic::amdgcn_raw_buffer_store_format:
+    case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
+    case Intrinsic::amdgcn_struct_buffer_store:
+    case Intrinsic::amdgcn_struct_ptr_buffer_store:
+    case Intrinsic::amdgcn_struct_buffer_store_format:
+    case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
+    case Intrinsic::amdgcn_struct_tbuffer_store:
+    case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
+      unsigned PtrOpNo = 1;
+      bool IsWrite = true;
+      Value *Ptr = CI->getArgOperand(PtrOpNo);
+      Type *Ty = Ptr->getType();
+      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
+      break;
+    }
     default:
       for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
         Value *Arg = CI->getArgOperand(ArgNo);

>From fc002d46eb03c9344f3b90283bd5d06699d6a3b8 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 23 Jul 2024 21:59:15 +0530
Subject: [PATCH 4/4] [AMDGPU] Update PR as per review feedback:3

---
 .../AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp       | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
index 71fe5ac022e23..aeae5cd160c24 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
@@ -121,7 +121,8 @@ static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
   SmallString<128> AsanErrorCallbackString;
   raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
   AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
-                      << llvm::itostr(1ULL << AccessSizeIndex) << EndingStr;
+                      << llvm::format("%d", 1ULL << AccessSizeIndex)
+                      << EndingStr;
 
   FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
       AsanErrorCallbackOS.str(),
@@ -153,7 +154,6 @@ void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
                        bool IsWrite, Value *SizeArgument, bool UseCalls,
                        bool Recover, int AsanScale, int AsanOffset) {
   Type *AddrTy = Addr->getType();
-  assert(AddrTy->isPointerTy() && "Address should be pointer type.");
   Type *IntptrTy = M.getDataLayout().getIntPtrType(
       M.getContext(), AddrTy->getPointerAddressSpace());
   IRB.SetInsertPoint(InsertBefore);
@@ -320,13 +320,11 @@ void getInterestingMemoryOperands(
     }
     default:
       for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
-        Value *Arg = CI->getArgOperand(ArgNo);
-        if (Arg->getType()->isPointerTy()) {
-          Type *Ty = Arg->getType();
+        if (Type *Ty = CI->getParamByRefType(ArgNo)) {
           Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
         } else if (CI->isByValArgument(ArgNo)) {
-          Type *Ty = CI->getParamByValType(ArgNo);
-          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+          Type *ValTy = CI->getParamByValType(ArgNo);
+          Interesting.emplace_back(I, ArgNo, false, ValTy, Align(1));
         }
       }
     }



More information about the llvm-commits mailing list