[llvm] [AMDGPU] Utilities to asan instrument memory instructions. (PR #98863)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 04:56:33 PDT 2024


================
@@ -0,0 +1,282 @@
+#include "AMDGPUAsanInstrumentation.h"
+
+#define DEBUG_TYPE "amdgpu-asan-instrumentation"
+
+using namespace llvm;
+
+namespace llvm {
+namespace AMDGPU {
+
+const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
+const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
+const char kAMDGPULDSKernelId[] = "llvm.amdgcn.lds.kernel.id";
+
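+// Shadow offset base and alignment mask of the x86-64 small memory model.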
+static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF;
+static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
+
+static uint64_t getRedzoneSizeForScale(int AsanScale) {
+  // Redzone used for stack and globals is at least 32 bytes.
+  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+  return std::max(32U, 1U << AsanScale);
+}
+
+static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
+  return getRedzoneSizeForScale(AsanScale);
+}
+
+uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
+  constexpr uint64_t kMaxRZ = 1 << 18;
+  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);
+
+  uint64_t RZ = 0;
+  if (SizeInBytes <= MinRZ / 2) {
+    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
+    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
+    // half of MinRZ.
+    RZ = MinRZ - SizeInBytes;
+  } else {
+    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
+    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
+
+    // Round up to multiple of MinRZ.
+    if (SizeInBytes % MinRZ)
+      RZ += MinRZ - (SizeInBytes % MinRZ);
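+    // E.g. SizeInBytes = 1000, MinRZ = 32: the clamp gives 224 and the
+    // rounding adds 32 - (1000 % 32) = 24, so RZ = 248 and Size + RZ = 1248.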
+  }
+
+  assert((RZ + SizeInBytes) % MinRZ == 0);
+
+  return RZ;
+}
+
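+// Map an access size in bits to the ASan callback index:
+// 8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3, 128 -> 4.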
+static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
+  size_t Res = llvm::countr_zero(TypeSize / 8);
+  return Res;
+}
+
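+// Emit the "asan.report" block. In non-recover mode, the per-lane fault
+// condition is first ballotted across the wave so that all lanes branch to
+// the report block uniformly, and only the faulting lanes then reach the
+// llvm.amdgcn.unreachable trap.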
+static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
+                                         Value *Cond, bool Recover) {
+  Value *ReportCond = Cond;
+  if (!Recover) {
+    auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(),
+                                        IRB.getInt1Ty());
+    ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond}));
+  }
+
+  auto *Trm = SplitBlockAndInsertIfThen(
+      ReportCond, &*IRB.GetInsertPoint(), false,
+      MDBuilder(M.getContext()).createBranchWeights(1, 100000));
+  Trm->getParent()->setName("asan.report");
+
+  if (Recover)
+    return Trm;
+
+  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
+  IRB.SetInsertPoint(Trm);
+  return IRB.CreateCall(
+      M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {});
+}
+
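+// Slow-path check for accesses narrower than the shadow granularity: the
+// access is bad if the offset of its last byte within the granule is >= the
+// (non-zero) shadow value.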
+static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Value *AddrLong,
+                                Value *ShadowValue, uint32_t TypeStoreSize,
+                                int AsanScale) {
+
+  unsigned int LongSize = M.getDataLayout().getPointerSizeInBits();
+  IntegerType *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  size_t Granularity = static_cast<size_t>(1) << AsanScale;
+  // Addr & (Granularity - 1)
+  Value *LastAccessedByte =
+      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+  // (Addr & (Granularity - 1)) + size - 1
+  if (TypeStoreSize / 8 > 1)
+    LastAccessedByte = IRB.CreateAdd(
+        LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
+  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+  LastAccessedByte =
+      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
+  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
+}
+
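+// Emit the call to the matching __asan_report_{load,store}* routine: the
+// sized "_n" variant when SizeArgument is given, otherwise the variant for
+// this access size (with a "_noabort" suffix in recover mode).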
+static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
+                                      Instruction *InsertBefore, Value *Addr,
+                                      bool IsWrite, size_t AccessSizeIndex,
+                                      Value *SizeArgument, bool Recover) {
+  IRB.SetInsertPoint(InsertBefore);
+  CallInst *Call = nullptr;
+  int LongSize = M.getDataLayout().getPointerSizeInBits();
+  Type *IntptrTy = Type::getIntNTy(M.getContext(), LongSize);
+  const char kAsanReportErrorTemplate[] = "__asan_report_";
+  const std::string TypeStr = IsWrite ? "store" : "load";
+  const std::string EndingStr = Recover ? "_noabort" : "";
+  SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
+  AttributeList AL2;
+  FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
+      kAsanReportErrorTemplate + TypeStr + "_n" + EndingStr,
+      FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
+  const std::string Suffix = TypeStr + llvm::itostr(1ULL << AccessSizeIndex);
+  SmallVector<Type *, 2> Args1{1, IntptrTy};
+  AttributeList AL1;
+  FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
+      kAsanReportErrorTemplate + Suffix + EndingStr,
+      FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
+  if (SizeArgument) {
+    Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
+  } else {
+    Call = IRB.CreateCall(AsanErrorCallback, Addr);
+  }
+
+  Call->setCannotMerge();
----------------
skc7 wrote:

These utilities are taken from AddressSanitizer.cpp and haven't been changed much. I assume setCannotMerge keeps the optimizer from merging/folding the report call with identical calls elsewhere.
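
For reference, a minimal sketch of what the flag does (not from the PR; the helper name emitReportCall is made up): setCannotMerge() attaches the 'nomerge' attribute to the call site, which stops passes such as SimplifyCFG's tail merging from combining identical report calls emitted at different failure sites, so each site keeps its own call and debug location.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: emit an ASan report call that must stay distinct
// per call site.
static CallInst *emitReportCall(IRBuilder<> &IRB, FunctionCallee Callee,
                                Value *Addr) {
  CallInst *Call = IRB.CreateCall(Callee, Addr);
  Call->setCannotMerge(); // adds the 'nomerge' attribute to this call site
  return Call;
}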

https://github.com/llvm/llvm-project/pull/98863

