[llvm] [hwasan] Optimize outlined memaccess for fixed shadow on Aarch64 (PR #88544)

Thurston Dang via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 12 10:46:29 PDT 2024


https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/88544

The HWASan transform currently always uses x20 to pass the shadow base to hwasan_check_memaccess_shortgranules, even if the shadow base is a constant known at compile time (via -hwasan-mapping-offset). This patch introduces a fixed shadow variant of the hwasan_check_memaccess_shortgranules intrinsic, allowing the shadow base to be materialized inside the memaccess callee.

We currently only support this optimization for AArch64. It is a no-op on other platforms, or if -hwasan-mapping-offset is not specified.

Note: when -hwasan-mapping-offset is specified, it is necessary to specify HWASAN_OPTIONS=fixed_shadow_base=... (see ea991a11b2a3d2bfa545adbefb71cd17e8970a43) to ensure that the runtime will map the shadow appropriately.

>From a8e6fdc2aa9615911512e3447a5d33c7296de996 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 12 Apr 2024 17:05:42 +0000
Subject: [PATCH] [hwasan] Optimize outlined memaccess for fixed shadow on
 Aarch64

The HWASan transform currently always uses x20 to pass the shadow
base to hwasan_check_memaccess_shortgranules, even if the shadow
base is a constant known at compile time (via -hwasan-mapping-offset).
This patch introduces a fixed shadow variant of the
hwasan_check_memaccess_shortgranules intrinsic, allowing the shadow
base to be materialized inside the memaccess callee.

We currently only support this optimization for AArch64. It is a
no-op on other platforms, or if -hwasan-mapping-offset is not
specified.

Note: when -hwasan-mapping-offset is specified, it is necessary to
specify HWASAN_OPTIONS=fixed_shadow_base=... (see ea991a11b2a3d2bfa545adbefb71cd17e8970a43)
to ensure that the runtime will map the shadow appropriately.
---
 llvm/include/llvm/IR/Intrinsics.td            |  5 ++
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 49 +++++++++++++++----
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  7 +++
 .../Instrumentation/HWAddressSanitizer.cpp    | 28 +++++++++--
 4 files changed, 75 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index f0723a633f0fc5..dee12d15c69381 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2367,6 +2367,10 @@ def int_hwasan_check_memaccess :
 def int_hwasan_check_memaccess_shortgranules :
   Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
             [ImmArg<ArgIndex<2>>]>;
+def int_hwasan_check_memaccess_fixedshadow_shortgranules :
+  Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i64_ty],
+            [ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
+
 
 // Xray intrinsics
 //===----------------------------------------------------------------------===//
@@ -2666,4 +2670,5 @@ include "llvm/IR/IntrinsicsVE.td"
 include "llvm/IR/IntrinsicsDirectX.td"
 include "llvm/IR/IntrinsicsLoongArch.td"
 
+
 #endif // TEST_INTRINSICS_SUPPRESS_DEFS
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index f6ccd0ecfdc893..5ab54d37f30686 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -117,6 +117,7 @@ class AArch64AsmPrinter : public AsmPrinter {
   void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, bool Typed);
 
   typedef std::tuple<unsigned, bool, uint32_t> HwasanMemaccessTuple;
+  unsigned long long HwasanFixedShadowBase = (unsigned long long)-1;
   std::map<HwasanMemaccessTuple, MCSymbol *> HwasanMemaccessSymbols;
   void LowerKCFI_CHECK(const MachineInstr &MI);
   void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI);
@@ -551,8 +552,17 @@ void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
 void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
   Register Reg = MI.getOperand(0).getReg();
   bool IsShort =
-      MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES;
+      ((MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES) ||
+       (MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_FIXEDSHADOW_SHORTGRANULES));
   uint32_t AccessInfo = MI.getOperand(1).getImm();
+
+  if (MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_FIXEDSHADOW_SHORTGRANULES) {
+      if (HwasanFixedShadowBase != (unsigned long long)-1)
+        assert(HwasanFixedShadowBase == (unsigned long long) MI.getOperand(2).getImm());
+
+      HwasanFixedShadowBase = MI.getOperand(2).getImm();
+  }
+
   MCSymbol *&Sym =
       HwasanMemaccessSymbols[HwasanMemaccessTuple(Reg, IsShort, AccessInfo)];
   if (!Sym) {
@@ -625,14 +635,34 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) {
                                      .addImm(4)
                                      .addImm(55),
                                  *STI);
-    OutStreamer->emitInstruction(
-        MCInstBuilder(AArch64::LDRBBroX)
-            .addReg(AArch64::W16)
-            .addReg(IsShort ? AArch64::X20 : AArch64::X9)
-            .addReg(AArch64::X16)
-            .addImm(0)
-            .addImm(0),
-        *STI);
+
+    if (HwasanFixedShadowBase != (unsigned long long)-1) {
+      assert(IsShort);
+      OutStreamer->emitInstruction(
+          MCInstBuilder(AArch64::MOVZXi)
+              .addReg(AArch64::X17)
+              .addImm(HwasanFixedShadowBase >> 32)
+              .addImm(32),
+          *STI);
+      OutStreamer->emitInstruction(
+          MCInstBuilder(AArch64::LDRBBroX)
+              .addReg(AArch64::W16)
+              .addReg(AArch64::X17)
+              .addReg(AArch64::X16)
+              .addImm(0)
+              .addImm(0),
+          *STI);
+    } else {
+      OutStreamer->emitInstruction(
+          MCInstBuilder(AArch64::LDRBBroX)
+              .addReg(AArch64::W16)
+              .addReg(IsShort ? AArch64::X20 : AArch64::X9)
+              .addReg(AArch64::X16)
+              .addImm(0)
+              .addImm(0),
+          *STI);
+    }
+
     OutStreamer->emitInstruction(
         MCInstBuilder(AArch64::SUBSXrs)
             .addReg(AArch64::XZR)
@@ -1765,6 +1795,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
 
   case AArch64::HWASAN_CHECK_MEMACCESS:
   case AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES:
+  case AArch64::HWASAN_CHECK_MEMACCESS_FIXEDSHADOW_SHORTGRANULES:
     LowerHWASAN_CHECK_MEMACCESS(*MI);
     return;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index e1624f70185e1e..20f1a40263f824 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1818,6 +1818,13 @@ def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
   Sched<[]>;
 }
 
+let Defs = [ X16, X17, LR, NZCV ] in {
+def HWASAN_CHECK_MEMACCESS_FIXEDSHADOW_SHORTGRANULES : Pseudo<
+  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$shadow_base),
+  [(int_hwasan_check_memaccess_fixedshadow_shortgranules GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$shadow_base))]>,
+  Sched<[]>;
+}
+
 // The virtual cycle counter register is CNTVCT_EL0.
 def : Pat<(readcyclecounter), (MRS 0xdf02)>;
 
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 82f60f4feed454..41c492a0000f17 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -929,11 +929,29 @@ void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
 
   IRBuilder<> IRB(InsertBefore);
   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
-  IRB.CreateCall(Intrinsic::getDeclaration(
-                     M, UseShortGranules
-                            ? Intrinsic::hwasan_check_memaccess_shortgranules
-                            : Intrinsic::hwasan_check_memaccess),
-                 {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+
+  // Aarch64 makes it difficult to embed large constants (such as the shadow
+  // offset) in the code. Our intrinsic supports a 16-bit constant (to be left
+  // shifted by 32 bits) - this is not an onerous constraint, since
+  // 1) kShadowBaseAlignment == 32 2) an offset of 4TB (1024 << 32) is
+  // representable, and ought to be good enough for anybody.
+  bool useFixedShadowIntrinsic = true;
+  if (TargetTriple.isAArch64() && ClMappingOffset.getNumOccurrences() > 0 && UseShortGranules) {
+    uint16_t offset_shifted = Mapping.Offset >> 32;
+    if ((uint64_t)offset_shifted << 32 != Mapping.Offset)
+        useFixedShadowIntrinsic = false;
+  } else
+    useFixedShadowIntrinsic = false;
+
+  if (useFixedShadowIntrinsic)
+    IRB.CreateCall(Intrinsic::getDeclaration(
+                       M, Intrinsic::hwasan_check_memaccess_fixedshadow_shortgranules),
+                   {Ptr, ConstantInt::get(Int32Ty, AccessInfo), ConstantInt::get(Int64Ty, Mapping.Offset)});
+  else
+    IRB.CreateCall(Intrinsic::getDeclaration(
+                       M, UseShortGranules ? Intrinsic::hwasan_check_memaccess_shortgranules
+                                           : Intrinsic::hwasan_check_memaccess),
+                   {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
 }
 
 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,



More information about the llvm-commits mailing list