[llvm] r252573 - AArch64: add experimental support for address tagging.

Tim Northover via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 9 16:44:23 PST 2015


Author: tnorthover
Date: Mon Nov  9 18:44:23 2015
New Revision: 252573

URL: http://llvm.org/viewvc/llvm-project?rev=252573&view=rev
Log:
AArch64: add experimental support for address tagging.

AArch64 has the ability to use the top 8 bits of an "address" for extra
information, with the memory subsystem automatically masking them off for loads
and stores. When that's happening, the compiler can sometimes skip explicit
masks on memory operations.

However, this requires the host OS and supporting stack to preserve those bits,
so it can't be enabled everywhere. In principle iOS 8.0 and above take the
required precautions, but we'll put it under a flag for now.
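
As a concrete sketch of what this buys us (the function name below is made
up; the pattern mirrors the new tbi.ll test), a load whose address has had
the tag byte explicitly masked off no longer needs that mask once TBI is
assumed, since the AND only clears bits the hardware already ignores:

  ; 72057594037927935 == 0x00ffffffffffffff: keep bits 55:0, clear the tag byte
  define i32 @load_through_tagged_ptr(i64 %p) {
    %masked = and i64 %p, 72057594037927935
    %cast = inttoptr i64 %masked to i32*
    %val = load i32, i32* %cast
    ret i32 %val
  }

Compiled with "llc -aarch64-use-tbi -mtriple=arm64-apple-ios8.0.0" the AND is
dropped and the load goes through the incoming register directly; on ios7.1,
or without the flag, the mask is kept.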

Added:
    llvm/trunk/test/CodeGen/AArch64/tbi.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=252573&r1=252572&r2=252573&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Nov  9 18:44:23 2015
@@ -489,6 +489,8 @@ AArch64TargetLowering::AArch64TargetLowe
   setTargetDAGCombine(ISD::BITCAST);
   setTargetDAGCombine(ISD::CONCAT_VECTORS);
   setTargetDAGCombine(ISD::STORE);
+  if (Subtarget->supportsAddressTopByteIgnored())
+    setTargetDAGCombine(ISD::LOAD);
 
   setTargetDAGCombine(ISD::MUL);
 
@@ -8555,10 +8557,9 @@ static SDValue replaceSplatVectorStore(S
   return NewST1;
 }
 
-static SDValue performSTORECombine(SDNode *N,
-                                   TargetLowering::DAGCombinerInfo &DCI,
-                                   SelectionDAG &DAG,
-                                   const AArch64Subtarget *Subtarget) {
+static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                              SelectionDAG &DAG,
+                              const AArch64Subtarget *Subtarget) {
   if (!DCI.isBeforeLegalize())
     return SDValue();
 
@@ -8720,7 +8721,39 @@ static SDValue performPostLD1Combine(SDN
   return SDValue();
 }
 
-/// This function handles the log2-shuffle pattern produced by the
+/// Simplify \p Addr given that its top byte is ignored by HW during
+/// address translation.
+static bool performTBISimplification(SDValue Addr,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     SelectionDAG &DAG) {
+  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
+  APInt KnownZero, KnownOne;
+  TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+                                        DCI.isBeforeLegalizeOps());
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) {
+    DCI.CommitTargetLoweringOpt(TLO);
+    return true;
+  }
+  return false;
+}
+
+static SDValue performSTORECombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   SelectionDAG &DAG,
+                                   const AArch64Subtarget *Subtarget) {
+  SDValue Split = split16BStores(N, DCI, DAG, Subtarget);
+  if (Split.getNode())
+    return Split;
+
+  if (Subtarget->supportsAddressTopByteIgnored() &&
+      performTBISimplification(N->getOperand(2), DCI, DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+/// This function handles the log2-shuffle pattern produced by the
 /// LoopVectorizer for the across vector reduction. It consists of
 /// log2(NumVectorElements) steps and, in each step, 2^(s) elements
 /// are reduced, where s is an induction variable from 0 to
@@ -9575,6 +9608,10 @@ SDValue AArch64TargetLowering::PerformDA
   }
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
+  case ISD::LOAD:
+    if (performTBISimplification(N->getOperand(1), DCI, DAG))
+      return SDValue(N, 0);
+    break;
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
   case AArch64ISD::BRCOND:

Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp?rev=252573&r1=252572&r2=252573&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp Mon Nov  9 18:44:23 2015
@@ -31,6 +31,11 @@ static cl::opt<bool>
 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                      "converter pass"), cl::init(true), cl::Hidden);
 
+// If the OS supports TBI, use this flag to enable it.
+static cl::opt<bool>
+UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
+                         "an address is ignored"), cl::init(false), cl::Hidden);
+
 AArch64Subtarget &
 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
   // Determine default and user-specified characteristics
@@ -125,6 +130,19 @@ bool AArch64Subtarget::enableEarlyIfConv
   return EnableEarlyIfConvert;
 }
 
+bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
+  if (!UseAddressTopByteIgnored)
+    return false;
+
+  if (TargetTriple.isiOS()) {
+    unsigned Major, Minor, Micro;
+    TargetTriple.getiOSVersion(Major, Minor, Micro);
+    return Major >= 8;
+  }
+
+  return false;
+}
+
 std::unique_ptr<PBQPRAConstraint>
 AArch64Subtarget::getCustomPBQPConstraints() const {
   if (!isCortexA57())

Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=252573&r1=252572&r2=252573&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Mon Nov  9 18:44:23 2015
@@ -115,6 +115,10 @@ public:
   bool hasNEON() const { return HasNEON; }
   bool hasCrypto() const { return HasCrypto; }
   bool hasCRC() const { return HasCRC; }
+  /// CPU has TBI (top byte of addresses is ignored during HW address
+  /// translation) and OS enables it.
+  bool supportsAddressTopByteIgnored() const;
+
   bool hasPerfMon() const { return HasPerfMon; }
 
   bool isLittleEndian() const { return IsLittle; }

Added: llvm/trunk/test/CodeGen/AArch64/tbi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/tbi.ll?rev=252573&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/tbi.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/tbi.ll Mon Nov  9 18:44:23 2015
@@ -0,0 +1,102 @@
+; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios8.0.0 < %s \
+; RUN:     | FileCheck --check-prefix=TBI    --check-prefix=BOTH %s
+; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios7.1.0 < %s \
+; RUN:     | FileCheck --check-prefix=NO_TBI --check-prefix=BOTH %s
+
+; BOTH-LABEL:ld_and32:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_and32(i64 %p) {
+  %and = and i64 %p, 72057594037927935
+  %cast = inttoptr i64 %and to i32*
+  %load = load i32, i32* %cast
+  ret i32 %load
+}
+
+; load (r & MASK) + 4
+; BOTH-LABEL:ld_and_plus_offset:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_and_plus_offset(i64 %p) {
+  %and = and i64 %p, 72057594037927935
+  %cast = inttoptr i64 %and to i32*
+  %gep = getelementptr i32, i32* %cast, i64 4
+  %load = load i32, i32* %gep
+  ret i32 %load
+}
+
+; load (r & WIDER_MASK)
+; BOTH-LABEL:ld_and32_wider:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_and32_wider(i64 %p) {
+  %and = and i64 %p, 1152921504606846975
+  %cast = inttoptr i64 %and to i32*
+  %load = load i32, i32* %cast
+  ret i32 %load
+}
+
+; BOTH-LABEL:ld_and64:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i64 @ld_and64(i64 %p) {
+  %and = and i64 %p, 72057594037927935
+  %cast = inttoptr i64 %and to i64*
+  %load = load i64, i64* %cast
+  ret i64 %load
+}
+
+; BOTH-LABEL:st_and32:
+; TBI-NOT: and x
+; NO_TBI: and x
+define void @st_and32(i64 %p, i32 %v) {
+  %and = and i64 %p, 72057594037927935
+  %cast = inttoptr i64 %and to i32*
+  store i32 %v, i32* %cast
+  ret void
+}
+
+; load (x1 + x2) & MASK
+; BOTH-LABEL:ld_ro:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_ro(i64 %a, i64 %b) {
+  %p = add i64 %a, %b
+  %and = and i64 %p, 72057594037927935
+  %cast = inttoptr i64 %and to i32*
+  %load = load i32, i32* %cast
+  ret i32 %load
+}
+
+; load (r1 & MASK) + r2
+; BOTH-LABEL:ld_ro2:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_ro2(i64 %a, i64 %b) {
+  %and = and i64 %a, 72057594037927935
+  %p = add i64 %and, %b
+  %cast = inttoptr i64 %p to i32*
+  %load = load i32, i32* %cast
+  ret i32 %load
+}
+
+; load (r1 & MASK) | r2
+; BOTH-LABEL:ld_indirect_and:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_indirect_and(i64 %r1, i64 %r2) {
+  %and = and i64 %r1, 72057594037927935
+  %p = or i64 %and, %r2
+  %cast = inttoptr i64 %p to i32*
+  %load = load i32, i32* %cast
+  ret i32 %load
+}
+
+; BOTH-LABEL:ld_and32_narrower:
+; BOTH: and x
+define i32 @ld_and32_narrower(i64 %p) {
+  %and = and i64 %p, 36028797018963967
+  %cast = inttoptr i64 %and to i32*
+  %load = load i32, i32* %cast
+  ret i32 %load
+}
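
For reference, the removable vs. non-removable split in the tests above
follows directly from the 56-bit DemandedMask in performTBISimplification:

  72057594037927935   = 2^56 - 1 = 0x00ffffffffffffff: clears exactly bits
                        63:56, so the AND is redundant under TBI.
  1152921504606846975 = 2^60 - 1 = 0x0fffffffffffffff: clears only bits
                        63:60, still covering all 56 demanded bits, so it is
                        also redundant.
  36028797018963967   = 2^55 - 1 = 0x007fffffffffffff: additionally clears
                        bit 55, which is demanded, so this AND has to stay
                        (hence the "BOTH: and x" check in ld_and32_narrower).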



