[llvm] 82b51a1 - [AArch64] Support SLC in ACLE prefetch intrinsics

Archibald Elliott via llvm-commits <llvm-commits@lists.llvm.org>
Fri Dec 16 06:42:56 PST 2022


Author: Archibald Elliott
Date: 2022-12-16T14:42:27Z
New Revision: 82b51a14280414a53413ed62c001d2c589c649c3

URL: https://github.com/llvm/llvm-project/commit/82b51a14280414a53413ed62c001d2c589c649c3
DIFF: https://github.com/llvm/llvm-project/commit/82b51a14280414a53413ed62c001d2c589c649c3.diff

LOG: [AArch64] Support SLC in ACLE prefetch intrinsics

This change:
- Modifies the ACLE code to allow the new SLC value (3) for the prefetch
  target.

- Introduces a new intrinsic, @llvm.aarch64.prefetch, which matches the
  PRFM family of instructions much more closely, and can represent all
  values of the PRFM immediate.

  The target-independent @llvm.prefetch intrinsic does not carry enough
  information for the ACLE intrinsics to be lowered to it correctly.

- Lowers the ACLE calls to the new intrinsic on AArch64 (the ARM
  lowering is unchanged); a worked example follows this list.

- Implements code generation for the new intrinsic in both SelectionDAG
  and GlobalISel. We deliberately keep support for lowering the
  target-independent @llvm.prefetch intrinsic so that other frontends
  can still use it.

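For orientation, a minimal end-to-end sketch of the new path, assuming
only what the tests in this patch check (the function name is made up):

    // Compile for AArch64 with clang.
    void touch_slc(void *p) {
      // Arguments: (addr, rw, target, retention, is_data).
      // target == 3 requests the system-level cache (SLC).
      __builtin_arm_prefetch(p, 0, 3, 0, 1);
      // Clang now emits:
      //   call void @llvm.aarch64.prefetch(ptr %p, i32 0, i32 3, i32 0, i32 1)
      // and the backend selects:
      //   prfm pldslckeep, [x0]
    }
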
Differential Revision: https://reviews.llvm.org/D139443

Added: 
    llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/arm_acle.c
    clang/test/CodeGen/builtins-arm64.c
    clang/test/Sema/builtins-arm64.c
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Removed: 
    


################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bd96108d6dc00..55aa9f6acd0fe 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9749,29 +9749,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
   }
 
-  if (BuiltinID == clang::AArch64::BI__builtin_arm_prefetch) {
-    Value *Address         = EmitScalarExpr(E->getArg(0));
-    Value *RW              = EmitScalarExpr(E->getArg(1));
-    Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
-    Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
-    Value *IsData          = EmitScalarExpr(E->getArg(4));
-
-    Value *Locality = nullptr;
-    if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
-      // Temporal fetch, needs to convert cache level to locality.
-      Locality = llvm::ConstantInt::get(Int32Ty,
-        -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
-    } else {
-      // Streaming fetch.
-      Locality = llvm::ConstantInt::get(Int32Ty, 0);
-    }
-
-    // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
-    // PLDL3STRM or PLDL2STRM.
-    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
-    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
-  }
-
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
     assert((getContext().getTypeSize(E->getType()) == 32) &&
            "rbit of unusual size!");

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 9f0c8491ad666..86f1c3c42598a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3226,9 +3226,9 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
 
   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
     return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1) ||
-      SemaBuiltinConstantArgRange(TheCall, 2, 0, 2) ||
-      SemaBuiltinConstantArgRange(TheCall, 3, 0, 1) ||
-      SemaBuiltinConstantArgRange(TheCall, 4, 0, 1);
+           SemaBuiltinConstantArgRange(TheCall, 2, 0, 3) ||
+           SemaBuiltinConstantArgRange(TheCall, 3, 0, 1) ||
+           SemaBuiltinConstantArgRange(TheCall, 4, 0, 1);
   }
 
   if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||

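The only functional change here is that argument 2 (the prefetch
target) now accepts 0-3 rather than 0-2. A small sketch of what Sema
now accepts and rejects, mirroring the Sema test updated below (the
function name is made up):

    void check_targets(void *p) {
      __builtin_arm_prefetch(p, 0, 3, 0, 1); // OK: 3 (SLC) is now in range
      // __builtin_arm_prefetch(p, 0, 4, 0, 1);
      //   error: argument value 4 is outside the valid range
    }
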
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 3697d297f89f1..d3ea9ded6583d 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -168,10 +168,15 @@ void test_swp(uint32_t x, volatile void *p) {
 
 /* 8.6 Memory prefetch intrinsics */
 /* 8.6.1 Data prefetch */
-// ARM-LABEL: @test_pld(
-// ARM-NEXT:  entry:
-// ARM-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 1)
-// ARM-NEXT:    ret void
+// AArch32-LABEL: @test_pld(
+// AArch32-NEXT:  entry:
+// AArch32-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 1)
+// AArch32-NEXT:    ret void
+//
+// AArch64-LABEL: @test_pld(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 1)
+// AArch64-NEXT:    ret void
 //
 void test_pld() {
   __pld(0);
@@ -184,7 +189,7 @@ void test_pld() {
 //
 // AArch64-LABEL: @test_pldx(
 // AArch64-NEXT:  entry:
-// AArch64-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 1, i32 1, i32 1)
+// AArch64-NEXT:    call void @llvm.aarch64.prefetch(ptr null, i32 1, i32 2, i32 0, i32 1)
 // AArch64-NEXT:    ret void
 //
 void test_pldx() {
@@ -192,10 +197,15 @@ void test_pldx() {
 }
 
 /* 8.6.2 Instruction prefetch */
-// ARM-LABEL: @test_pli(
-// ARM-NEXT:  entry:
-// ARM-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
-// ARM-NEXT:    ret void
+// AArch32-LABEL: @test_pli(
+// AArch32-NEXT:  entry:
+// AArch32-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
+// AArch32-NEXT:    ret void
+//
+// AArch64-LABEL: @test_pli(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 0)
+// AArch64-NEXT:    ret void
 //
 void test_pli() {
   __pli(0);
@@ -208,7 +218,7 @@ void test_pli() {
 //
 // AArch64-LABEL: @test_plix(
 // AArch64-NEXT:  entry:
-// AArch64-NEXT:    call void @llvm.prefetch.p0(ptr null, i32 0, i32 1, i32 0)
+// AArch64-NEXT:    call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 2, i32 0, i32 0)
 // AArch64-NEXT:    ret void
 //
 void test_plix() {

diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 157bb73236a1e..05ea1c719edff 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -47,16 +47,19 @@ void barriers(void) {
 
 void prefetch(void) {
   __builtin_arm_prefetch(0, 1, 2, 0, 1); // pstl3keep
-  // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 1, i32 1, i32 1)
+  // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 1, i32 2, i32 0, i32 1)
 
   __builtin_arm_prefetch(0, 0, 0, 1, 1); // pldl1keep
-  // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 0, i32 1)
+  // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 1, i32 1)
 
   __builtin_arm_prefetch(0, 0, 0, 1, 1); // pldl1strm
-  // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 0, i32 1)
+  // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 1, i32 1)
 
   __builtin_arm_prefetch(0, 0, 0, 0, 0); // plil1keep
-  // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
+  // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 0)
+
+  __builtin_arm_prefetch(0, 0, 3, 0, 1); // pldslckeep
+  // CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
 }
 
 __attribute__((target("v8.5a")))

diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index ba4076027bbdc..e711121f7260f 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -25,7 +25,7 @@ void test_memory_barriers(void) {
 
 void test_prefetch(void) {
   __builtin_arm_prefetch(0, 2, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  __builtin_arm_prefetch(0, 0, 3, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_arm_prefetch(0, 0, 4, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
   __builtin_arm_prefetch(0, 0, 0, 2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
   __builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 }

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index c65446e8bafab..a6ecbb66a5d7b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -65,6 +65,14 @@ def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
 def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
     [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;
 
+
+def int_aarch64_prefetch : Intrinsic<[],
+    [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+    [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
+     ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>
+     ]>,
+    ClangBuiltin<"__builtin_arm_prefetch">;
+
 //===----------------------------------------------------------------------===//
 // Data Barrier Instructions
 

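The operands are (addr, write, target, stream, isdata); all but the
address must be immediates. As a sketch, a frontend could emit the
intrinsic with IRBuilder along these lines (this helper is
hypothetical, not part of the commit):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsAArch64.h"

    // Emit a pldslckeep-style prefetch of Addr: read (0), SLC target (3),
    // retaining rather than streaming (0), data cache (1).
    static void emitSLCPrefetch(llvm::IRBuilder<> &B, llvm::Value *Addr) {
      B.CreateIntrinsic(llvm::Intrinsic::aarch64_prefetch, /*Types=*/{},
                        {Addr, B.getInt32(0), B.getInt32(3), B.getInt32(0),
                         B.getInt32(1)});
    }
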
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 093b30d3b55ef..910d1db1ccfb1 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5754,6 +5754,17 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           &Call);
     break;
   }
+  case Intrinsic::aarch64_prefetch: {
+    Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+          "write argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+    Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+          "target argument to llvm.aarch64.prefetch must be 0-3", Call);
+    Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
+          "stream argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+    Check(cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue() < 2,
+          "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+    break;
+  }
   };
 }
 

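These checks fire through the ordinary verifier entry points, so a
malformed immediate is caught as soon as the module is verified. A
hypothetical standalone driver (none of this is in the commit) that
trips the new target-range check; the expected diagnostic text comes
from the Checks above:

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::SMDiagnostic Err;
      // The target operand is 4, one past the new SLC value.
      auto M = llvm::parseAssemblyString(
          "declare void @llvm.aarch64.prefetch(ptr readonly, i32 immarg, "
          "i32 immarg, i32 immarg, i32 immarg)\n"
          "define void @f(ptr %p) {\n"
          "  call void @llvm.aarch64.prefetch(ptr %p, i32 0, i32 4, i32 0, "
          "i32 1)\n"
          "  ret void\n"
          "}\n",
          Err, Ctx);
      // Expect: "target argument to llvm.aarch64.prefetch must be 0-3"
      return M && llvm::verifyModule(*M, &llvm::errs()) ? 1 : 0;
    }
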
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f6501a4439d14..0289da50db366 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1189,9 +1189,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget->hasSME())
-    setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
-
   if (Subtarget->hasSVE()) {
     for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -1520,6 +1517,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
   }
 
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 
   IsStrictFPEnabled = true;
@@ -4724,6 +4723,44 @@ static std::optional<SMEAttrs> getCalleeAttrsFromExternalFunction(SDValue V) {
   return std::nullopt;
 }
 
+SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  unsigned IntNo = Op.getConstantOperandVal(1);
+  SDLoc DL(Op);
+  switch (IntNo) {
+  default:
+    return SDValue(); // Don't custom lower most intrinsics.
+  case Intrinsic::aarch64_prefetch: {
+    SDValue Chain = Op.getOperand(0);
+    SDValue Addr = Op.getOperand(2);
+
+    unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+    unsigned Locality = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+    unsigned IsStream = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+    unsigned IsData = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+    unsigned PrfOp = (IsWrite << 4) |    // Load/Store bit
+                     (!IsData << 3) |    // IsDataCache bit
+                     (Locality << 1) |   // Cache level bits
+                     (unsigned)IsStream; // Stream bit
+
+    return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
+                       DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
+  }
+  case Intrinsic::aarch64_sme_za_enable:
+    return DAG.getNode(
+        AArch64ISD::SMSTART, DL, MVT::Other,
+        Op->getOperand(0), // Chain
+        DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
+        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+  case Intrinsic::aarch64_sme_za_disable:
+    return DAG.getNode(
+        AArch64ISD::SMSTOP, DL, MVT::Other,
+        Op->getOperand(0), // Chain
+        DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
+        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
+  }
+}
+
 SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(1);
@@ -4754,18 +4791,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     // changed.
     return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
   }
-  case Intrinsic::aarch64_sme_za_enable:
-    return DAG.getNode(
-        AArch64ISD::SMSTART, DL, MVT::Other,
-        Op->getOperand(0), // Chain
-        DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
-        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
-  case Intrinsic::aarch64_sme_za_disable:
-    return DAG.getNode(
-        AArch64ISD::SMSTOP, DL, MVT::Other,
-        Op->getOperand(0), // Chain
-        DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
-        DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
   }
 }
 
@@ -5872,11 +5897,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
   case ISD::MULHU:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
-  case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN:
     return LowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN:
     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_VOID:
+    return LowerINTRINSIC_VOID(Op, DAG);
   case ISD::ATOMIC_STORE:
     if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
       assert(Subtarget->hasLSE2());

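The PrfOp immediate assembled here is the PRFM <prfop> encoding: bit 4
selects load vs. store, bit 3 instruction vs. data cache, bits 2:1 the
cache level, and bit 0 keep vs. stream. A standalone re-derivation
(mirroring the PrfOp expression above) of the values exercised by the
new CodeGen test:

    #include <cstdio>

    static unsigned prfOp(unsigned IsWrite, unsigned Target,
                          unsigned IsStream, unsigned IsData) {
      return (IsWrite << 4) |  // Load/Store bit
             (!IsData << 3) |  // IsDataCache bit
             (Target << 1) |   // Cache level bits
             IsStream;         // Stream bit
    }

    int main() {
      std::printf("pldl1keep  = %u\n", prfOp(0, 0, 0, 1)); // 0
      std::printf("pldl1strm  = %u\n", prfOp(0, 0, 1, 1)); // 1
      std::printf("pldslckeep = %u\n", prfOp(0, 3, 0, 1)); // 6
      std::printf("plil1keep  = %u\n", prfOp(0, 0, 0, 0)); // 8
      std::printf("pstl1keep  = %u\n", prfOp(1, 0, 0, 1)); // 16
    }
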
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 49d43d5bce707..2869604815a78 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -960,6 +960,7 @@ class AArch64TargetLowering : public TargetLowering {
 
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
 
   bool
   isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 48d6553f59bf1..8a760a6cb5655 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1070,6 +1070,24 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     MI.eraseFromParent();
     return true;
   }
+  case Intrinsic::aarch64_prefetch: {
+    MachineIRBuilder MIB(MI);
+    auto &AddrVal = MI.getOperand(1);
+
+    int64_t IsWrite = MI.getOperand(2).getImm();
+    int64_t Target = MI.getOperand(3).getImm();
+    int64_t IsStream = MI.getOperand(4).getImm();
+    int64_t IsData = MI.getOperand(5).getImm();
+
+    unsigned PrfOp = (IsWrite << 4) |    // Load/Store bit
+                     (!IsData << 3) |    // IsDataCache bit
+                     (Target << 1) |     // Cache level bits
+                     (unsigned)IsStream; // Stream bit
+
+    MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
+    MI.eraseFromParent();
+    return true;
+  }
   }
 
   return true;

diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll
new file mode 100644
index 0000000000000..da7d74afe56be
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-prefetch-new.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+v8.9a --global-isel=1 --global-isel-abort=1 < %s | FileCheck %s
+
+@a = internal global ptr null, align 8
+@b = external global ptr, align 8
+
+define void @test(ptr %i, i32 %j) nounwind ssp {
+entry:
+  ; CHECK-LABEL: @test
+  %j.addr = alloca i32, align 4
+  store i32 %j, ptr %j.addr, align 4, !tbaa !0
+  %tmp = bitcast ptr %j.addr to ptr
+
+  %i.next = getelementptr i8, ptr %i, i64 2
+
+  ; Verify prefetching works for all the different kinds of pointers we might
+  ; want to prefetch.
+
+  ; CHECK: prfm pldl1keep,
+  call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 1)
+
+  ; CHECK: prfum pldl1keep,
+  call void @llvm.aarch64.prefetch(ptr %tmp, i32 0, i32 0, i32 0, i32 1)
+
+  ; CHECK: prfm pldl1keep,
+  call void @llvm.aarch64.prefetch(ptr %i, i32 0, i32 0, i32 0, i32 1)
+
+  ; CHECK: prfum pldl1keep,
+  call void @llvm.aarch64.prefetch(ptr %i.next, i32 0, i32 0, i32 0, i32 1)
+
+  ; CHECK: prfm pldl1keep,
+  call void @llvm.aarch64.prefetch(ptr @a, i32 0, i32 0, i32 0, i32 1)
+
+  ; CHECK: prfm pldl1keep,
+  call void @llvm.aarch64.prefetch(ptr @b, i32 0, i32 0, i32 0, i32 1)
+
+  ; Verify that we can generate every single valid prefetch value.
+
+  ; CHECK: prfm pstl1keep,
+  call void @llvm.aarch64.prefetch(ptr null, i32 1, i32 0, i32 0, i32 1)
+
+  ; CHECK: prfm pldl2keep,
+  call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 1, i32 0, i32 1)
+
+  ; CHECK: prfm pldl3keep,
+  call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 2, i32 0, i32 1)
+
+  ; CHECK: prfm pldslckeep,
+  call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
+
+  ; CHECK: prfm pldl1strm,
+  call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 1, i32 1)
+
+  ; CHECK: prfm plil1keep,
+  call void @llvm.aarch64.prefetch(ptr null, i32 0, i32 0, i32 0, i32 0)
+
+  ret void
+}
+
+declare void @llvm.aarch64.prefetch(ptr readonly, i32 immarg, i32 immarg, i32 immarg, i32 immarg) #0
+
+attributes #0 = { inaccessiblemem_or_argmemonly nounwind willreturn }
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"any pointer", !1}
