[flang-commits] [flang] 212527c - [Flang] Add FIR and LLVM lowering support for prefetch directive (#167272)
via flang-commits
flang-commits at lists.llvm.org
Sun Jan 4 23:54:15 PST 2026
Author: Thirumalai Shaktivel
Date: 2026-01-05T13:24:10+05:30
New Revision: 212527c00ba60aa5677a1b1acdd0f15b32b8fd01
URL: https://github.com/llvm/llvm-project/commit/212527c00ba60aa5677a1b1acdd0f15b32b8fd01
DIFF: https://github.com/llvm/llvm-project/commit/212527c00ba60aa5677a1b1acdd0f15b32b8fd01.diff
LOG: [Flang] Add FIR and LLVM lowering support for prefetch directive (#167272)
Implementation details:
* Add PrefetchOp in FirOps
* Handle PrefetchOp in FIR Lowering and also pass required default
values
* Handle PrefetchOp in CodeGen.cpp
* Add required tests
Added:
flang/test/Fir/prefetch.fir
flang/test/Integration/prefetch.f90
flang/test/Lower/HLFIR/prefetch.f90
Modified:
flang/include/flang/Optimizer/Dialect/FIROps.td
flang/lib/Lower/Bridge.cpp
flang/lib/Optimizer/CodeGen/CodeGen.cpp
flang/lib/Optimizer/Dialect/FIROps.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 126de599336a9..610084d5fdbfb 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -363,6 +363,43 @@ def fir_StoreOp : fir_Op<"store", [
}];
}
+// NOTE on memory effects: The allocation effect (MemAlloc) is to ensure
+// this operation is not removed by dead code elimination as a dead read
+// on the argument: the prefetch happens as a side effect.
+def fir_PrefetchOp : fir_Op<"prefetch", [MemoryEffects<[MemAlloc<DefaultResource>]>]> {
+  let summary = "prefetch a memory reference";
+
+  let description = [{
+    The prefetch is a hint to the code generator that the memory reference will
+    be used in the near future. The prefetch is not guaranteed to be executed.
+
+    ```
+      %a = ... -> !fir.ref<i32>
+      fir.prefetch %a {read, data, localityHint = 3 : i32} : !fir.ref<i32>
+      // ...
+      fir.load %a : !fir.ref<i32> // use the prefetched value
+    ```
+
+    Here,
+      `memref`       : address to be prefetched
+      `rw`           : read/write specifier,
+                       represented as read (default) or write with values
+                       0 and 1, respectively
+      `localityHint` : temporal locality specifier,
+                       value ranging from 0 - no locality to 3 - extremely local
+      `cacheType`    : cache type specifier,
+                       represented as instruction or data (the default used
+                       by lowering) with values 0 and 1, respectively
+
+    `rw` and `cacheType` are unit attributes: when present they select
+    `write` and `data`; when absent they select `read` and `instruction`.
+  }];
+
+  /// NOTE: The numerical values used here are in reference to the LLVM LangRef
+  let arguments =
+      (ins AnyReferenceLike : $memref, UnitAttr : $rw,
+          ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]> : $localityHint,
+          UnitAttr : $cacheType);
+
+  let hasCustomAssemblyFormat = 1;
+}
+
def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
let summary = "copy constant size memory";
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 9e3ad5ef9261f..d4c066da44835 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3475,7 +3475,24 @@ class FirConverter : public Fortran::lower::AbstractConverter {
attachInliningDirectiveToStmt(dir, &eval);
},
[&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) {
- TODO(getCurrentLocation(), "!$dir prefetch");
+  // Lower `!dir$ prefetch <list>`: emit one fir.prefetch per listed item,
+  // each taking the raw address of the lowered variable as its operand.
+  // Every prefetch created here shares the location of the directive.
+  mlir::Location loc = genLocation(dir.source);
+
+  // TODO: Don't use default value, instead get the following
+  // info from the directive
+  // `rw` and `cacheType` are unit attributes on fir.prefetch, so they are
+  // passed as booleans: not-a-write means "read", isData means "data".
+  const bool isWrite{false};
+  const uint32_t localityHint{3};
+  const bool isData{true};
+
+  for (const auto &p : prefetch.v) {
+    Fortran::evaluate::ExpressionAnalyzer ea{
+        bridge.getSemanticsContext()};
+    // NOTE(review): the analysis result is dereferenced unchecked and the
+    // item is assumed to hold a DataRef alternative -- confirm that
+    // semantic checks guarantee both before lowering is reached.
+    Fortran::lower::SomeExpr expr{*ea.Analyze(
+        std::get<Fortran::parser::DataRef>(p.value().u))};
+    Fortran::lower::StatementContext stmtCtx;
+    hlfir::Entity var = Fortran::lower::convertExprToHLFIR(
+        loc, *this, expr, localSymbols, stmtCtx);
+    mlir::Value memRef =
+        hlfir::genVariableRawAddress(loc, *builder, var);
+
+    fir::PrefetchOp::create(*builder, loc, memRef, isWrite,
+                            localityHint, isData);
+  }
},
[&](const Fortran::parser::CompilerDirective::IVDep &) {
attachDirectiveToLoop(dir, &eval);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 6dff368b68254..cf046a0bfcea8 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3359,6 +3359,26 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
}
};
+/// `fir.prefetch` --> `llvm.prefetch`
+///
+/// The two unit attributes of the FIR operation (`rw`, `cacheType`) become
+/// i32 integer attributes holding 1 when the unit attribute is present and 0
+/// when it is absent; `localityHint` is forwarded unchanged.
+struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
+  using FIROpConversion::FIROpConversion;
+
+  llvm::LogicalResult
+  matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    // Encode the presence of a unit attribute as an i32 0/1 flag.
+    auto flagAttr = [&rewriter](bool isSet) {
+      return mlir::IntegerAttr::get(rewriter.getI32Type(), isSet ? 1 : 0);
+    };
+    mlir::IntegerAttr rwFlag = flagAttr(prefetch.getRw());
+    mlir::IntegerAttr cacheFlag = flagAttr(prefetch.getCacheType());
+
+    // The only operand is the (already converted) address to prefetch.
+    mlir::Value address = adaptor.getOperands().front();
+    mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(), address, rwFlag,
+                                 prefetch.getLocalityHintAttr(), cacheFlag);
+
+    // fir.prefetch has no results, so it is simply dropped once replaced.
+    rewriter.eraseOp(prefetch);
+    return mlir::success();
+  }
+};
+
/// `fir.load` --> `llvm.load`
struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
using FIROpConversion::FIROpConversion;
@@ -4457,15 +4477,15 @@ void fir::populateFIRToLLVMConversionPatterns(
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
- NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
- SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
- ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
- SliceOpConversion, StoreOpConversion, StringLitOpConversion,
- SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
- UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
- UnreachableOpConversion, UseStmtOpConversion, XArrayCoorOpConversion,
- XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(converter,
- options);
+ NegcOpConversion, NoReassocOpConversion, PrefetchOpConversion,
+ SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion,
+ SelectTypeOpConversion, ShapeOpConversion, ShapeShiftOpConversion,
+ ShiftOpConversion, SliceOpConversion, StoreOpConversion,
+ StringLitOpConversion, SubcOpConversion, TypeDescOpConversion,
+ TypeInfoOpConversion, UnboxCharOpConversion, UnboxProcOpConversion,
+ UndefOpConversion, UnreachableOpConversion, UseStmtOpConversion,
+ XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion,
+ ZeroOpConversion>(converter, options);
// Patterns that are populated without a type converter do not trigger
// target materializations for the operands of the root op.
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index c2a3d52fe88d2..90c960843787e 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -4401,6 +4401,84 @@ void fir::StoreOp::getEffects(
addVolatileMemoryEffects({getMemref().getType()}, effects);
}
+//===----------------------------------------------------------------------===//
+// PrefetchOp
+//===----------------------------------------------------------------------===//
+
+/// Parse the custom assembly form of `fir.prefetch`:
+///
+///   fir.prefetch %addr {read|write, instruction|data,
+///                       localityHint = <integer-attr>} : <type>
+///
+/// `rw` and `cacheType` are declared as unit attributes, so `write` and
+/// `data` attach a unit attribute while `read` and `instruction` attach
+/// nothing. `localityHint` is a required attribute of the operation and the
+/// printer always emits the braced list, so the whole list is mandatory here.
+///
+/// Returns failure (with a diagnostic) on any syntax error.
+mlir::ParseResult fir::PrefetchOp::parse(mlir::OpAsmParser &parser,
+                                         mlir::OperationState &result) {
+  mlir::OpAsmParser::UnresolvedOperand memref;
+  if (parser.parseOperand(memref))
+    return mlir::failure();
+
+  // parseLBrace emits its own diagnostic on failure; stop right away rather
+  // than continue parsing after an error has been reported.
+  if (parser.parseLBrace())
+    return mlir::failure();
+
+  // Access specifier: `read` or `write`. Remember where the keyword starts
+  // so that a diagnostic points at the offending token.
+  llvm::StringRef kw;
+  llvm::SMLoc kwLoc = parser.getCurrentLocation();
+  if (parser.parseKeyword(&kw))
+    return mlir::failure();
+  if (kw == "write")
+    result.addAttribute("rw", parser.getBuilder().getUnitAttr());
+  else if (kw != "read")
+    return parser.emitError(kwLoc, "Expected either read or write keyword");
+
+  if (parser.parseComma())
+    return mlir::failure();
+
+  // Cache specifier: `instruction` or `data`.
+  kwLoc = parser.getCurrentLocation();
+  if (parser.parseKeyword(&kw))
+    return mlir::failure();
+  if (kw == "data")
+    result.addAttribute("cacheType", parser.getBuilder().getUnitAttr());
+  else if (kw != "instruction")
+    return parser.emitError(kwLoc,
+                            "Expected either instruction or data keyword");
+
+  // Locality specifier: `localityHint = <attribute>`.
+  if (parser.parseComma() || parser.parseKeyword("localityHint") ||
+      parser.parseEqual())
+    return mlir::failure();
+  mlir::Attribute intAttr;
+  if (parser.parseAttribute(intAttr))
+    return mlir::failure();
+  result.addAttribute("localityHint", intAttr);
+
+  if (parser.parseRBrace())
+    return mlir::failure();
+
+  // Trailing `: <type>` gives the type used to resolve the address operand.
+  mlir::Type type;
+  if (parser.parseColonType(type))
+    return mlir::failure();
+
+  if (parser.resolveOperand(memref, type, result.operands))
+    return mlir::failure();
+  return mlir::success();
+}
+
+/// Print `fir.prefetch` in its custom assembly form:
+///
+///   %addr {read|write, instruction|data, localityHint = N : <int-type>}
+///       : <addr-type>
+///
+/// All three specifiers are always printed, whether or not the unit
+/// attributes are present.
+void fir::PrefetchOp::print(mlir::OpAsmPrinter &p) {
+  // A present unit attribute selects the second keyword of each pair.
+  const llvm::StringRef accessKw = getRw() ? "write" : "read";
+  const llvm::StringRef cacheKw = getCacheType() ? "data" : "instruction";
+
+  p << " ";
+  p.printOperand(getMemref());
+  p << " {" << accessKw << ", " << cacheKw;
+  p << ", localityHint = " << getLocalityHint();
+  p << " : " << getLocalityHintAttr().getType();
+  p << "} : " << getMemref().getType();
+}
+
//===----------------------------------------------------------------------===//
// CopyOp
//===----------------------------------------------------------------------===//
diff --git a/flang/test/Fir/prefetch.fir b/flang/test/Fir/prefetch.fir
new file mode 100644
index 0000000000000..613226c00abc6
--- /dev/null
+++ b/flang/test/Fir/prefetch.fir
@@ -0,0 +1,25 @@
+// Test lowering of prefetch directive from FIR to LLVM IR
+// RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck %s
+
+// Each fir.prefetch below uses a differently produced address; every one is
+// expected to become the same llvm.prefetch call (rw = 0, locality = 3,
+// cache type = 1).
+func.func @_QPtest(%arg0: !fir.ref<i32>, %arg1: !fir.box<!fir.array<?xf32>>, %arg2: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,4>>>>>) {
+ // Case 1: the address is a plain reference argument.
+ // CHECK: call void @llvm.prefetch.p0(ptr %{{.*}}, i32 0, i32 3, i32 1)
+ fir.prefetch %arg0#0 {read, data, localityHint = 3 : i32} : !fir.ref<i32>
+ %c2 = arith.constant 2 : index
+ %4 = hlfir.designate %arg1#0 (%c2) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+
+ // Case 2: the address is an element designated out of a boxed array.
+ // CHECK: call void @llvm.prefetch.p0(ptr %{{.*}}, i32 0, i32 3, i32 1)
+ fir.prefetch %4 {read, data, localityHint = 3 : i32} : !fir.ref<f32>
+ %5 = fir.load %arg2#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,4>>>>>
+ %6 = fir.box_addr %5 : (!fir.box<!fir.heap<!fir.array<?x!fir.char<1,4>>>>) -> !fir.heap<!fir.array<?x!fir.char<1,4>>>
+
+ // Case 3: the address is the heap base address taken out of a loaded box.
+ // CHECK: call void @llvm.prefetch.p0(ptr %{{.*}}, i32 0, i32 3, i32 1)
+ fir.prefetch %6 {read, data, localityHint = 3 : i32} : !fir.heap<!fir.array<?x!fir.char<1,4>>>
+ %7 = fir.load %arg2#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,4>>>>>
+ %c3 = arith.constant 3 : index
+ %c4 = arith.constant 4 : index
+ %8 = hlfir.designate %7 (%c3) typeparams %c4 : (!fir.box<!fir.heap<!fir.array<?x!fir.char<1,4>>>>, index, index) -> !fir.ref<!fir.char<1,4>>
+
+ // Case 4: the address is a character element designated out of a loaded box.
+ // CHECK: call void @llvm.prefetch.p0(ptr %{{.*}}, i32 0, i32 3, i32 1)
+ fir.prefetch %8 {read, data, localityHint = 3 : i32} : !fir.ref<!fir.char<1,4>>
+ return
+}
diff --git a/flang/test/Integration/prefetch.f90 b/flang/test/Integration/prefetch.f90
new file mode 100644
index 0000000000000..f3fb7a950e328
--- /dev/null
+++ b/flang/test/Integration/prefetch.f90
@@ -0,0 +1,39 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=LLVM
+
+!===============================================================================
+! Test lowering of prefetch directive
+!===============================================================================
+
+! Checks the LLVM IR produced for a prefetch of a whole array, and for a
+! prefetch inside a loop that names an array element and a scalar.
+! The RUN line only emits IR; this subroutine is never executed by the test.
+subroutine test_prefetch_01()
+ ! LLVM: {{.*}} = alloca i32, i64 1, align 4
+ ! LLVM: %[[VAR_J:.*]] = alloca i32, i64 1, align 4
+ ! LLVM: %[[VAR_I:.*]] = alloca i32, i64 1, align 4
+ ! LLVM: %[[VAR_A:.*]] = alloca [256 x i32], i64 1, align 4
+
+ integer :: i, j
+ integer :: a(256)
+
+ a = 23
+ ! Whole-array prefetch: the operand is expected to be the alloca of a.
+ ! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_A]], i32 0, i32 3, i32 1)
+ !dir$ prefetch a
+ i = sum(a)
+
+ ! Element prefetch: the subscript i+64 is computed and turned into a GEP on a.
+ ! LLVM: %[[LOAD_I:.*]] = load i32, ptr %[[VAR_I]], align 4
+ ! LLVM: %{{.*}} = add nsw i32 %[[LOAD_I]], 64
+ ! LLVM: %[[GEP_A:.*]] = getelementptr i32, ptr %[[VAR_A]], i64 {{.*}}
+
+ ! One directive with two items yields two prefetch calls, in list order.
+ ! LLVM: call void @llvm.prefetch.p0(ptr %[[GEP_A]], i32 0, i32 3, i32 1)
+ ! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_J]], i32 0, i32 3, i32 1)
+ do i = 1, (256 - 64)
+ !dir$ prefetch a(i+64), j
+ a(i) = a(i-32) + a(i+32) + j
+ end do
+end subroutine test_prefetch_01
diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90
new file mode 100644
index 0000000000000..b51babc522ff1
--- /dev/null
+++ b/flang/test/Lower/HLFIR/prefetch.f90
@@ -0,0 +1,73 @@
+! Test lowering of prefetch directive
+! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s --check-prefixes=HLFIR
+
+! Provides the derived type used by test_prefetch_02 to prefetch a
+! component array (t1%a) and one of its elements.
+module test_prefetch_mod
+ implicit none
+ type :: t
+ integer :: a(256, 256)
+ end type t
+end module test_prefetch_mod
+
+! Local variables: prefetch of a whole fixed-size array, then a prefetch
+! inside a loop naming an array element and a scalar.
+subroutine test_prefetch_01()
+ ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ea"} : (!fir.ref<!fir.array<256xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<256xi32>>, !fir.ref<!fir.array<256xi32>>)
+ ! HLFIR: %[[H_I:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ ! HLFIR: %[[H_J:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+ integer :: i, j
+ integer :: a(256)
+
+ a = 23
+
+ ! Whole array: the prefetch operand is the first result of the declare of a.
+ ! HLFIR: fir.prefetch %[[H_A]]#0 {read, data, localityHint = 3 : i32} : !fir.ref<!fir.array<256xi32>>
+ !dir$ prefetch a
+ i = sum(a)
+
+ ! Element a(i+64): the subscript is evaluated, converted to i64 and used by
+ ! hlfir.designate; the designate result is what gets prefetched.
+ ! HLFIR: %[[H_LOAD:.*]] = fir.load %[[H_I]]#0 : !fir.ref<i32>
+ ! HLFIR: %[[H_C64:.*]] = arith.constant 64 : i32
+ ! HLFIR: %[[H_ADD:.*]] = arith.addi %[[H_LOAD]], %[[H_C64]] overflow<nsw> : i32
+ ! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64
+ ! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref<!fir.array<256xi32>>, i64) -> !fir.ref<i32>
+
+ ! Two items on one directive give two fir.prefetch operations, in list order.
+ ! HLFIR: fir.prefetch %[[H_DESIG]] {read, data, localityHint = 3 : i32} : !fir.ref<i32>
+ ! HLFIR: fir.prefetch %[[H_J]]#0 {read, data, localityHint = 3 : i32} : !fir.ref<i32>
+
+ do i = 1, (256 - 64)
+ !dir$ prefetch a(i+64), j
+ a(i) = a(i-32) + a(i+32) + j
+ end do
+end subroutine test_prefetch_01
+
+! Derived-type component (whole array and single element) and an allocatable
+! array. Note that i and j are not declared in this subroutine, so they are
+! implicitly typed here.
+subroutine test_prefetch_02(t1)
+ use test_prefetch_mod
+ ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_prefetch_02Ea"}
+ ! HLFIR: %[[H_ARG0:.*]]:2 = hlfir.declare {{.*}} dummy_scope {{.*}} {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFtest_prefetch_02Et1"}
+ type(t), intent(inout) :: t1
+ integer, allocatable :: a(:, :)
+
+ ! Whole component array: the prefetch operand is the designate of t1%a.
+ ! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}}
+ ! HLFIR: fir.prefetch %[[H_DESIG_01]] {read, data, localityHint = 3 : i32} : !fir.ref<!fir.array<256x256xi32>>
+ !dir$ prefetch t1%a
+ a = t1%a ** 2
+
+ do i = 1, 256
+ ! Allocatable array: the descriptor is loaded and its base address
+ ! (fir.box_addr) is what gets prefetched.
+ ! HLFIR: %[[A_LOAD:.*]] = fir.load %[[H_A]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+ ! HLFIR: %[[A_BOX:.*]] = fir.box_addr %[[A_LOAD]] : (!fir.box<!fir.heap<!fir.array<?x?xi32>>>) -> !fir.heap<!fir.array<?x?xi32>>
+ ! HLFIR: fir.prefetch %[[A_BOX]] {read, data, localityHint = 3 : i32} : !fir.heap<!fir.array<?x?xi32>>
+ !dir$ prefetch a
+ a(i, :) = a(i, :) + i
+ do j = 1, 256
+ ! Single component element: the designate yields a scalar reference.
+ ! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}}
+ ! HLFIR: fir.prefetch %[[H_DESIG_02]] {read, data, localityHint = 3 : i32} : !fir.ref<i32>
+ !dir$ prefetch t1%a(i, j)
+ t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j)
+ end do
+ end do
+end subroutine test_prefetch_02
+
+! Assumed-shape dummy array: the prefetch operand is expected to be the base
+! address extracted from the array descriptor with fir.box_addr.
+subroutine test_prefetch_03(a)
+ integer :: a(:)
+ ! HLFIR: %[[BOX:.*]] = fir.box_addr {{.*}} : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+ ! HLFIR: fir.prefetch %[[BOX]] {read, data, localityHint = 3 : i32} : !fir.ref<!fir.array<?xi32>>
+ !dir$ prefetch a
+ a = sum(a)
+end subroutine test_prefetch_03
More information about the flang-commits
mailing list