[flang-commits] [flang] [Flang] Add FIR and LLVM lowering support for prefetch directive (PR #167272)

Thirumalai Shaktivel via flang-commits flang-commits at lists.llvm.org
Mon Dec 8 22:14:41 PST 2025


https://github.com/Thirumalai-Shaktivel updated https://github.com/llvm/llvm-project/pull/167272

>From 79adc2b620b6cbecff043a581412eeab3ca4a255 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Mon, 10 Nov 2025 06:11:37 +0000
Subject: [PATCH 1/9] [Flang] Add FIR and LLVM lowering support for prefetch
 directive

* Add PrefetchOp in FirOps
* Handle PrefetchOp in FIR Lowering and also pass required
default values
* Handle PrefetchOp in CodeGen.cpp
* Add required tests
---
 .../include/flang/Optimizer/Dialect/FIROps.td | 30 +++++++++
 flang/lib/Lower/Bridge.cpp                    | 19 +++++-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 36 ++++++++---
 flang/test/Integration/prefetch.f90           | 39 ++++++++++++
 flang/test/Lower/HLFIR/prefetch.f90           | 63 +++++++++++++++++++
 5 files changed, 178 insertions(+), 9 deletions(-)
 create mode 100644 flang/test/Integration/prefetch.f90
 create mode 100644 flang/test/Lower/HLFIR/prefetch.f90

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index bae52d63fda45..84d7ed29292ae 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -351,6 +351,36 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface,
   }];
 }
 
+def fir_PrefetchOp : fir_Op<"prefetch", []> {
+  let summary = "prefetch a memory reference";
+
+  let description = [{
+    The prefetch is a hint to the code generator that the memory reference will
+    be used in the near future. The prefetch is not guaranteed to be executed.
+
+    ```
+      %a = ... -> !fir.ref<i32>
+      fir.prefetch %a {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
+      // ...
+      fir.load %a : !fir.ref<i32> // use the prefetched value
+    ```
+  }];
+
+  /// `memref' is the address to be prefetched
+  /// `rw'          : rw specifier >
+  ///                  read is 0, write is 1
+  /// `localityHint': temporal locality specifier >
+  ///                  value ranging from 0 - no locality to 3 - extremely local
+  /// `cacheType'   : cache type specifier >
+  ///                  instruction cache is 0, data cache is 1
+  let arguments = (ins AnyReferenceLike:$memref,
+      ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$rw,
+      ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$localityHint,
+      ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$cacheType);
+
+  let assemblyFormat = "$memref attr-dict `:` type(operands)";
+}
+
 def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
   let summary = "copy constant size memory";
 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5779bcd5d293c..9d8e765e8adea 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3276,7 +3276,24 @@ class FirConverter : public Fortran::lower::AbstractConverter {
               attachInliningDirectiveToStmt(dir, &eval);
             },
             [&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) {
-              TODO(getCurrentLocation(), "!$dir prefetch");
+              for (const auto &p : prefetch.v) {
+                Fortran::evaluate::ExpressionAnalyzer ea{
+                    bridge.getSemanticsContext()};
+                Fortran::lower::SomeExpr expr{*ea.Analyze(
+                    std::get<Fortran::parser::DataRef>(p.value().u))};
+                Fortran::lower::StatementContext stmtCtx;
+                mlir::Value memRef{Fortran::lower::convertExprToHLFIR(
+                                       genLocation(dir.source), *this, expr,
+                                       localSymbols, stmtCtx)
+                                       .getBase()};
+
+                // TODO: Don't use default value, instead get the following
+                //       info from the directive
+                uint32_t isWrite{0}, localityHint{3}, isData{1};
+                builder->create<fir::PrefetchOp>(genLocation(dir.source),
+                                                 memRef, isWrite, localityHint,
+                                                 isData);
+              }
             },
             [&](const auto &) {}},
         dir.u);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index ca4aefb653d2a..69734a2ae443d 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3346,6 +3346,25 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
   }
 };
 
+/// `fir.prefetch` --> `llvm.prefetch`
+struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
+  using FIROpConversion::FIROpConversion;
+
+  llvm::LogicalResult
+  matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    llvm::errs() << "prefetch\n";
+    mlir::IntegerAttr rw = prefetch.getRwAttr();
+    mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr();
+    mlir::IntegerAttr cacheType = prefetch.getCacheTypeAttr();
+    mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(),
+                                 adaptor.getOperands().front(), rw,
+                                 localityHint, cacheType);
+    rewriter.eraseOp(prefetch);
+    return mlir::success();
+  }
+};
+
 /// `fir.load` --> `llvm.load`
 struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
   using FIROpConversion::FIROpConversion;
@@ -4423,14 +4442,15 @@ void fir::populateFIRToLLVMConversionPatterns(
       FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
       GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
       LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
-      NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
-      SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
-      ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
-      SliceOpConversion, StoreOpConversion, StringLitOpConversion,
-      SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
-      UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
-      UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion,
-      XReboxOpConversion, ZeroOpConversion>(converter, options);
+      NegcOpConversion, NoReassocOpConversion, PrefetchOpConversion,
+      SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion,
+      SelectTypeOpConversion, ShapeOpConversion, ShapeShiftOpConversion,
+      ShiftOpConversion, SliceOpConversion, StoreOpConversion,
+      StringLitOpConversion, SubcOpConversion, TypeDescOpConversion,
+      TypeInfoOpConversion, UnboxCharOpConversion, UnboxProcOpConversion,
+      UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion,
+      XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(converter,
+                                                                options);
 
   // Patterns that are populated without a type converter do not trigger
   // target materializations for the operands of the root op.
diff --git a/flang/test/Integration/prefetch.f90 b/flang/test/Integration/prefetch.f90
new file mode 100644
index 0000000000000..1f7f6d091cfaa
--- /dev/null
+++ b/flang/test/Integration/prefetch.f90
@@ -0,0 +1,39 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=LLVM
+
+!===============================================================================
+! Test lowering of prefetch directive
+!===============================================================================
+
+subroutine test_prefetch_01()
+    ! LLVM: {{.*}} = alloca i32, i64 1, align 4
+    ! LLVM: %[[L_J:.*]] = alloca i32, i64 1, align 4
+    ! LLVM: %[[L_I:.*]] = alloca i32, i64 1, align 4
+    ! LLVM: %[[L_A:.*]] = alloca [256 x i32], i64 1, align 4
+
+    integer :: i, j
+    integer :: a(256)
+
+    a = 23
+    ! LLVM: call void @llvm.prefetch.p0(ptr %6, i32 0, i32 3, i32 1)
+    !dir$ prefetch a
+    i = sum(a)
+    ! LLVM: %[[L_LOAD:.*]] = load i32, ptr %5, align 4
+    ! LLVM: %[[L_ADD:.*]] = add nsw i32 %[[L_LOAD]], 64
+    ! LLVM: %[[L_GEP:.*]] = getelementptr i32, ptr %[[L_A]], i64 {{.*}}
+
+    ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_GEP]], i32 0, i32 3, i32 1)
+    ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_J]], i32 0, i32 3, i32 1)
+
+    do i = 1, (256 - 64)
+      !dir$ prefetch a(i+64), j
+      a(i) = a(i-32) + a(i+32) + j
+    end do
+end subroutine test_prefetch_01
diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90
new file mode 100644
index 0000000000000..3fe0a1a18c4c3
--- /dev/null
+++ b/flang/test/Lower/HLFIR/prefetch.f90
@@ -0,0 +1,63 @@
+! Test lowering of prefetch directive
+! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s --check-prefixes=HLFIR
+
+module test_prefetch_mod
+  implicit none
+  type :: t
+    integer :: a(256, 256)
+  end type t
+end module test_prefetch_mod
+
+subroutine test_prefetch_01()
+  ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ea"} : (!fir.ref<!fir.array<256xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<256xi32>>, !fir.ref<!fir.array<256xi32>>)
+  ! HLFIR: %[[H_I:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  ! HLFIR: %[[H_J:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+  integer :: i, j
+  integer :: a(256)
+
+  a = 23
+
+  ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256xi32>>
+  !dir$ prefetch a
+  i = sum(a)
+
+  ! HLFIR: %[[H_LOAD:.*]] = fir.load %[[H_I]]#0 : !fir.ref<i32>
+  ! HLFIR: %[[H_C64:.*]] = arith.constant 64 : i32
+  ! HLFIR: %[[H_ADD:.*]] = arith.addi %[[H_LOAD]], %[[H_C64]] overflow<nsw> : i32
+  ! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64
+  ! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]])  : (!fir.ref<!fir.array<256xi32>>, i64) -> !fir.ref<i32>
+
+  ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
+  ! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
+
+  do i = 1, (256 - 64)
+    !dir$ prefetch a(i+64), j
+    a(i) = a(i-32) + a(i+32) + j
+  end do
+end subroutine test_prefetch_01
+
+subroutine test_prefetch_02(t1)
+  use test_prefetch_mod
+  ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_prefetch_02Ea"}
+  ! HLFIR: %[[H_ARG0:.*]]:2 = hlfir.declare {{.*}} dummy_scope {{.*}} {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFtest_prefetch_02Et1"}
+  type(t), intent(inout) :: t1
+  integer, allocatable :: a(:, :)
+
+  ! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"}   shape {{.*}}
+  ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256x256xi32>>
+  !dir$ prefetch t1%a
+  a = t1%a ** 2
+
+  do i = 1, 256
+    ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+    !dir$ prefetch a
+    a(i, :) = a(i, :) + i
+    do j = 1, 256
+      ! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}}
+      ! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
+      !dir$ prefetch t1%a(i, j)
+      t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j)
+    end do
+  end do
+end subroutine test_prefetch_02

>From 32738eb0c716e7d8e19a28f28c861577af804c14 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Mon, 10 Nov 2025 15:49:33 +0000
Subject: [PATCH 2/9] Fix the build failure

---
 flang/lib/Lower/Bridge.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 9d8e765e8adea..ce6b9f58bbfd7 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3290,9 +3290,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                 // TODO: Don't use default value, instead get the following
                 //       info from the directive
                 uint32_t isWrite{0}, localityHint{3}, isData{1};
-                builder->create<fir::PrefetchOp>(genLocation(dir.source),
-                                                 memRef, isWrite, localityHint,
-                                                 isData);
+                fir::PrefetchOp::create(*builder, genLocation(dir.source),
+                                        memRef, isWrite, localityHint, isData);
               }
             },
             [&](const auto &) {}},

>From 9557cd0f4b118d9b95e932a0d7c61b238c49d4d0 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Tue, 11 Nov 2025 03:08:15 +0000
Subject: [PATCH 3/9] Remove debug print

---
 flang/lib/Optimizer/CodeGen/CodeGen.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 69734a2ae443d..adc5a50c45fff 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3353,7 +3353,6 @@ struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
   llvm::LogicalResult
   matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
-    llvm::errs() << "prefetch\n";
     mlir::IntegerAttr rw = prefetch.getRwAttr();
     mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr();
     mlir::IntegerAttr cacheType = prefetch.getCacheTypeAttr();

>From c28bbc29f184f024ea31e113be9a718c252d64c7 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Tue, 11 Nov 2025 08:38:05 +0000
Subject: [PATCH 4/9] Use UnitAttr instead of I32Attr

---
 flang/include/flang/Optimizer/Dialect/FIROps.td | 12 ++++++------
 flang/lib/Optimizer/CodeGen/CodeGen.cpp         |  6 ++++--
 flang/test/Lower/HLFIR/prefetch.f90             | 12 ++++++------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 84d7ed29292ae..4f7abb1d052b7 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -368,15 +368,15 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> {
 
   /// `memref' is the address to be prefetched
   /// `rw'          : rw specifier >
-  ///                  read is 0, write is 1
+  ///                  read is 0 (default), write is 1
   /// `localityHint': temporal locality specifier >
   ///                  value ranging from 0 - no locality to 3 - extremely local
   /// `cacheType'   : cache type specifier >
-  ///                  instruction cache is 0, data cache is 1
-  let arguments = (ins AnyReferenceLike:$memref,
-      ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$rw,
+  ///                  instruction cache is 0 (default), data cache is 1
+  /// NOTE: The numerical values used here is in reference to the LLVM LangRef
+  let arguments = (ins AnyReferenceLike:$memref, UnitAttr:$rw,
       ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$localityHint,
-      ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$cacheType);
+      UnitAttr:$cacheType);
 
   let assemblyFormat = "$memref attr-dict `:` type(operands)";
 }
@@ -974,7 +974,7 @@ def fir_ReboxAssumedRankOp : fir_Op<"rebox_assumed_rank",
     Example:
     ```
       fir.rebox_assumed_rank %1 lbs zeroes : (!fir.box<!fir.array<*:f32>>) -> !fir.box<!fir.array<*:f32>>
-    ```    
+    ```
   }];
 
   let arguments = (ins
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index adc5a50c45fff..8a1fe5fc5d988 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3353,9 +3353,11 @@ struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
   llvm::LogicalResult
   matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
-    mlir::IntegerAttr rw = prefetch.getRwAttr();
+    mlir::IntegerAttr rw = mlir::IntegerAttr::get(rewriter.getI32Type(),
+                                                  prefetch.getRwAttr() ? 1 : 0);
     mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr();
-    mlir::IntegerAttr cacheType = prefetch.getCacheTypeAttr();
+    mlir::IntegerAttr cacheType = mlir::IntegerAttr::get(
+        rewriter.getI32Type(), prefetch.getCacheTypeAttr() ? 1 : 0);
     mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(),
                                  adaptor.getOperands().front(), rw,
                                  localityHint, cacheType);
diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90
index 3fe0a1a18c4c3..2f33a78b0b396 100644
--- a/flang/test/Lower/HLFIR/prefetch.f90
+++ b/flang/test/Lower/HLFIR/prefetch.f90
@@ -18,7 +18,7 @@ subroutine test_prefetch_01()
 
   a = 23
 
-  ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256xi32>>
+  ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.array<256xi32>>
   !dir$ prefetch a
   i = sum(a)
 
@@ -28,8 +28,8 @@ subroutine test_prefetch_01()
   ! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64
   ! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]])  : (!fir.ref<!fir.array<256xi32>>, i64) -> !fir.ref<i32>
 
-  ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
-  ! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
+  ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
+  ! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
 
   do i = 1, (256 - 64)
     !dir$ prefetch a(i+64), j
@@ -45,17 +45,17 @@ subroutine test_prefetch_02(t1)
   integer, allocatable :: a(:, :)
 
   ! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"}   shape {{.*}}
-  ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256x256xi32>>
+  ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.array<256x256xi32>>
   !dir$ prefetch t1%a
   a = t1%a ** 2
 
   do i = 1, 256
-    ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+    ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
     !dir$ prefetch a
     a(i, :) = a(i, :) + i
     do j = 1, 256
       ! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}}
-      ! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
+      ! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
       !dir$ prefetch t1%a(i, j)
       t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j)
     end do

>From 7c921f805899c8d46cec3c5a268373bf0174e01c Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Tue, 11 Nov 2025 08:38:30 +0000
Subject: [PATCH 5/9] Fix the tests

---
 flang/test/Integration/prefetch.f90 | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/flang/test/Integration/prefetch.f90 b/flang/test/Integration/prefetch.f90
index 1f7f6d091cfaa..f3fb7a950e328 100644
--- a/flang/test/Integration/prefetch.f90
+++ b/flang/test/Integration/prefetch.f90
@@ -14,24 +14,24 @@
 
 subroutine test_prefetch_01()
     ! LLVM: {{.*}} = alloca i32, i64 1, align 4
-    ! LLVM: %[[L_J:.*]] = alloca i32, i64 1, align 4
-    ! LLVM: %[[L_I:.*]] = alloca i32, i64 1, align 4
-    ! LLVM: %[[L_A:.*]] = alloca [256 x i32], i64 1, align 4
+    ! LLVM: %[[VAR_J:.*]] = alloca i32, i64 1, align 4
+    ! LLVM: %[[VAR_I:.*]] = alloca i32, i64 1, align 4
+    ! LLVM: %[[VAR_A:.*]] = alloca [256 x i32], i64 1, align 4
 
     integer :: i, j
     integer :: a(256)
 
     a = 23
-    ! LLVM: call void @llvm.prefetch.p0(ptr %6, i32 0, i32 3, i32 1)
+    ! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_A]], i32 0, i32 3, i32 1)
     !dir$ prefetch a
     i = sum(a)
-    ! LLVM: %[[L_LOAD:.*]] = load i32, ptr %5, align 4
-    ! LLVM: %[[L_ADD:.*]] = add nsw i32 %[[L_LOAD]], 64
-    ! LLVM: %[[L_GEP:.*]] = getelementptr i32, ptr %[[L_A]], i64 {{.*}}
 
-    ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_GEP]], i32 0, i32 3, i32 1)
-    ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_J]], i32 0, i32 3, i32 1)
+    ! LLVM: %[[LOAD_I:.*]] = load i32, ptr %[[VAR_I]], align 4
+    ! LLVM: %{{.*}} = add nsw i32 %[[LOAD_I]], 64
+    ! LLVM: %[[GEP_A:.*]] = getelementptr i32, ptr %[[VAR_A]], i64 {{.*}}
 
+    ! LLVM: call void @llvm.prefetch.p0(ptr %[[GEP_A]], i32 0, i32 3, i32 1)
+    ! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_J]], i32 0, i32 3, i32 1)
     do i = 1, (256 - 64)
       !dir$ prefetch a(i+64), j
       a(i) = a(i-32) + a(i+32) + j

>From 7c98c645fe7c4978a4d4ede15f65c3b04a25fbe5 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Thu, 13 Nov 2025 06:12:20 +0000
Subject: [PATCH 6/9] Revert a space fix

---
 flang/include/flang/Optimizer/Dialect/FIROps.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 4f7abb1d052b7..4b6eb98bc3530 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -974,7 +974,7 @@ def fir_ReboxAssumedRankOp : fir_Op<"rebox_assumed_rank",
     Example:
     ```
       fir.rebox_assumed_rank %1 lbs zeroes : (!fir.box<!fir.array<*:f32>>) -> !fir.box<!fir.array<*:f32>>
-    ```
+    ```    
   }];
 
   let arguments = (ins

>From 1da8a9517cec8ba2babad0750408a8f811c951b2 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Thu, 13 Nov 2025 15:52:08 +0000
Subject: [PATCH 7/9] Use Data instead of Box for allocatable array

---
 flang/lib/Lower/Bridge.cpp          | 13 +++++++++----
 flang/test/Lower/HLFIR/prefetch.f90 |  4 +++-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index ce6b9f58bbfd7..d6062bbabf821 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3282,16 +3282,21 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                 Fortran::lower::SomeExpr expr{*ea.Analyze(
                     std::get<Fortran::parser::DataRef>(p.value().u))};
                 Fortran::lower::StatementContext stmtCtx;
+                mlir::Location loc = genLocation(dir.source);
                 mlir::Value memRef{Fortran::lower::convertExprToHLFIR(
-                                       genLocation(dir.source), *this, expr,
-                                       localSymbols, stmtCtx)
+                                       loc, *this, expr, localSymbols, stmtCtx)
                                        .getBase()};
+                if (mlir::isa<fir::BaseBoxType>(
+                        fir::unwrapRefType(memRef.getType()))) {
+                  memRef = fir::LoadOp::create(*builder, loc, memRef);
+                  memRef = fir::BoxAddrOp::create(*builder, loc, memRef);
+                }
 
                 // TODO: Don't use default value, instead get the following
                 //       info from the directive
                 uint32_t isWrite{0}, localityHint{3}, isData{1};
-                fir::PrefetchOp::create(*builder, genLocation(dir.source),
-                                        memRef, isWrite, localityHint, isData);
+                fir::PrefetchOp::create(*builder, loc, memRef, isWrite,
+                                        localityHint, isData);
               }
             },
             [&](const auto &) {}},
diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90
index 2f33a78b0b396..2a30584d94563 100644
--- a/flang/test/Lower/HLFIR/prefetch.f90
+++ b/flang/test/Lower/HLFIR/prefetch.f90
@@ -50,7 +50,9 @@ subroutine test_prefetch_02(t1)
   a = t1%a ** 2
 
   do i = 1, 256
-    ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+    ! HLFIR: %[[A_LOAD:.*]] = fir.load %[[H_A]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+    ! HLFIR: %[[A_BOX:.*]] = fir.box_addr %[[A_LOAD]] : (!fir.box<!fir.heap<!fir.array<?x?xi32>>>) -> !fir.heap<!fir.array<?x?xi32>>
+    ! HLFIR: fir.prefetch %[[A_BOX]] {cacheType, localityHint = 3 : i32} : !fir.heap<!fir.array<?x?xi32>>
     !dir$ prefetch a
     a(i, :) = a(i, :) + i
     do j = 1, 256

>From ed117745c512deea862dd923287a130310f52bb7 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Wed, 26 Nov 2025 12:15:04 +0530
Subject: [PATCH 8/9] Fix the FIROps prefetch description

---
 flang/include/flang/Optimizer/Dialect/FIROps.td | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 4b6eb98bc3530..78376a3ce17bc 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -360,7 +360,7 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> {
 
     ```
       %a = ... -> !fir.ref<i32>
-      fir.prefetch %a {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
+      fir.prefetch %a {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
       // ...
       fir.load %a : !fir.ref<i32> // use the prefetched value
     ```
@@ -372,11 +372,14 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> {
   /// `localityHint': temporal locality specifier >
   ///                  value ranging from 0 - no locality to 3 - extremely local
   /// `cacheType'   : cache type specifier >
-  ///                  instruction cache is 0 (default), data cache is 1
+  ///                  instruction cache is 0, data cache is 1 (default)
   /// NOTE: The numerical values used here is in reference to the LLVM LangRef
-  let arguments = (ins AnyReferenceLike:$memref, UnitAttr:$rw,
-      ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$localityHint,
-      UnitAttr:$cacheType);
+  let arguments =
+      (ins Arg<AnyReferenceLike,
+               "prefetch memory address", [MemWrite]> : $memref,
+       UnitAttr : $rw,
+       ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]> : $localityHint,
+       UnitAttr : $cacheType);
 
   let assemblyFormat = "$memref attr-dict `:` type(operands)";
 }
@@ -974,7 +977,7 @@ def fir_ReboxAssumedRankOp : fir_Op<"rebox_assumed_rank",
     Example:
     ```
       fir.rebox_assumed_rank %1 lbs zeroes : (!fir.box<!fir.array<*:f32>>) -> !fir.box<!fir.array<*:f32>>
-    ```    
+    ```
   }];
 
   let arguments = (ins

>From e7ab97474bf1514a9f3b8455e1555917cbc8dd25 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Tue, 9 Dec 2025 09:47:32 +0530
Subject: [PATCH 9/9] Improve the arguments representation in FIR

---
 .../include/flang/Optimizer/Dialect/FIROps.td |  4 +-
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 81 +++++++++++++++++++
 2 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 78376a3ce17bc..55ce0be027e59 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -360,7 +360,7 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> {
 
     ```
       %a = ... -> !fir.ref<i32>
-      fir.prefetch %a {cacheType, localityHint = 3 : i32} : !fir.ref<i32>
+      fir.prefetch %a {read, data, localityHint = 3 : i32} : !fir.ref<i32>
       // ...
       fir.load %a : !fir.ref<i32> // use the prefetched value
     ```
@@ -381,7 +381,7 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> {
        ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]> : $localityHint,
        UnitAttr : $cacheType);
 
-  let assemblyFormat = "$memref attr-dict `:` type(operands)";
+  let hasCustomAssemblyFormat = 1;
 }
 
 def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 4f97acaa88b7a..20bcbedda7fc9 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -4264,6 +4264,87 @@ void fir::StoreOp::getEffects(
   addVolatileMemoryEffects({getMemref().getType()}, effects);
 }
 
+//===----------------------------------------------------------------------===//
+// PrefetchOp
+//===----------------------------------------------------------------------===//
+
+// Parses the custom assembly form emitted by fir::PrefetchOp::print:
+//
+//   fir.prefetch %addr {read|write, instruction|data, localityHint = N : T}
+//       : <type of %addr>
+//
+// `rw` and `cacheType` are unit attributes, so they are attached only for
+// `write` and `data`; `read` and `instruction` are encoded by their absence.
+mlir::ParseResult fir::PrefetchOp::parse(mlir::OpAsmParser &parser,
+                                         mlir::OperationState &result) {
+  mlir::OpAsmParser::UnresolvedOperand memref;
+  if (parser.parseOperand(memref))
+    return mlir::failure();
+
+  // The braced list is mandatory because `localityHint` is a required
+  // attribute of the operation.
+  if (parser.parseLBrace())
+    return mlir::failure();
+
+  // Access kind: `read` or `write`.
+  llvm::StringRef kw;
+  llvm::SMLoc kwLoc = parser.getCurrentLocation();
+  if (parser.parseKeyword(&kw))
+    return mlir::failure();
+  if (kw == "write")
+    result.addAttribute("rw", parser.getBuilder().getUnitAttr());
+  else if (kw != "read")
+    return parser.emitError(kwLoc, "Expected either read or write keyword");
+
+  if (parser.parseComma())
+    return mlir::failure();
+
+  // Cache kind: `instruction` or `data`.
+  kwLoc = parser.getCurrentLocation();
+  if (parser.parseKeyword(&kw))
+    return mlir::failure();
+  if (kw == "data")
+    result.addAttribute("cacheType", parser.getBuilder().getUnitAttr());
+  else if (kw != "instruction")
+    return parser.emitError(kwLoc,
+                            "Expected either instruction or data keyword");
+
+  // Locality hint: `localityHint = <typed integer attribute>`.
+  mlir::Attribute localityHint;
+  if (parser.parseComma() || parser.parseKeyword("localityHint") ||
+      parser.parseEqual() || parser.parseAttribute(localityHint) ||
+      parser.parseRBrace())
+    return mlir::failure();
+  result.addAttribute("localityHint", localityHint);
+
+  // Trailing `: type` gives the type used to resolve the address operand.
+  mlir::Type type;
+  if (parser.parseColonType(type))
+    return mlir::failure();
+  if (parser.resolveOperand(memref, type, result.operands))
+    return mlir::failure();
+  return mlir::success();
+}
+
+// Emits the custom assembly form:
+//   fir.prefetch %addr {read|write, instruction|data, localityHint = N : T}
+//       : <type of %addr>
+// where T is the type carried by the `localityHint` attribute.
+void fir::PrefetchOp::print(mlir::OpAsmPrinter &p) {
+  // A present `rw` unit attribute selects "write"; a present `cacheType`
+  // unit attribute selects "data".
+  llvm::StringRef accessKind = getRw() ? "write" : "read";
+  llvm::StringRef cacheKind = getCacheType() ? "data" : "instruction";
+  mlir::IntegerAttr locality = getLocalityHintAttr();
+
+  p << " ";
+  p.printOperand(getMemref());
+  p << " {" << accessKind << ", " << cacheKind;
+  p << ", localityHint = " << getLocalityHint();
+  p << " : " << locality.getType();
+  p << "} : " << getMemref().getType();
+}
+
 //===----------------------------------------------------------------------===//
 // CopyOp
 //===----------------------------------------------------------------------===//



More information about the flang-commits mailing list