[flang-commits] [flang] [flang] Inline scalar-to-scalar TRANSFER for same-size trivial types (PR #191589)

via flang-commits flang-commits at lists.llvm.org
Fri Apr 10 19:48:39 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-fir-hlfir

Author: Zhen Wang (wangzpgi)

<details>
<summary>Changes</summary>

Inline the TRANSFER intrinsic for scalar-to-scalar cases where the result is a trivial type (integer, real, etc.) and source and result have the same storage size. Instead of calling _FortranATransfer, the lowering now emits a fir.convert on the source address followed by a fir.load, effectively performing a reinterpret cast.

---
Full diff: https://github.com/llvm/llvm-project/pull/191589.diff


2 Files Affected:

- (modified) flang/lib/Optimizer/Builder/IntrinsicCall.cpp (+25-2) 
- (modified) flang/test/Lower/Intrinsics/transfer.f90 (+46-18) 


``````````diff
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index d6dee88f422e0..6b040fd342ad1 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -8680,6 +8680,31 @@ IntrinsicLibrary::genTransfer(mlir::Type resultType,
 
   assert(args.size() >= 2); // args.size() == 2 when size argument is omitted.
 
+  bool absentSize = (args.size() == 2);
+
+  // Inline scalar-to-scalar transfers when the result is a trivial type
+  // (integer, real, etc.) and both source and result have the same storage
+  // size.
+  if (absentSize && fir::isa_trivial(resultType)) {
+    mlir::Value sourceBase = fir::getBase(args[0]);
+    mlir::Type sourceType = fir::unwrapRefType(sourceBase.getType());
+    if (fir::isa_ref_type(sourceBase.getType()) &&
+        !mlir::isa<fir::SequenceType>(sourceType)) {
+      auto &dl = builder.getDataLayout();
+      auto &kindMap = builder.getKindMap();
+      auto sourceSizeAndAlign =
+          fir::getTypeSizeAndAlignment(loc, sourceType, dl, kindMap);
+      auto resultSizeAndAlign =
+          fir::getTypeSizeAndAlignment(loc, resultType, dl, kindMap);
+      if (sourceSizeAndAlign && resultSizeAndAlign &&
+          sourceSizeAndAlign->first == resultSizeAndAlign->first) {
+        auto refTy = builder.getRefType(resultType);
+        auto cast = builder.createConvert(loc, refTy, sourceBase);
+        return fir::LoadOp::create(builder, loc, cast);
+      }
+    }
+  }
+
   // Handle source argument
   mlir::Value source = builder.createBox(loc, args[0]);
 
@@ -8688,8 +8713,6 @@ IntrinsicLibrary::genTransfer(mlir::Type resultType,
   fir::BoxValue moldTmp = mold;
   unsigned moldRank = moldTmp.rank();
 
-  bool absentSize = (args.size() == 2);
-
   // Create mutable fir.box to be passed to the runtime for the result.
   mlir::Type type = (moldRank == 0 && absentSize)
                         ? resultType
diff --git a/flang/test/Lower/Intrinsics/transfer.f90 b/flang/test/Lower/Intrinsics/transfer.f90
index a792c8e91ba01..a44a1e645c813 100644
--- a/flang/test/Lower/Intrinsics/transfer.f90
+++ b/flang/test/Lower/Intrinsics/transfer.f90
@@ -3,24 +3,9 @@
 subroutine trans_test(store, word)
     ! CHECK-LABEL: func @_QPtrans_test(
     ! CHECK-SAME:                      %[[VAL_0:.*]]: !fir.ref<i32>{{.*}}, %[[VAL_1:.*]]: !fir.ref<f32>{{.*}}) {
-    ! CHECK:         %[[VAL_2:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
-    ! CHECK:         %[[VAL_3:.*]] = fir.embox %[[VAL_1]] : (!fir.ref<f32>) -> !fir.box<f32>
-    ! CHECK:         %[[VAL_4:.*]] = fir.embox %[[VAL_0]] : (!fir.ref<i32>) -> !fir.box<i32>
-    ! CHECK:         %[[VAL_5:.*]] = fir.zero_bits !fir.heap<i32>
-    ! CHECK:         %[[VAL_6:.*]] = fir.embox %[[VAL_5]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
-    ! CHECK:         fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<!fir.box<!fir.heap<i32>>>
-    ! CHECK:         %[[VAL_7:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref<!fir.char<1,{{.*}}>>
-    ! CHECK:         %[[VAL_8:.*]] = arith.constant {{.*}} : i32
-    ! CHECK:         %[[VAL_9:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
-    ! CHECK:         %[[VAL_10:.*]] = fir.convert %[[VAL_3]] : (!fir.box<f32>) -> !fir.box<none>
-    ! CHECK:         %[[VAL_11:.*]] = fir.convert %[[VAL_4]] : (!fir.box<i32>) -> !fir.box<none>
-    ! CHECK:         %[[VAL_12:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-    ! CHECK:         fir.call @_FortranATransfer(%[[VAL_9]], %[[VAL_10]], %[[VAL_11]], %[[VAL_12]], %[[VAL_8]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> ()
-    ! CHECK:         %[[VAL_14:.*]] = fir.load %[[VAL_2]] : !fir.ref<!fir.box<!fir.heap<i32>>>
-    ! CHECK:         %[[VAL_15:.*]] = fir.box_addr %[[VAL_14]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
-    ! CHECK:         %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.heap<i32>
-    ! CHECK:         fir.freemem %[[VAL_15]]
-    ! CHECK:         fir.store %[[VAL_16]] to %[[VAL_0]] : !fir.ref<i32>
+    ! CHECK:         %[[CAST:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<f32>) -> !fir.ref<i32>
+    ! CHECK:         %[[VAL:.*]] = fir.load %[[CAST]] : !fir.ref<i32>
+    ! CHECK:         fir.store %[[VAL]] to %[[VAL_0]] : !fir.ref<i32>
     ! CHECK:         return
     ! CHECK:       }
     integer :: store
@@ -120,3 +105,46 @@ integer function trans_test3(p)
     t = transfer(p, t)
     trans_test3 = t%x
   end function
+
+  ! Scalar same-size transfer (f64 -> i64) is inlined as fir.convert + fir.load.
+  subroutine trans_test_r8_to_i8(store, word)
+    ! CHECK-LABEL: func @_QPtrans_test_r8_to_i8(
+    ! CHECK-SAME:    %[[RES:.*]]: !fir.ref<i64>{{.*}}, %[[SRC:.*]]: !fir.ref<f64>{{.*}}) {
+    ! CHECK:         %[[CAST:.*]] = fir.convert %[[SRC]] : (!fir.ref<f64>) -> !fir.ref<i64>
+    ! CHECK:         %[[VAL:.*]] = fir.load %[[CAST]] : !fir.ref<i64>
+    ! CHECK:         fir.store %[[VAL]] to %[[RES]] : !fir.ref<i64>
+    ! CHECK-NOT:     fir.call @_FortranATransfer
+    ! CHECK:         return
+    ! CHECK:       }
+    integer(8) :: store
+    real(8) :: word
+    store = transfer(word, store)
+  end subroutine
+
+  ! BIND(C) struct (c_ptr) to integer(8): same byte size, inlined.
+  ! This covers the c_devptr pattern on CUDA device code.
+  subroutine trans_test_cptr_to_i8(store, src)
+    ! CHECK-LABEL: func @_QPtrans_test_cptr_to_i8(
+    ! CHECK-SAME:    %[[RES:.*]]: !fir.ref<i64>{{.*}}, %[[SRC:.*]]: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>{{.*}}) {
+    ! CHECK:         %[[CAST:.*]] = fir.convert %[[SRC]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> !fir.ref<i64>
+    ! CHECK:         %[[VAL:.*]] = fir.load %[[CAST]] : !fir.ref<i64>
+    ! CHECK:         fir.store %[[VAL]] to %[[RES]] : !fir.ref<i64>
+    ! CHECK-NOT:     fir.call @_FortranATransfer
+    ! CHECK:         return
+    ! CHECK:       }
+    use iso_c_binding
+    integer(8) :: store
+    type(c_ptr) :: src
+    store = transfer(src, store)
+  end subroutine
+
+  ! Different-size scalar transfer (i32 -> i64) falls back to runtime.
+  subroutine trans_test_diff_size(store, src)
+    ! CHECK-LABEL: func @_QPtrans_test_diff_size(
+    ! CHECK:         fir.call @_FortranATransfer(
+    ! CHECK:         return
+    ! CHECK:       }
+    integer(8) :: store
+    integer(4) :: src
+    store = transfer(src, store)
+  end subroutine

``````````

</details>


https://github.com/llvm/llvm-project/pull/191589


More information about the flang-commits mailing list