[flang-commits] [flang] ca3bc44 - [flang] Inline scalar-to-scalar TRANSFER for same-size trivial types (#191589)

via flang-commits flang-commits at lists.llvm.org
Thu Apr 16 12:10:53 PDT 2026


Author: Zhen Wang
Date: 2026-04-16T19:10:47Z
New Revision: ca3bc44c3090481615bd8fc4b3e64358b845c8bf

URL: https://github.com/llvm/llvm-project/commit/ca3bc44c3090481615bd8fc4b3e64358b845c8bf
DIFF: https://github.com/llvm/llvm-project/commit/ca3bc44c3090481615bd8fc4b3e64358b845c8bf.diff

LOG: [flang] Inline scalar-to-scalar TRANSFER for same-size trivial types (#191589)

Inline the TRANSFER intrinsic for scalar-to-scalar cases where the
result is a trivial type (integer, real, etc.) and source and result
have the same storage size. Instead of calling _FortranATransfer, the
lowering now emits either a fir.load followed by an arith.bitcast (when
both source and result are integer or floating-point types), or a
fir.convert on the source address followed by a fir.load, effectively
performing a reinterpret cast.

Added: 
    

Modified: 
    flang/lib/Optimizer/Builder/IntrinsicCall.cpp
    flang/test/Lower/Intrinsics/transfer.f90

Removed: 
    


################################################################################
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index d6dee88f422e0..3623323e8cf3d 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -8680,6 +8680,39 @@ IntrinsicLibrary::genTransfer(mlir::Type resultType,
 
   assert(args.size() >= 2); // args.size() == 2 when size argument is omitted.
 
+  bool absentSize = (args.size() == 2);
+
+  // Inline scalar-to-scalar transfers when the result is a trivial type
+  // (integer, real, etc.) and both source and result have the same storage
+  // size.
+  if (absentSize && fir::isa_trivial(resultType)) {
+    mlir::Value sourceBase = fir::getBase(args[0]);
+    mlir::Type sourceType = fir::unwrapRefType(sourceBase.getType());
+    mlir::Type moldType = fir::unwrapRefType(fir::getBase(args[1]).getType());
+    if (fir::isa_ref_type(sourceBase.getType()) &&
+        (fir::isa_trivial(sourceType) ||
+         mlir::isa<fir::RecordType>(sourceType)) &&
+        fir::isa_trivial(moldType)) {
+      auto sourceSizeAndAlign = fir::getTypeSizeAndAlignment(
+          loc, sourceType, builder.getDataLayout(), builder.getKindMap());
+      auto resultSizeAndAlign = fir::getTypeSizeAndAlignment(
+          loc, resultType, builder.getDataLayout(), builder.getKindMap());
+      if (sourceSizeAndAlign && resultSizeAndAlign &&
+          sourceSizeAndAlign->first == resultSizeAndAlign->first) {
+        if (mlir::isa<mlir::IntegerType, mlir::FloatType>(sourceType) &&
+            mlir::isa<mlir::IntegerType, mlir::FloatType>(resultType)) {
+          mlir::Value val = fir::LoadOp::create(builder, loc, sourceBase);
+          if (sourceType != resultType)
+            val = mlir::arith::BitcastOp::create(builder, loc, resultType, val);
+          return val;
+        }
+        mlir::Type refTy = builder.getRefType(resultType);
+        mlir::Value cast = builder.createConvert(loc, refTy, sourceBase);
+        return fir::LoadOp::create(builder, loc, cast);
+      }
+    }
+  }
+
   // Handle source argument
   mlir::Value source = builder.createBox(loc, args[0]);
 
@@ -8688,8 +8721,6 @@ IntrinsicLibrary::genTransfer(mlir::Type resultType,
   fir::BoxValue moldTmp = mold;
   unsigned moldRank = moldTmp.rank();
 
-  bool absentSize = (args.size() == 2);
-
   // Create mutable fir.box to be passed to the runtime for the result.
   mlir::Type type = (moldRank == 0 && absentSize)
                         ? resultType

diff --git a/flang/test/Lower/Intrinsics/transfer.f90 b/flang/test/Lower/Intrinsics/transfer.f90
index 6a9ea14570fb3..7afdfd28c2ae1 100644
--- a/flang/test/Lower/Intrinsics/transfer.f90
+++ b/flang/test/Lower/Intrinsics/transfer.f90
@@ -3,17 +3,12 @@
 subroutine trans_test(store, word)
     ! CHECK-LABEL: func @_QPtrans_test(
     ! CHECK-SAME:                      %[[VAL_0:.*]]: !fir.ref<i32>{{.*}}, %[[VAL_1:.*]]: !fir.ref<f32>{{.*}}) {
-    ! CHECK-DAG:     %[[RESULT_BOX:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
     ! CHECK-DAG:     %[[store:.*]]:2 = hlfir.declare %[[VAL_0]] {{.*}}{uniq_name = "_QFtrans_testEstore"}
     ! CHECK-DAG:     %[[word:.*]]:2 = hlfir.declare %[[VAL_1]] {{.*}}{uniq_name = "_QFtrans_testEword"}
-    ! CHECK:         %[[VAL_3:.*]] = fir.embox %[[word]]#0 : (!fir.ref<f32>) -> !fir.box<f32>
-    ! CHECK:         %[[VAL_4:.*]] = fir.embox %[[store]]#0 : (!fir.ref<i32>) -> !fir.box<i32>
-    ! CHECK:         fir.call @_FortranATransfer({{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> ()
-    ! CHECK:         %[[LOADED:.*]] = fir.load %[[RESULT_BOX]] : !fir.ref<!fir.box<!fir.heap<i32>>>
-    ! CHECK:         %[[ADDR:.*]] = fir.box_addr %[[LOADED]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
-    ! CHECK:         %[[VAL:.*]] = fir.load %[[ADDR]] : !fir.heap<i32>
-    ! CHECK:         fir.freemem %[[ADDR]]
+    ! CHECK:         %[[LOADED:.*]] = fir.load %[[word]]#0 : !fir.ref<f32>
+    ! CHECK:         %[[VAL:.*]] = arith.bitcast %[[LOADED]] : f32 to i32
     ! CHECK:         hlfir.assign %[[VAL]] to %[[store]]#0 : i32, !fir.ref<i32>
+    ! CHECK-NOT:     fir.call @_FortranATransfer
     ! CHECK:         return
     ! CHECK:       }
     integer :: store
@@ -54,3 +49,105 @@ integer function trans_test3(p)
     t = transfer(p, t)
     trans_test3 = t%x
   end function
+
+  ! Scalar same-size transfer (f64 -> i64) is inlined as fir.load + arith.bitcast.
+  subroutine trans_test_r8_to_i8(store, word)
+    ! CHECK-LABEL: func @_QPtrans_test_r8_to_i8(
+    ! CHECK-SAME:    %[[RES:.*]]: !fir.ref<i64>{{.*}}, %[[SRC:.*]]: !fir.ref<f64>{{.*}}) {
+    ! CHECK-DAG:     %[[store:.*]]:2 = hlfir.declare %[[RES]] {{.*}}{uniq_name = "_QFtrans_test_r8_to_i8Estore"}
+    ! CHECK-DAG:     %[[word:.*]]:2 = hlfir.declare %[[SRC]] {{.*}}{uniq_name = "_QFtrans_test_r8_to_i8Eword"}
+    ! CHECK:         %[[LOADED:.*]] = fir.load %[[word]]#0 : !fir.ref<f64>
+    ! CHECK:         %[[VAL:.*]] = arith.bitcast %[[LOADED]] : f64 to i64
+    ! CHECK:         hlfir.assign %[[VAL]] to %[[store]]#0 : i64, !fir.ref<i64>
+    ! CHECK-NOT:     fir.call @_FortranATransfer
+    ! CHECK:         return
+    ! CHECK:       }
+    integer(8) :: store
+    real(8) :: word
+    store = transfer(word, store)
+  end subroutine
+
+  ! BIND(C) struct (c_ptr) to integer(8): same byte size, inlined via
+  ! address-level reinterpret. Covers the c_devptr pattern on CUDA device code.
+  subroutine trans_test_cptr_to_i8(store, src)
+    ! CHECK-LABEL: func @_QPtrans_test_cptr_to_i8(
+    ! CHECK:         %[[srcDecl:.*]]:2 = hlfir.declare {{.*}}{uniq_name = "_QFtrans_test_cptr_to_i8Esrc"}
+    ! CHECK:         %[[storeDecl:.*]]:2 = hlfir.declare {{.*}}{uniq_name = "_QFtrans_test_cptr_to_i8Estore"}
+    ! CHECK:         %[[CAST:.*]] = fir.convert %[[srcDecl]]#0 : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) -> !fir.ref<i64>
+    ! CHECK:         %[[VAL:.*]] = fir.load %[[CAST]] : !fir.ref<i64>
+    ! CHECK:         hlfir.assign %[[VAL]] to %[[storeDecl]]#0 : i64, !fir.ref<i64>
+    ! CHECK-NOT:     fir.call @_FortranATransfer
+    ! CHECK:         return
+    ! CHECK:       }
+    use iso_c_binding
+    integer(8) :: store
+    type(c_ptr) :: src
+    store = transfer(src, store)
+  end subroutine
+
+  ! Different-size scalar transfer (i32 -> i64) falls back to runtime.
+  subroutine trans_test_diff_size(store, src)
+    ! CHECK-LABEL: func @_QPtrans_test_diff_size(
+    ! CHECK:         fir.call @_FortranATransfer(
+    ! CHECK:         return
+    ! CHECK:       }
+    integer(8) :: store
+    integer(4) :: src
+    store = transfer(src, store)
+  end subroutine
+
+  ! Array mold without SIZE: result is rank-1 array, must use runtime.
+  subroutine trans_test_array_mold(src, result)
+    ! CHECK-LABEL: func @_QPtrans_test_array_mold(
+    ! CHECK:         fir.call @_FortranATransfer(
+    ! CHECK:         return
+    ! CHECK:       }
+    real :: src
+    integer, allocatable :: result(:)
+    integer :: mold(4)
+    result = transfer(src, mold)
+  end subroutine
+
+  ! Allocatable mold: must use runtime.
+  subroutine trans_test_alloc_mold(src, result)
+    ! CHECK-LABEL: func @_QPtrans_test_alloc_mold(
+    ! CHECK:         fir.call @_FortranATransfer(
+    ! CHECK:         return
+    ! CHECK:       }
+    real :: src
+    integer, allocatable :: mold(:)
+    integer, allocatable :: result(:)
+    result = transfer(src, mold)
+  end subroutine
+
+  ! POINTER source: descriptor is unpacked before reaching genTransfer,
+  ! so the inline optimization applies.
+  subroutine trans_test_pointer_source(store, src)
+    ! CHECK-LABEL: func @_QPtrans_test_pointer_source(
+    ! CHECK:         fir.load {{.*}} : !fir.ref<!fir.box<!fir.ptr<f32>>>
+    ! CHECK:         fir.box_addr
+    ! CHECK:         %[[VAL:.*]] = fir.load {{.*}} : !fir.ptr<f32>
+    ! CHECK:         arith.bitcast %[[VAL]] : f32 to i32
+    ! CHECK-NOT:     fir.call @_FortranATransfer
+    ! CHECK:         return
+    ! CHECK:       }
+    integer :: store
+    real, pointer :: src
+    store = transfer(src, store)
+  end subroutine
+
+  ! ALLOCATABLE source: descriptor is unpacked before reaching genTransfer,
+  ! so the inline optimization applies.
+  subroutine trans_test_alloc_source(store, src)
+    ! CHECK-LABEL: func @_QPtrans_test_alloc_source(
+    ! CHECK:         fir.load {{.*}} : !fir.ref<!fir.box<!fir.heap<f32>>>
+    ! CHECK:         fir.box_addr
+    ! CHECK:         %[[VAL:.*]] = fir.load {{.*}} : !fir.heap<f32>
+    ! CHECK:         arith.bitcast %[[VAL]] : f32 to i32
+    ! CHECK-NOT:     fir.call @_FortranATransfer
+    ! CHECK:         return
+    ! CHECK:       }
+    integer :: store
+    real, allocatable :: src
+    store = transfer(src, store)
+  end subroutine


        


More information about the flang-commits mailing list