[flang] [llvm] [flang] implement `split` for fortran 2023 (PR #161484)
Connector Switch via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 21:20:49 PDT 2025
https://github.com/c8ef updated https://github.com/llvm/llvm-project/pull/161484
>From 9cd317766fef220aaa7351c6c779caada5ca32cf Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Wed, 1 Oct 2025 14:56:58 +0800
Subject: [PATCH 1/2] [flang] implement split for fortran 2023
---
flang-rt/lib/runtime/character.cpp | 37 ++++++++++++++++
.../flang/Optimizer/Builder/IntrinsicCall.h | 1 +
.../Optimizer/Builder/Runtime/Character.h | 8 ++++
flang/include/flang/Runtime/character.h | 7 +++
flang/lib/Evaluate/intrinsics.cpp | 4 ++
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 42 ++++++++++++++++++
.../Optimizer/Builder/Runtime/Character.cpp | 27 ++++++++++++
flang/test/Lower/Intrinsics/split.f90 | 43 +++++++++++++++++++
.../Builder/Runtime/CharacterTest.cpp | 23 ++++++++++
9 files changed, 192 insertions(+)
create mode 100644 flang/test/Lower/Intrinsics/split.f90
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 98a225dbec9f9..64aa8a24c0e64 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -570,6 +570,30 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
}
}
+template <typename CHAR>
+inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen,
+ const CHAR *set, std::size_t setLen, std::size_t pos, bool back) {
+ if (!back) {
+ for (std::size_t i{pos + 1}; i <= xLen; ++i) {
+ for (std::size_t j{0}; j < setLen; ++j) {
+ if (x[i - 1] == set[j]) {
+ return i;
+ }
+ }
+ }
+ return xLen + 1;
+ } else {
+ for (std::size_t i{pos - 1}; i != 0; --i) {
+ for (std::size_t j{0}; j < setLen; ++j) {
+ if (x[i - 1] == set[j]) {
+ return i;
+ }
+ }
+ }
+ return 0;
+ }
+}
+
extern "C" {
RT_EXT_API_GROUP_BEGIN
@@ -917,6 +941,19 @@ void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x,
MaxMin<true>(accumulator, x, sourceFile, sourceLine);
}
+std::size_t RTDEF(Split1)(const char *x, std::size_t xLen, const char *set,
+ std::size_t setLen, std::size_t pos, bool back) {
+ return Split<char>(x, xLen, set, setLen, pos, back);
+}
+std::size_t RTDEF(Split2)(const char16_t *x, std::size_t xLen,
+ const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return Split<char16_t>(x, xLen, set, setLen, pos, back);
+}
+std::size_t RTDEF(Split4)(const char32_t *x, std::size_t xLen,
+ const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return Split<char32_t>(x, xLen, set, setLen, pos, back);
+}
+
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index cd73798d71262..6c668af9adf2c 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -430,6 +430,7 @@ struct IntrinsicLibrary {
fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genSpacing(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
+ void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genStorageSize(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index d1c521de94438..a5ac3e175fe64 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -120,6 +120,14 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
mlir::Value setBase, mlir::Value setLen,
mlir::Value back);
+/// Generate call to the split runtime routine that is specialized on
+/// \param kind.
+/// The \param kind represents the kind of the elements in the strings.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+ mlir::Value stringBase, mlir::Value stringLen,
+ mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+ mlir::Value back);
+
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CHARACTER_H
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index dd47686fe858f..2b94b3cc2d1d9 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -127,6 +127,13 @@ std::size_t RTDECL(Verify4)(const char32_t *, std::size_t, const char32_t *set,
void RTDECL(Verify)(Descriptor &result, const Descriptor &string,
const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *, std::size_t, const char *set,
+ std::size_t, std::size_t, bool back = false);
+std::size_t RTDECL(Split2)(const char16_t *, std::size_t, const char16_t *set,
+ std::size_t, std::size_t, bool back = false);
+std::size_t RTDECL(Split4)(const char32_t *, std::size_t, const char32_t *set,
+ std::size_t, std::size_t, bool back = false);
}
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index abe53c31210d0..c80778ccaa547 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1721,6 +1721,10 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{{"seconds", AnyInt, Rank::scalar, Optionality::required,
common::Intent::In}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
+ {"split",
+ {{"string", SameCharNoLen}, {"set", SameCharNoLen}, {"pos", AnyInt},
+ {"back", AnyLogical, Rank::elemental, Optionality::optional}},
+ {}, Rank::elemental, IntrinsicClass::pureSubroutine},
{"unlink",
{{"path", DefaultChar, Rank::scalar, Optionality::required,
common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index e1c9520592de6..a9d2ccc1ff868 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -926,6 +926,12 @@ static constexpr IntrinsicHandler handlers[]{
/*isElemental=*/false},
{"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
{"spacing", &I::genSpacing},
+ {"split",
+ &I::genSplit,
+ {{{"string", asAddr},
+ {"set", asAddr},
+ {"pos", asAddr},
+ {"back", asValue, handleDynamicOptional}}}},
{"spread",
&I::genSpread,
{{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8655,6 +8661,42 @@ mlir::Value IntrinsicLibrary::genSpacing(mlir::Type resultType,
fir::runtime::genSpacing(builder, loc, fir::getBase(args[0])));
}
+// SPLIT
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+
+ // Handle required string base arg
+ mlir::Value stringBase = fir::getBase(args[0]);
+
+ // Handle required set string base arg
+ mlir::Value setBase = fir::getBase(args[1]);
+
+ // Handle kind argument; it is the kind of character in this case
+ fir::KindTy kind =
+ fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+ stringBase.getType());
+
+ // Handle string length argument
+ mlir::Value stringLen = fir::getLen(args[0]);
+
+ // Handle set string length argument
+ mlir::Value setLen = fir::getLen(args[1]);
+
+ // Handle pos argument
+ mlir::Value posAddr = fir::getBase(args[2]);
+ mlir::Value pos = fir::LoadOp::create(builder, loc, posAddr);
+
+ // Handle optional back argument
+ mlir::Value back =
+ isStaticallyAbsent(args[3])
+ ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+ : fir::getBase(args[3]);
+
+ pos = fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen,
+ setBase, setLen, pos, back);
+ builder.createStoreWithConvert(loc, pos, posAddr);
+}
+
// SPREAD
fir::ExtendedValue
IntrinsicLibrary::genSpread(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 57fb0cccf6863..bd112082711d6 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -276,3 +276,30 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
stringLen, setBase, setLen, back);
return fir::CallOp::create(builder, loc, func, args).getResult(0);
}
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+ mlir::Location loc, int kind,
+ mlir::Value stringBase,
+ mlir::Value stringLen, mlir::Value setBase,
+ mlir::Value setLen, mlir::Value pos,
+ mlir::Value back) {
+ mlir::func::FuncOp func;
+ switch (kind) {
+ case 1:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+ break;
+ case 2:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+ break;
+ case 4:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+ break;
+ default:
+ fir::emitFatalError(
+ loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+ }
+ auto fTy = func.getFunctionType();
+ auto args = fir::runtime::createArguments(
+ builder, loc, fTy, stringBase, stringLen, setBase, setLen, pos, back);
+ return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..d0d2f48cddf16
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,43 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsplit_test1(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test1(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %false = arith.constant false
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %false) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p)
+end subroutine split_test1
+
+! CHECK-LABEL: func @_QPsplit_test2(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test2(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %true = arith.constant true
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %true) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p, .true.)
+end subroutine split_test2
diff --git a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
index f3b0fde175bac..f8db671ad845c 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
+++ b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
@@ -209,3 +209,26 @@ TEST_F(RuntimeCallTest, genVerifyTest) {
checkGenVerify(*firBuilder, "_FortranAVerify2", 2);
checkGenVerify(*firBuilder, "_FortranAVerify4", 4);
}
+
+void checkGenSplit(
+ fir::FirOpBuilder &builder, llvm::StringRef fctName, unsigned kind) {
+ auto loc = builder.getUnknownLoc();
+ mlir::Type charTy = fir::CharacterType::get(builder.getContext(), kind, 10);
+ mlir::Type boxTy = fir::BoxType::get(charTy);
+ mlir::Type i32Ty = IntegerType::get(builder.getContext(), 32);
+ mlir::Value stringBase = fir::UndefOp::create(builder, loc, boxTy);
+ mlir::Value stringLen = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value setBase = fir::UndefOp::create(builder, loc, boxTy);
+ mlir::Value setLen = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value pos = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value back = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value res = fir::runtime::genSplit(
+ builder, loc, kind, stringBase, stringLen, setBase, setLen, pos, back);
+ checkCallOp(res.getDefiningOp(), fctName, 6, /*addLocArgs=*/false);
+}
+
+TEST_F(RuntimeCallTest, genSplitTest) {
+ checkGenSplit(*firBuilder, "_FortranASplit1", 1);
+ checkGenSplit(*firBuilder, "_FortranASplit2", 2);
+ checkGenSplit(*firBuilder, "_FortranASplit4", 4);
+}
>From 0cd3fed529bcebffe4b45adb8b90d799a7c16e30 Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Thu, 2 Oct 2025 12:20:22 +0800
Subject: [PATCH 2/2] address review comments, add runtime check for pos as per
standard
---
flang-rt/lib/runtime/character.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 64aa8a24c0e64..3becdb577e563 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -573,7 +573,10 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
template <typename CHAR>
inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen,
const CHAR *set, std::size_t setLen, std::size_t pos, bool back) {
+ Terminator terminator{__FILE__, __LINE__};
+
if (!back) {
+ RUNTIME_CHECK(terminator, pos <= xLen);
for (std::size_t i{pos + 1}; i <= xLen; ++i) {
for (std::size_t j{0}; j < setLen; ++j) {
if (x[i - 1] == set[j]) {
@@ -583,6 +586,7 @@ inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen,
}
return xLen + 1;
} else {
+ RUNTIME_CHECK(terminator, pos >= 1 && pos <= xLen + 1);
for (std::size_t i{pos - 1}; i != 0; --i) {
for (std::size_t j{0}; j < setLen; ++j) {
if (x[i - 1] == set[j]) {
More information about the llvm-commits
mailing list