[flang-commits] [flang] [llvm] [flang] implement `split` for fortran 2023 (PR #161484)
Connector Switch via flang-commits
flang-commits at lists.llvm.org
Thu Oct 2 05:41:39 PDT 2025
https://github.com/c8ef updated https://github.com/llvm/llvm-project/pull/161484
>From e801c74be52dd396fb09a995762af1f48b07a944 Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Thu, 2 Oct 2025 20:41:28 +0800
Subject: [PATCH] implement split for f2023
---
flang-rt/lib/runtime/character.cpp | 47 +++++++++++++++++
flang-rt/unittests/Runtime/CharacterTest.cpp | 50 +++++++++++++++++++
.../flang/Optimizer/Builder/IntrinsicCall.h | 1 +
.../Optimizer/Builder/Runtime/Character.h | 8 +++
flang/include/flang/Runtime/character.h | 10 ++++
flang/lib/Evaluate/intrinsics.cpp | 4 ++
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 42 ++++++++++++++++
.../Optimizer/Builder/Runtime/Character.cpp | 31 ++++++++++++
flang/test/Lower/Intrinsics/split.f90 | 43 ++++++++++++++++
.../Builder/Runtime/CharacterTest.cpp | 23 +++++++++
10 files changed, 259 insertions(+)
create mode 100644 flang/test/Lower/Intrinsics/split.f90
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 98a225dbec9f9..e26f9ef4c5310 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -570,6 +570,35 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
}
}
+template <typename CHAR>
+inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen,
+ const CHAR *set, std::size_t setLen, std::size_t pos, bool back,
+ const char *sourceFile, int sourceLine) {
+ Terminator terminator{sourceFile, sourceLine};
+
+ if (!back) {
+ RUNTIME_CHECK(terminator, pos <= xLen);
+ for (std::size_t i{pos + 1}; i <= xLen; ++i) {
+ for (std::size_t j{0}; j < setLen; ++j) {
+ if (x[i - 1] == set[j]) {
+ return i;
+ }
+ }
+ }
+ return xLen + 1;
+ } else {
+ RUNTIME_CHECK(terminator, pos >= 1 && pos <= xLen + 1);
+ for (std::size_t i{pos - 1}; i != 0; --i) {
+ for (std::size_t j{0}; j < setLen; ++j) {
+ if (x[i - 1] == set[j]) {
+ return i;
+ }
+ }
+ }
+ return 0;
+ }
+}
+
extern "C" {
RT_EXT_API_GROUP_BEGIN
@@ -917,6 +946,24 @@ void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x,
MaxMin<true>(accumulator, x, sourceFile, sourceLine);
}
+std::size_t RTDEF(Split1)(const char *x, std::size_t xLen, const char *set,
+ std::size_t setLen, std::size_t pos, bool back, const char *sourceFile,
+ int sourceLine) {
+ return Split<char>(x, xLen, set, setLen, pos, back, sourceFile, sourceLine);
+}
+std::size_t RTDEF(Split2)(const char16_t *x, std::size_t xLen,
+ const char16_t *set, std::size_t setLen, std::size_t pos, bool back,
+ const char *sourceFile, int sourceLine) {
+ return Split<char16_t>(
+ x, xLen, set, setLen, pos, back, sourceFile, sourceLine);
+}
+std::size_t RTDEF(Split4)(const char32_t *x, std::size_t xLen,
+ const char32_t *set, std::size_t setLen, std::size_t pos, bool back,
+ const char *sourceFile, int sourceLine) {
+ return Split<char32_t>(
+ x, xLen, set, setLen, pos, back, sourceFile, sourceLine);
+}
+
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime
diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 2c7af27b9da77..4c2d8b3ecad5f 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -430,3 +430,53 @@ TYPED_TEST(RepeatTests, Repeat) {
RunRepeatTest<TypeParam>(t.ncopies, t.input, t.output);
}
}
+
+// Test SPLIT()
+template <typename CHAR>
+using SplitFunction = std::function<std::size_t(const CHAR *, std::size_t,
+ const CHAR *, std::size_t, std::size_t, bool, const char *, int)>;
+using SplitFunctions = CharTypedFunctions<SplitFunction>;
+template <typename CHAR> struct SplitTests : public ::testing::Test {};
+TYPED_TEST_SUITE(SplitTests, CharacterTypes, );
+
+struct SplitTestCase {
+ const char *x, *y;
+ std::size_t pos;
+ bool back;
+ std::size_t expect;
+};
+
+template <typename CHAR>
+void RunSplitTests(const std::vector<SplitTestCase> &testCases,
+ const SplitFunction<CHAR> &function) {
+ for (const auto &t : testCases) {
+ // Convert default character to desired kind
+ std::size_t xLen{std::strlen(t.x)}, yLen{std::strlen(t.y)};
+ std::basic_string<CHAR> x{t.x, t.x + xLen};
+ std::basic_string<CHAR> y{t.y, t.y + yLen};
+ auto got{function(x.data(), xLen, y.data(), yLen, t.pos, t.back, "", 0)};
+ ASSERT_EQ(got, t.expect)
+ << "SPLIT('" << t.x << "','" << t.y << "',pos=" << t.pos
+ << ",back=" << t.back << ") for CHARACTER(kind=" << sizeof(CHAR)
+ << "): got " << got << ", expected " << t.expect;
+ }
+}
+
+TYPED_TEST(SplitTests, Split) {
+ static SplitFunctions functions{
+ RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+ static std::vector<SplitTestCase> testcases{
+ {" one,last example,", ", ", 0, false, 1},
+ {" one,last example,", ", ", 1, false, 5},
+ {" one,last example,", ", ", 5, false, 10},
+ {" one,last example,", ", ", 10, false, 18},
+ {" one,last example,", ", ", 18, false, 19},
+ {" one,last example,", ", ", 19, true, 18},
+ {" one,last example,", ", ", 18, true, 10},
+ {" one,last example,", ", ", 10, true, 5},
+ {" one,last example,", ", ", 5, true, 1},
+ {" one,last example,", ", ", 1, true, 0},
+ };
+ RunSplitTests<TypeParam>(
+ testcases, std::get<SplitFunction<TypeParam>>(functions));
+}
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 320f913858956..8b539b164726c 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -436,6 +436,7 @@ struct IntrinsicLibrary {
fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genSpacing(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
+ void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genStorageSize(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index 261ac348a4024..2ab0652ee0f7d 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -128,6 +128,14 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
mlir::Value setBase, mlir::Value setLen,
mlir::Value back);
+/// Generate call to the split runtime routine that is specialized on
+/// \param kind.
+/// The \param kind represents the kind of the elements in the strings.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+ mlir::Value stringBase, mlir::Value stringLen,
+ mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+ mlir::Value back);
+
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CHARACTER_H
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index dd47686fe858f..f9bbd6d727d4e 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -127,6 +127,16 @@ std::size_t RTDECL(Verify4)(const char32_t *, std::size_t, const char32_t *set,
void RTDECL(Verify)(Descriptor &result, const Descriptor &string,
const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *, std::size_t, const char *set,
+ std::size_t, std::size_t, bool back = false,
+ const char *sourceFile = nullptr, int sourceLine = 0);
+std::size_t RTDECL(Split2)(const char16_t *, std::size_t, const char16_t *set,
+ std::size_t, std::size_t, bool back = false,
+ const char *sourceFile = nullptr, int sourceLine = 0);
+std::size_t RTDECL(Split4)(const char32_t *, std::size_t, const char32_t *set,
+ std::size_t, std::size_t, bool back = false,
+ const char *sourceFile = nullptr, int sourceLine = 0);
}
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index fe679da4ff98b..310c1a3cd3957 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1725,6 +1725,10 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{{"seconds", AnyInt, Rank::scalar, Optionality::required,
common::Intent::In}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
+ {"split",
+ {{"string", SameCharNoLen}, {"set", SameCharNoLen}, {"pos", AnyInt},
+ {"back", AnyLogical, Rank::elemental, Optionality::optional}},
+ {}, Rank::elemental, IntrinsicClass::pureSubroutine},
{"unlink",
{{"path", DefaultChar, Rank::scalar, Optionality::required,
common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 71d35e37bbe94..06aab4b3c466d 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -958,6 +958,12 @@ static constexpr IntrinsicHandler handlers[]{
/*isElemental=*/false},
{"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
{"spacing", &I::genSpacing},
+ {"split",
+ &I::genSplit,
+ {{{"string", asAddr},
+ {"set", asAddr},
+ {"pos", asAddr},
+ {"back", asValue, handleDynamicOptional}}}},
{"spread",
&I::genSpread,
{{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8763,6 +8769,42 @@ mlir::Value IntrinsicLibrary::genSpacing(mlir::Type resultType,
fir::runtime::genSpacing(builder, loc, fir::getBase(args[0])));
}
+// SPLIT
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+
+ // Handle required string base arg
+ mlir::Value stringBase = fir::getBase(args[0]);
+
+ // Handle required set string base arg
+ mlir::Value setBase = fir::getBase(args[1]);
+
+ // Handle kind argument; it is the kind of character in this case
+ fir::KindTy kind =
+ fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+ stringBase.getType());
+
+ // Handle string length argument
+ mlir::Value stringLen = fir::getLen(args[0]);
+
+ // Handle set string length argument
+ mlir::Value setLen = fir::getLen(args[1]);
+
+ // Handle pos argument
+ mlir::Value posAddr = fir::getBase(args[2]);
+ mlir::Value pos = fir::LoadOp::create(builder, loc, posAddr);
+
+ // Handle optional back argument
+ mlir::Value back =
+ isStaticallyAbsent(args[3])
+ ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+ : fir::getBase(args[3]);
+
+ pos = fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen,
+ setBase, setLen, pos, back);
+ builder.createStoreWithConvert(loc, pos, posAddr);
+}
+
// SPREAD
fir::ExtendedValue
IntrinsicLibrary::genSpread(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 540ecba299dc3..6d4942e65d77a 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -290,3 +290,34 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
stringLen, setBase, setLen, back);
return fir::CallOp::create(builder, loc, func, args).getResult(0);
}
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+ mlir::Location loc, int kind,
+ mlir::Value stringBase,
+ mlir::Value stringLen, mlir::Value setBase,
+ mlir::Value setLen, mlir::Value pos,
+ mlir::Value back) {
+ mlir::func::FuncOp func;
+ switch (kind) {
+ case 1:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+ break;
+ case 2:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+ break;
+ case 4:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+ break;
+ default:
+ fir::emitFatalError(
+ loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+ }
+ auto fTy = func.getFunctionType();
+ auto sourceFile = fir::factory::locationToFilename(builder, loc);
+ auto sourceLine =
+ fir::factory::locationToLineNo(builder, loc, fTy.getInput(7));
+ auto args = fir::runtime::createArguments(builder, loc, fTy, stringBase,
+ stringLen, setBase, setLen, pos,
+ back, sourceFile, sourceLine);
+ return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..6d643aa54452a
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,43 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsplit_test1(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test1(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %false = arith.constant false
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %false, {{.*}}) {{.*}}: (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1, !fir.ref<i8>, i32) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p)
+end subroutine split_test1
+
+! CHECK-LABEL: func @_QPsplit_test2(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test2(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %true = arith.constant true
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %true, {{.*}}) {{.*}}: (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1, !fir.ref<i8>, i32) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p, .true.)
+end subroutine split_test2
diff --git a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
index f3b0fde175bac..d2344ad64e810 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
+++ b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
@@ -209,3 +209,26 @@ TEST_F(RuntimeCallTest, genVerifyTest) {
checkGenVerify(*firBuilder, "_FortranAVerify2", 2);
checkGenVerify(*firBuilder, "_FortranAVerify4", 4);
}
+
+void checkGenSplit(
+ fir::FirOpBuilder &builder, llvm::StringRef fctName, unsigned kind) {
+ auto loc = builder.getUnknownLoc();
+ mlir::Type charTy = fir::CharacterType::get(builder.getContext(), kind, 10);
+ mlir::Type boxTy = fir::BoxType::get(charTy);
+ mlir::Type i32Ty = IntegerType::get(builder.getContext(), 32);
+ mlir::Value stringBase = fir::UndefOp::create(builder, loc, boxTy);
+ mlir::Value stringLen = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value setBase = fir::UndefOp::create(builder, loc, boxTy);
+ mlir::Value setLen = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value pos = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value back = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value res = fir::runtime::genSplit(
+ builder, loc, kind, stringBase, stringLen, setBase, setLen, pos, back);
+ checkCallOp(res.getDefiningOp(), fctName, 6);
+}
+
+TEST_F(RuntimeCallTest, genSplitTest) {
+ checkGenSplit(*firBuilder, "_FortranASplit1", 1);
+ checkGenSplit(*firBuilder, "_FortranASplit2", 2);
+ checkGenSplit(*firBuilder, "_FortranASplit4", 4);
+}
More information about the flang-commits
mailing list