[flang] [llvm] [flang] Implement SPLIT intrinsic subroutine with tests (PR #185584)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 23:58:24 PDT 2026
https://github.com/laoshd updated https://github.com/llvm/llvm-project/pull/185584
>From 335b9515c2e41b78b44cd1221c9391c3a3fc9422 Mon Sep 17 00:00:00 2001
From: Shandong Lao <shandong.lao at hpe.com>
Date: Tue, 10 Mar 2026 01:07:14 -0500
Subject: [PATCH 1/2] [flang] Implement SPLIT intrinsic subroutine with tests
---
flang-rt/lib/runtime/character.cpp | 64 ++++++++++++++
flang-rt/unittests/Runtime/CharacterTest.cpp | 85 +++++++++++++++++++
flang/docs/F202X.md | 53 +++---------
flang/docs/FortranStandardsSupport.md | 2 +-
flang/docs/Intrinsics.md | 18 ++++
.../flang/Optimizer/Builder/IntrinsicCall.h | 1 +
.../Optimizer/Builder/Runtime/Character.h | 9 ++
flang/include/flang/Runtime/character.h | 9 ++
flang/lib/Evaluate/intrinsics.cpp | 10 +++
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 51 +++++++++++
.../Optimizer/Builder/Runtime/Character.cpp | 28 ++++++
flang/test/Lower/Intrinsics/split.f90 | 37 ++++++++
flang/test/Semantics/split.f90 | 69 +++++++++++++++
13 files changed, 394 insertions(+), 42 deletions(-)
create mode 100644 flang/test/Lower/Intrinsics/split.f90
create mode 100644 flang/test/Semantics/split.f90
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index a663643fa18cc..39b58465e9156 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -992,6 +992,55 @@ static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
}
}
+// SPLIT - scans for the next separator character in STRING.
+// When BACK is false (or absent), returns the position of the leftmost
+// character of STRING that is in SET and whose position is greater than
+// POS, or LEN(STRING)+1 if no such character exists.
+// When BACK is true, returns the position of the rightmost character of
+// STRING that is in SET and whose position is less than POS, or 0 if no such
+// character exists.
+template <typename CHAR>
+static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
+ std::size_t stringLen, const CHAR *set, std::size_t setLen,
+ std::size_t pos, bool back) {
+ if (back) {
+ // Scan backwards from position pos-1 (1-indexed pos means index pos-2)
+ // looking for the rightmost separator at position < pos.
+ if (pos <= 1) {
+ return 0;
+ }
+ std::size_t scanLen = pos - 1; // candidate positions are 1..pos-1; clamped to stringLen just below
+ if (scanLen > stringLen) {
+ scanLen = stringLen;
+ }
+ for (std::size_t i = scanLen; i > 0; --i) {
+ CHAR ch = string[i - 1];
+ for (std::size_t j = 0; j < setLen; ++j) {
+ if (set[j] == ch) {
+ return i; // 1-indexed position
+ }
+ }
+ }
+ return 0;
+ } else {
+ // Scan forward from position pos+1 (1-indexed) looking for the
+ // leftmost separator at position > pos.
+ if (pos >= stringLen) {
+ return stringLen + 1;
+ }
+ std::size_t startIdx = pos; // 0-indexed start = pos (since pos is 1-indexed and we want pos+1)
+ for (std::size_t i = startIdx; i < stringLen; ++i) {
+ CHAR ch = string[i];
+ for (std::size_t j = 0; j < setLen; ++j) {
+ if (set[j] == ch) {
+ return i + 1; // convert to 1-indexed
+ }
+ }
+ }
+ return stringLen + 1;
+ }
+}
+
extern "C" {
RT_EXT_API_GROUP_BEGIN
@@ -1375,6 +1424,21 @@ void RTDEF(TokenizePositions)(Descriptor &first, Descriptor &last,
TokenizePositionsImpl(first, last, string, set, terminator);
}
+std::size_t RTDEF(Split1)(const char *string, std::size_t stringLen,
+ const char *set, std::size_t setLen, std::size_t pos, bool back) {
+ return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
+std::size_t RTDEF(Split2)(const char16_t *string, std::size_t stringLen,
+ const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
+std::size_t RTDEF(Split4)(const char32_t *string, std::size_t stringLen,
+ const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime
diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 4b304a98ada1b..6577876057c3c 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -392,6 +392,91 @@ TYPED_TEST(SearchTests, VerifyTests) {
"VERIFY", tests, std::get<SearchFunction<TypeParam>>(functions));
}
+// Test SPLIT()
+template <typename CHAR>
+using SplitFunction = std::function<std::size_t(
+ const CHAR *, std::size_t, const CHAR *, std::size_t, std::size_t, bool)>;
+using SplitFunctions = std::tuple<SplitFunction<char>, SplitFunction<char16_t>,
+ SplitFunction<char32_t>>;
+struct SplitTestCase {
+ const char *string, *set;
+ std::size_t pos;
+ bool back;
+ std::size_t expect;
+};
+
+template <typename CHAR>
+void RunSplitTests(const char *which,
+ const std::vector<SplitTestCase> &testCases,
+ const SplitFunction<CHAR> &function) {
+ for (const auto &t : testCases) {
+ std::size_t strLen{std::strlen(t.string)}, setLen{std::strlen(t.set)};
+ std::basic_string<CHAR> str{t.string, t.string + strLen};
+ std::basic_string<CHAR> set{t.set, t.set + setLen};
+ auto got{function(str.data(), strLen, set.data(), setLen, t.pos, t.back)};
+ ASSERT_EQ(got, t.expect)
+ << which << "('" << t.string << "','" << t.set << "',pos=" << t.pos
+ << ",back=" << t.back << ") for CHARACTER(kind=" << sizeof(CHAR)
+ << "): got " << got << ", expected " << t.expect;
+ }
+}
+
+template <typename CHAR> struct SplitTests : public ::testing::Test {};
+TYPED_TEST_SUITE(SplitTests, CharacterTypes, );
+
+TYPED_TEST(SplitTests, SplitForward) {
+ static SplitFunctions functions{
+ RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+ static std::vector<SplitTestCase> tests{
+ // "one,two,three" with set=","
+ // Forward scanning: from pos=0, find first ',' at position 4
+ {"one,two,three", ",", 0, false, 4},
+ // From pos=4, find next ',' at position 8
+ {"one,two,three", ",", 4, false, 8},
+ // From pos=8, no more ',', return len+1=14
+ {"one,two,three", ",", 8, false, 14},
+ // Empty string
+ {"", ",", 0, false, 1},
+ // No delimiters in string
+ {"abc", ",", 0, false, 4},
+ // String is all delimiters
+ {",,", ",", 0, false, 1},
+ {",,", ",", 1, false, 2},
+ {",,", ",", 2, false, 3},
+ // pos at end of string
+ {"abc", ",", 3, false, 4},
+ // Multiple delimiter characters in set
+ {"a,b;c", ",;", 0, false, 2},
+ {"a,b;c", ",;", 2, false, 4},
+ {"a,b;c", ",;", 4, false, 6},
+ };
+ RunSplitTests(
+ "SPLIT(forward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
+TYPED_TEST(SplitTests, SplitBackward) {
+ static SplitFunctions functions{
+ RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+ static std::vector<SplitTestCase> tests{
+ // "one,two,three" with set=","
+ // Backward scanning: from pos=14 (len+1), find last ',' at position 8
+ {"one,two,three", ",", 14, true, 8},
+ // From pos=8, find previous ',' at position 4
+ {"one,two,three", ",", 8, true, 4},
+ // From pos=4, no ',' before position 4, return 0
+ {"one,two,three", ",", 4, true, 0},
+ // Empty string
+ {"", ",", 1, true, 0},
+ // pos=0 or pos=1 should return 0
+ {"abc", ",", 0, true, 0},
+ {"abc", ",", 1, true, 0},
+ // No delimiters in string
+ {"abc", ",", 4, true, 0},
+ };
+ RunSplitTests(
+ "SPLIT(backward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
// Test REPEAT()
template <typename CHAR> struct RepeatTests : public ::testing::Test {};
TYPED_TEST_SUITE(RepeatTests, CharacterTypes, );
diff --git a/flang/docs/F202X.md b/flang/docs/F202X.md
index d1940a1858db1..988c0e9f083e0 100644
--- a/flang/docs/F202X.md
+++ b/flang/docs/F202X.md
@@ -284,47 +284,18 @@ arguments or results with conversion factors.
`SELECTED_LOGICAL_KIND` maps a bit size to a kind of `LOGICAL`
-There are two new character utility intrinsic
-functions whose implementations have very low priority: `SPLIT` and `TOKENIZE`.
-`TOKENIZE` requires memory allocation to return its results,
-and could and should have been implemented once in some Fortran utility
-library for those who need a slow tokenization facility rather than
-requiring implementations in each vendor's runtime support library with
-all the extra cost and compatibility risk that entails.
-
-`SPLIT` is worse -- not only could it, like `TOKENIZE`,
-have been supplied by a Fortran utility library rather than being
-added to the standard, it's redundant;
-it provides nothing that cannot be already accomplished by
-composing today's `SCAN` intrinsic function with substring indexing:
-
-```
-module m
- interface split
- module procedure :: split
- end interface
- !instantiate for all possible ck/ik/lk combinations
- integer, parameter :: ck = kind(''), ik = kind(0), lk = kind(.true.)
- contains
- simple elemental subroutine split(string, set, pos, back)
- character(*, kind=ck), intent(in) :: string, set
- integer(kind=ik), intent(in out) :: pos
- logical(kind=lk), intent(in), optional :: back
- if (present(back)) then
- if (back) then
- pos = scan(string(:pos-1), set, .true.)
- return
- end if
- end if
- npos = scan(string(pos+1:), set)
- pos = merge(pos + npos, len(string) + 1, npos /= 0)
- end
-end
-```
-
-(The code above isn't a proposed implementation for `SPLIT`, just a
-demonstration of how programs could use `SCAN` to accomplish the same
-results today.)
+There are two new character utility intrinsic subroutines,
+`SPLIT` and `TOKENIZE`, both of which are now implemented.
+
+`SPLIT` scans for separator characters in a string.
+When `BACK` is absent or false, it updates `POS` to the position of the
+leftmost character of `STRING` that is in `SET` and whose position is
+greater than the incoming `POS`, or to `LEN(STRING)+1` if there is none.
+When `BACK` is true, it updates `POS` to the position of the rightmost
+character of `STRING` that is in `SET` and whose position is less than
+the incoming `POS`, or to 0 if there is none.
+
+`TOKENIZE` extracts tokens from a string separated by characters in a set.
## Source limitations
diff --git a/flang/docs/FortranStandardsSupport.md b/flang/docs/FortranStandardsSupport.md
index f57956cd6d6b8..db66df5670e65 100644
--- a/flang/docs/FortranStandardsSupport.md
+++ b/flang/docs/FortranStandardsSupport.md
@@ -39,7 +39,7 @@ status of all important Fortran 2023 features. The table entries are based on th
| The specifiers typeof and classof | N | |
| Conditional expressions and arguments | N | |
| More use of boz constants | P | All usages other than enum are supported |
-| Intrinsics for extracting tokens from a string | N | |
+| Intrinsics for extracting tokens from a string | Y | SPLIT, TOKENIZE |
| Intrinsics for Trig functions that work in degrees | Y | |
| Intrinsics for Trig functions that work in half revolutions| Y | |
| Changes to system_clock | N | |
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index 615d2746284ab..330fcf303de0e 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -361,6 +361,24 @@ that is present in `SET`, or zero if none is.
`VERIFY` is essentially the opposite: it returns the index of the first (or last) character
in `STRING` that is *not* present in `SET`, or zero if all are.
+### Character intrinsic subroutines (Fortran 2023)
+```
+CALL SPLIT(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER(any) POS, LOGICAL(any) BACK=.FALSE.)
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, CHARACTER(k,:) TOKENS(:) [, SEPARATOR])
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER FIRST(:), INTEGER LAST(:))
+```
+
+`SPLIT` scans `STRING` for separator characters, i.e. characters that appear in `SET`.
+When `BACK` is absent or `.FALSE.`, it returns (in `POS`) the position of the
+leftmost character of `STRING` that is in `SET` and whose position is greater
+than the incoming `POS`, or `LEN(STRING)+1` if no such character exists.
+When `BACK` is `.TRUE.`, it returns (in `POS`) the position of the rightmost character
+of `STRING` that is in `SET` and whose position is less than the incoming `POS`, or 0 if no such character exists.
+
+`TOKENIZE` extracts tokens from `STRING` delimited by characters in `SET`.
+In Form 1, it returns the tokens as an array of characters and optionally the separator characters.
+In Form 2, it returns the starting and ending positions of each token.
+
## Transformational intrinsic functions
This category comprises a large collection of intrinsic functions that
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 3ef4045518cc4..ca9677a8cb2b1 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -390,6 +390,7 @@ struct IntrinsicLibrary {
fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genSpacing(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
+ void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genStorageSize(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index 684b7498e725a..b365b0bce31df 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -142,6 +142,15 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
mlir::Value setBase, mlir::Value setLen,
mlir::Value back);
+/// Generate call to the SPLIT runtime routine that is specialized on
+/// \p kind, the kind of the character elements in the strings.
+/// \p pos is the current position, passed by value; it is not modified here.
+/// \returns the result of the runtime call; the caller stores it as the new POS.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+ mlir::Value stringBase, mlir::Value stringLen,
+ mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+ mlir::Value back);
+
/// Generate call to TOKENIZE runtime (Form 1).
/// Splits \p stringBox into tokens based on separator characters in \p setBox.
/// \p tokensBox must be an unallocated allocatable array that receives the
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index 360418b7d5531..93f36d077a532 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -137,6 +137,15 @@ void RTDECL(Tokenize)(Descriptor &tokens, Descriptor *separator,
void RTDECL(TokenizePositions)(Descriptor &first, Descriptor &last,
const Descriptor &string, const Descriptor &set,
const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *string, std::size_t stringLen,
+ const char *set, std::size_t setLen, std::size_t pos, bool back = false);
+std::size_t RTDECL(Split2)(const char16_t *string, std::size_t stringLen,
+ const char16_t *set, std::size_t setLen, std::size_t pos,
+ bool back = false);
+std::size_t RTDECL(Split4)(const char32_t *string, std::size_t stringLen,
+ const char32_t *set, std::size_t setLen, std::size_t pos,
+ bool back = false);
}
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 2ae1c478489c4..d94ec117ac18a 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1750,6 +1750,16 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{{"seconds", AnyInt, Rank::scalar, Optionality::required,
common::Intent::In}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
+ {"split",
+ {{"string", SameCharNoLen, Rank::scalar, Optionality::required,
+ common::Intent::In},
+ {"set", SameCharNoLen, Rank::scalar, Optionality::required,
+ common::Intent::In},
+ {"pos", AnyInt, Rank::scalar, Optionality::required,
+ common::Intent::InOut},
+ {"back", AnyLogical, Rank::scalar, Optionality::optional,
+ common::Intent::In}},
+ {}, Rank::elemental, IntrinsicClass::pureSubroutine},
{"tokenize",
{{"string", SameCharNoLen, Rank::scalar, Optionality::required,
common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index d67eebdd8c93c..45e5568e43ba7 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -764,6 +764,13 @@ static constexpr IntrinsicHandler handlers[]{
/*isElemental=*/false},
{"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
{"spacing", &I::genSpacing},
+ {"split",
+ &I::genSplit,
+ {{{"string", asAddr},
+ {"set", asAddr},
+ {"pos", asAddr},
+ {"back", asValue, handleDynamicOptional}}},
+ /*isElemental=*/true},
{"spread",
&I::genSpread,
{{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8538,6 +8545,50 @@ void IntrinsicLibrary::genSleep(llvm::ArrayRef<fir::ExtendedValue> args) {
fir::runtime::genSleep(builder, loc, fir::getBase(args[0]));
}
+// SPLIT - load POS, call the kind-specific runtime routine, store the result to POS
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4 && "SPLIT requires 3 or 4 arguments");
+
+ // Handle required STRING base and length args
+ mlir::Value stringBase = fir::getBase(args[0]);
+ mlir::Value stringLen = fir::getLen(args[0]);
+
+ // Handle required SET string base and length args
+ mlir::Value setBase = fir::getBase(args[1]);
+ mlir::Value setLen = fir::getLen(args[1]);
+
+ // POS is INTENT(INOUT) - it's passed as an address
+ mlir::Value posAddr = fir::getBase(args[2]);
+
+ // Determine character kind from the type of STRING
+ fir::KindTy kind =
+ fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+ stringBase.getType());
+
+ // BACK is optional; a statically absent BACK is replaced by a false i1 constant
+ mlir::Value back =
+ isStaticallyAbsent(args[3])
+ ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+ : fir::getBase(args[3]);
+
+ // Load current POS value (posRefTy is the integer type POS points to)
+ mlir::Type posRefTy = fir::dyn_cast_ptrEleTy(posAddr.getType());
+ mlir::Value posValue = fir::LoadOp::create(builder, loc, posRefTy, posAddr);
+
+ // Convert POS to std::size_t (index type) for the runtime call
+ mlir::Type indexTy = builder.getIndexType();
+ mlir::Value posIndex = builder.createConvert(loc, indexTy, posValue);
+
+ // Call the runtime
+ mlir::Value newPos = fir::runtime::genSplit(builder, loc, kind, stringBase,
+ stringLen, setBase, setLen,
+ posIndex, back);
+
+ // Convert result back to the POS integer type and store
+ mlir::Value newPosConverted = builder.createConvert(loc, posRefTy, newPos);
+ fir::StoreOp::create(builder, loc, newPosConverted, posAddr);
+}
+
// TOKENIZE
void IntrinsicLibrary::genTokenize(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 4 && "TOKENIZE requires 3 or 4 arguments");
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 28e795b8de759..d49d9f86c97b9 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -339,3 +339,31 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
stringLen, setBase, setLen, back);
return fir::CallOp::create(builder, loc, func, args).getResult(0);
}
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+ mlir::Location loc, int kind,
+ mlir::Value stringBase,
+ mlir::Value stringLen, mlir::Value setBase,
+ mlir::Value setLen, mlir::Value pos,
+ mlir::Value back) {
+ mlir::func::FuncOp func;
+ switch (kind) {
+ case 1:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+ break;
+ case 2:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+ break;
+ case 4:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+ break;
+ default:
+ fir::emitFatalError(
+ loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+ }
+ auto fTy = func.getFunctionType();
+ auto args = fir::runtime::createArguments(builder, loc, fTy, stringBase,
+ stringLen, setBase, setLen, pos,
+ back);
+ return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..d8a55a547c9d0
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,37 @@
+! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: split_basic
+subroutine split_basic()
+ implicit none
+ character(20) :: string
+ character(5) :: set
+ integer :: pos
+ string = "one,two,three"
+ set = ","
+ pos = 0
+ call split(string, set, pos)
+ ! CHECK: %[[BACK:.*]] = arith.constant false
+ ! CHECK: %[[POS:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+ ! CHECK: %[[POS_IDX:.*]] = fir.convert %[[POS]] : (i32) -> index
+ ! CHECK: %[[STRING:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,20>>) -> !fir.ref<i8>
+ ! CHECK: %[[SET:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
+ ! CHECK: %[[POS_I64:.*]] = fir.convert %[[POS_IDX]] : (index) -> i64
+ ! CHECK: %[[RESULT:.*]] = fir.call @_FortranASplit1(%[[STRING]], %{{.*}}, %[[SET]], %{{.*}}, %[[POS_I64]], %[[BACK]]) {{.*}} : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+ ! CHECK: %[[RESULT_I32:.*]] = fir.convert %[[RESULT]] : (i64) -> i32
+ ! CHECK: fir.store %[[RESULT_I32]] to %{{.*}} : !fir.ref<i32>
+end subroutine split_basic
+
+! CHECK-LABEL: split_back
+subroutine split_back()
+ implicit none
+ character(20) :: string
+ character(5) :: set
+ integer :: pos
+ logical :: back
+ string = "one,two,three"
+ set = ","
+ pos = 14
+ back = .true.
+ call split(string, set, pos, back)
+ ! CHECK: fir.call @_FortranASplit1(
+end subroutine split_back
diff --git a/flang/test/Semantics/split.f90 b/flang/test/Semantics/split.f90
new file mode 100644
index 0000000000000..f0388584010f6
--- /dev/null
+++ b/flang/test/Semantics/split.f90
@@ -0,0 +1,69 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+! Check for semantic errors in split() subroutine calls
+! Based on Fortran 2023 standard requirements
+
+program test_split_errors
+ implicit none
+
+ character(20) :: string
+ character(5) :: set
+ integer :: pos
+ logical :: back
+
+ ! Valid declarations for testing
+ integer :: int_scalar
+ real :: real_scalar
+ character(10) :: string_array(5)
+ character(5) :: set_array(5)
+
+ !========================================================================
+ ! Valid calls (reference)
+ !========================================================================
+
+ call split(string, set, pos)
+ call split(string, set, pos, back)
+ call split("hello world", " ", pos)
+ call split("hello world", " ", pos, .false.)
+
+ !========================================================================
+ ! Wrong types for STRING argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'string=' has bad type 'INTEGER(4)'
+ call split(int_scalar, set, pos)
+
+ !ERROR: Actual argument for 'string=' has bad type 'REAL(4)'
+ call split(real_scalar, set, pos)
+
+ !========================================================================
+ ! Wrong rank for STRING (must be scalar)
+ !========================================================================
+
+ !ERROR: 'string=' argument has unacceptable rank 1
+ call split(string_array, set, pos)
+
+ !========================================================================
+ ! Wrong types for SET argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'set=' has bad type 'INTEGER(4)'
+ call split(string, int_scalar, pos)
+
+ !ERROR: Actual argument for 'set=' has bad type 'REAL(4)'
+ call split(string, real_scalar, pos)
+
+ !========================================================================
+ ! Wrong types for POS argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'pos=' has bad type 'REAL(4)'
+ call split(string, set, real_scalar)
+
+ !========================================================================
+ ! Wrong types for BACK argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'back=' has bad type 'INTEGER(4)'
+ call split(string, set, pos, int_scalar)
+
+end program test_split_errors
>From 0ede8cdabb587686cb4e274b7c12dd2ef0cfc015 Mon Sep 17 00:00:00 2001
From: Shandong Lao <shandong.lao at hpe.com>
Date: Tue, 10 Mar 2026 01:58:04 -0500
Subject: [PATCH 2/2] Reformat code in character.cpp, IntrinsicCall.cpp and
Character.cpp to follow clang-format.
---
flang-rt/lib/runtime/character.cpp | 7 ++++---
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 6 +++---
flang/lib/Optimizer/Builder/Runtime/Character.cpp | 5 ++---
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 39b58465e9156..cb801f41bdeb2 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -1001,8 +1001,8 @@ static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
// character exists.
template <typename CHAR>
static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
- std::size_t stringLen, const CHAR *set, std::size_t setLen,
- std::size_t pos, bool back) {
+ std::size_t stringLen, const CHAR *set, std::size_t setLen, std::size_t pos,
+ bool back) {
if (back) {
// Scan backwards from position pos-1 (1-indexed pos means index pos-2)
// looking for the rightmost separator at position < pos.
@@ -1028,7 +1028,8 @@ static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
if (pos >= stringLen) {
return stringLen + 1;
}
- std::size_t startIdx = pos; // 0-indexed start = pos (since pos is 1-indexed and we want pos+1)
+ std::size_t startIdx =
+ pos; // 0-indexed start = pos (since pos is 1-indexed and we want pos+1)
for (std::size_t i = startIdx; i < stringLen; ++i) {
CHAR ch = string[i];
for (std::size_t j = 0; j < setLen; ++j) {
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 45e5568e43ba7..acfd9cfe89d30 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -8580,9 +8580,9 @@ void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
mlir::Value posIndex = builder.createConvert(loc, indexTy, posValue);
// Call the runtime
- mlir::Value newPos = fir::runtime::genSplit(builder, loc, kind, stringBase,
- stringLen, setBase, setLen,
- posIndex, back);
+ mlir::Value newPos =
+ fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen, setBase,
+ setLen, posIndex, back);
// Convert result back to the POS integer type and store
mlir::Value newPosConverted = builder.createConvert(loc, posRefTy, newPos);
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index d49d9f86c97b9..c77374986010c 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -362,8 +362,7 @@ mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
}
auto fTy = func.getFunctionType();
- auto args = fir::runtime::createArguments(builder, loc, fTy, stringBase,
- stringLen, setBase, setLen, pos,
- back);
+ auto args = fir::runtime::createArguments(
+ builder, loc, fTy, stringBase, stringLen, setBase, setLen, pos, back);
return fir::CallOp::create(builder, loc, func, args).getResult(0);
}
More information about the llvm-commits
mailing list