[flang-commits] [flang] 5a14e4f - [flang] Implement SPLIT intrinsic subroutine with tests (#185584)
via flang-commits
flang-commits at lists.llvm.org
Fri Mar 20 10:12:57 PDT 2026
Author: laoshd
Date: 2026-03-20T13:12:51-04:00
New Revision: 5a14e4f231bbffd1168747a13d28e82d6212e979
URL: https://github.com/llvm/llvm-project/commit/5a14e4f231bbffd1168747a13d28e82d6212e979
DIFF: https://github.com/llvm/llvm-project/commit/5a14e4f231bbffd1168747a13d28e82d6212e979.diff
LOG: [flang] Implement SPLIT intrinsic subroutine with tests (#185584)
This implements part of the Fortran 2023 new feature US 03, "Extracting
tokens from a string": the SPLIT intrinsic subroutine.
It is described in section 16.9.196, SPLIT (STRING, SET, POS [, BACK]), of
the Fortran 2023 standard.
It's part of Flang issue
[#178044](https://github.com/llvm/llvm-project/issues/178044). Note that
I work with @kwyatt-ext on this issue. He implemented the other part,
TOKENIZE.
A test will be added into
[llvm-test-suite](https://github.com/llvm/llvm-test-suite) later after
this PR is merged.
Added:
flang/test/Lower/Intrinsics/split.f90
flang/test/Semantics/split.f90
Modified:
flang-rt/lib/runtime/character.cpp
flang-rt/unittests/Runtime/CharacterTest.cpp
flang/docs/F202X.md
flang/docs/FortranStandardsSupport.md
flang/docs/Intrinsics.md
flang/include/flang/Optimizer/Builder/IntrinsicCall.h
flang/include/flang/Optimizer/Builder/Runtime/Character.h
flang/include/flang/Runtime/character.h
flang/lib/Evaluate/intrinsics.cpp
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/lib/Optimizer/Builder/Runtime/Character.cpp
Removed:
################################################################################
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index a663643fa18cc..8980077e79a0c 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -992,6 +992,42 @@ static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
}
}
+// SPLIT — implemented in terms of SCAN.
+// When BACK is false, returns the position of the leftmost character in SET
+// at a position > POS, or LEN(STRING)+1 if none.
+// When BACK is true, returns the position of the rightmost character in SET
+// at a position < POS, or 0 if none.
+template <typename CHAR>
+static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
+ std::size_t stringLen, const CHAR *set, std::size_t setLen, std::size_t pos,
+ bool back) {
+ if (back) {
+ std::size_t scanLen{pos > 1 ? pos - 1 : std::size_t{0}};
+ if (scanLen > stringLen) {
+ scanLen = stringLen;
+ }
+ if constexpr (sizeof(CHAR) == 1) {
+ return ScanVerify<false>(string, scanLen, set, setLen, true);
+ } else {
+ return ScanVerify<CHAR, CharFunc::Scan>(
+ string, scanLen, set, setLen, true);
+ }
+ } else {
+ if (pos >= stringLen) {
+ return stringLen + 1;
+ }
+ std::size_t npos;
+ if constexpr (sizeof(CHAR) == 1) {
+ npos =
+ ScanVerify<false>(string + pos, stringLen - pos, set, setLen, false);
+ } else {
+ npos = ScanVerify<CHAR, CharFunc::Scan>(
+ string + pos, stringLen - pos, set, setLen, false);
+ }
+ return npos != 0 ? pos + npos : stringLen + 1;
+ }
+}
+
extern "C" {
RT_EXT_API_GROUP_BEGIN
@@ -1375,6 +1411,21 @@ void RTDEF(TokenizePositions)(Descriptor &first, Descriptor &last,
TokenizePositionsImpl(first, last, string, set, terminator);
}
+std::size_t RTDEF(Split1)(const char *string, std::size_t stringLen,
+ const char *set, std::size_t setLen, std::size_t pos, bool back) {
+ return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
+std::size_t RTDEF(Split2)(const char16_t *string, std::size_t stringLen,
+ const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
+std::size_t RTDEF(Split4)(const char32_t *string, std::size_t stringLen,
+ const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime
diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 4b304a98ada1b..6577876057c3c 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -392,6 +392,91 @@ TYPED_TEST(SearchTests, VerifyTests) {
"VERIFY", tests, std::get<SearchFunction<TypeParam>>(functions));
}
+// Test SPLIT()
+template <typename CHAR>
+using SplitFunction = std::function<std::size_t(
+ const CHAR *, std::size_t, const CHAR *, std::size_t, std::size_t, bool)>;
+using SplitFunctions = std::tuple<SplitFunction<char>, SplitFunction<char16_t>,
+ SplitFunction<char32_t>>;
+struct SplitTestCase {
+ const char *string, *set;
+ std::size_t pos;
+ bool back;
+ std::size_t expect;
+};
+
+template <typename CHAR>
+void RunSplitTests(const char *which,
+ const std::vector<SplitTestCase> &testCases,
+ const SplitFunction<CHAR> &function) {
+ for (const auto &t : testCases) {
+ std::size_t strLen{std::strlen(t.string)}, setLen{std::strlen(t.set)};
+ std::basic_string<CHAR> str{t.string, t.string + strLen};
+ std::basic_string<CHAR> set{t.set, t.set + setLen};
+ auto got{function(str.data(), strLen, set.data(), setLen, t.pos, t.back)};
+ ASSERT_EQ(got, t.expect)
+ << which << "('" << t.string << "','" << t.set << "',pos=" << t.pos
+ << ",back=" << t.back << ") for CHARACTER(kind=" << sizeof(CHAR)
+ << "): got " << got << ", expected " << t.expect;
+ }
+}
+
+template <typename CHAR> struct SplitTests : public ::testing::Test {};
+TYPED_TEST_SUITE(SplitTests, CharacterTypes, );
+
+TYPED_TEST(SplitTests, SplitForward) {
+ static SplitFunctions functions{
+ RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+ static std::vector<SplitTestCase> tests{
+ // "one,two,three" with set=","
+ // Forward scanning: from pos=0, find first ',' at position 4
+ {"one,two,three", ",", 0, false, 4},
+ // From pos=4, find next ',' at position 8
+ {"one,two,three", ",", 4, false, 8},
+ // From pos=8, no more ',', return len+1=14
+ {"one,two,three", ",", 8, false, 14},
+ // Empty string
+ {"", ",", 0, false, 1},
+ // No delimiters in string
+ {"abc", ",", 0, false, 4},
+ // String is all delimiters
+ {",,", ",", 0, false, 1},
+ {",,", ",", 1, false, 2},
+ {",,", ",", 2, false, 3},
+ // pos at end of string
+ {"abc", ",", 3, false, 4},
+ // Multiple delimiter characters in set
+ {"a,b;c", ",;", 0, false, 2},
+ {"a,b;c", ",;", 2, false, 4},
+ {"a,b;c", ",;", 4, false, 6},
+ };
+ RunSplitTests(
+ "SPLIT(forward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
+TYPED_TEST(SplitTests, SplitBackward) {
+ static SplitFunctions functions{
+ RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+ static std::vector<SplitTestCase> tests{
+ // "one,two,three" with set=","
+ // Backward scanning: from pos=14 (len+1), find last ',' at position 8
+ {"one,two,three", ",", 14, true, 8},
+ // From pos=8, find previous ',' at position 4
+ {"one,two,three", ",", 8, true, 4},
+ // From pos=4, no ',' before position 4, return 0
+ {"one,two,three", ",", 4, true, 0},
+ // Empty string
+ {"", ",", 1, true, 0},
+ // pos=0 or pos=1 should return 0
+ {"abc", ",", 0, true, 0},
+ {"abc", ",", 1, true, 0},
+ // No delimiters in string
+ {"abc", ",", 4, true, 0},
+ };
+ RunSplitTests(
+ "SPLIT(backward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
// Test REPEAT()
template <typename CHAR> struct RepeatTests : public ::testing::Test {};
TYPED_TEST_SUITE(RepeatTests, CharacterTypes, );
diff --git a/flang/docs/F202X.md b/flang/docs/F202X.md
index d1940a1858db1..988c0e9f083e0 100644
--- a/flang/docs/F202X.md
+++ b/flang/docs/F202X.md
@@ -284,47 +284,18 @@ arguments or results with conversion factors.
`SELECTED_LOGICAL_KIND` maps a bit size to a kind of `LOGICAL`
-There are two new character utility intrinsic
-functions whose implementations have very low priority: `SPLIT` and `TOKENIZE`.
-`TOKENIZE` requires memory allocation to return its results,
-and could and should have been implemented once in some Fortran utility
-library for those who need a slow tokenization facility rather than
-requiring implementations in each vendor's runtime support library with
-all the extra cost and compatibility risk that entails.
-
-`SPLIT` is worse -- not only could it, like `TOKENIZE`,
-have been supplied by a Fortran utility library rather than being
-added to the standard, it's redundant;
-it provides nothing that cannot be already accomplished by
-composing today's `SCAN` intrinsic function with substring indexing:
-
-```
-module m
- interface split
- module procedure :: split
- end interface
- !instantiate for all possible ck/ik/lk combinations
- integer, parameter :: ck = kind(''), ik = kind(0), lk = kind(.true.)
- contains
- simple elemental subroutine split(string, set, pos, back)
- character(*, kind=ck), intent(in) :: string, set
- integer(kind=ik), intent(in out) :: pos
- logical(kind=lk), intent(in), optional :: back
- if (present(back)) then
- if (back) then
- pos = scan(string(:pos-1), set, .true.)
- return
- end if
- end if
- npos = scan(string(pos+1:), set)
- pos = merge(pos + npos, len(string) + 1, npos /= 0)
- end
-end
-```
-
-(The code above isn't a proposed implementation for `SPLIT`, just a
-demonstration of how programs could use `SCAN` to accomplish the same
-results today.)
+There are two new character utility intrinsic subroutines,
+`SPLIT` and `TOKENIZE`, both of which are now implemented.
+
+`SPLIT` scans for separator characters in a string.
+When `BACK` is absent or false, it returns the position of the leftmost
+character in `SET` whose position in `STRING` is greater than `POS`,
+or `LEN(STRING)+1` if no such character exists.
+When `BACK` is true, it returns the position of the rightmost character
+in `SET` whose position in `STRING` is less than `POS`, or 0 if no
+such character exists.
+
+`TOKENIZE` extracts tokens from a string separated by characters in a set.
## Source limitations
diff --git a/flang/docs/FortranStandardsSupport.md b/flang/docs/FortranStandardsSupport.md
index f57956cd6d6b8..8a04510918e62 100644
--- a/flang/docs/FortranStandardsSupport.md
+++ b/flang/docs/FortranStandardsSupport.md
@@ -39,7 +39,7 @@ status of all important Fortran 2023 features. The table entries are based on th
| The specifiers typeof and classof | N | |
| Conditional expressions and arguments | N | |
| More use of boz constants | P | All usages other than enum are supported |
-| Intrinsics for extracting tokens from a string | N | |
+| Intrinsics for extracting tokens from a string | Y | |
| Intrinsics for Trig functions that work in degrees | Y | |
| Intrinsics for Trig functions that work in half revolutions| Y | |
| Changes to system_clock | N | |
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index 615d2746284ab..330fcf303de0e 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -361,6 +361,24 @@ that is present in `SET`, or zero if none is.
`VERIFY` is essentially the opposite: it returns the index of the first (or last) character
in `STRING` that is *not* present in `SET`, or zero if all are.
+### Character intrinsic subroutines (Fortran 2023)
+```
+CALL SPLIT(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER(any) POS, LOGICAL(any) BACK=.FALSE.)
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, CHARACTER(k,:) TOKENS(:) [, SEPARATOR])
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER FIRST(:), INTEGER LAST(:))
+```
+
+`SPLIT` scans for separator characters in `STRING` from the set `SET`.
+When `BACK` is absent or `.FALSE.`, it returns (in `POS`) the position of the
+leftmost character in `SET` whose position in `STRING` is greater than `POS`,
+or `LEN(STRING)+1` if no such character exists.
+When `BACK` is `.TRUE.`, it returns the position of the rightmost character in
+`SET` whose position in `STRING` is less than `POS`, or 0 if no such character exists.
+
+`TOKENIZE` extracts tokens from `STRING` delimited by characters in `SET`.
+In Form 1, it returns the tokens as an array of characters and optionally the separator characters.
+In Form 2, it returns the starting and ending positions of each token.
+
## Transformational intrinsic functions
This category comprises a large collection of intrinsic functions that
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 3ef4045518cc4..ca9677a8cb2b1 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -390,6 +390,7 @@ struct IntrinsicLibrary {
fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genSpacing(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
+ void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genStorageSize(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index 684b7498e725a..b365b0bce31df 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -142,6 +142,15 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
mlir::Value setBase, mlir::Value setLen,
mlir::Value back);
+/// Generate call to the SPLIT runtime routine that is specialized on
+/// \param kind.
+/// The \param kind represents the kind of the elements in the strings.
+/// Returns the new separator position; \p pos is passed by value, so the caller stores the result.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+ mlir::Value stringBase, mlir::Value stringLen,
+ mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+ mlir::Value back);
+
/// Generate call to TOKENIZE runtime (Form 1).
/// Splits \p stringBox into tokens based on separator characters in \p setBox.
/// \p tokensBox must be an unallocated allocatable array that receives the
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index 360418b7d5531..93f36d077a532 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -137,6 +137,15 @@ void RTDECL(Tokenize)(Descriptor &tokens, Descriptor *separator,
void RTDECL(TokenizePositions)(Descriptor &first, Descriptor &last,
const Descriptor &string, const Descriptor &set,
const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *string, std::size_t stringLen,
+ const char *set, std::size_t setLen, std::size_t pos, bool back = false);
+std::size_t RTDECL(Split2)(const char16_t *string, std::size_t stringLen,
+ const char16_t *set, std::size_t setLen, std::size_t pos,
+ bool back = false);
+std::size_t RTDECL(Split4)(const char32_t *string, std::size_t stringLen,
+ const char32_t *set, std::size_t setLen, std::size_t pos,
+ bool back = false);
}
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 5659c5ae7f2d6..84cd2288fcd0b 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1750,6 +1750,16 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{{"seconds", AnyInt, Rank::scalar, Optionality::required,
common::Intent::In}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
+ {"split",
+ {{"string", SameCharNoLen, Rank::scalar, Optionality::required,
+ common::Intent::In},
+ {"set", SameCharNoLen, Rank::scalar, Optionality::required,
+ common::Intent::In},
+ {"pos", AnyInt, Rank::scalar, Optionality::required,
+ common::Intent::InOut},
+ {"back", AnyLogical, Rank::scalar, Optionality::optional,
+ common::Intent::In}},
+ {}, Rank::elemental, IntrinsicClass::pureSubroutine},
{"tokenize",
{{"string", SameCharNoLen, Rank::scalar, Optionality::required,
common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index d57a2468dcffe..d6dee88f422e0 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -764,6 +764,13 @@ static constexpr IntrinsicHandler handlers[]{
/*isElemental=*/false},
{"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
{"spacing", &I::genSpacing},
+ {"split",
+ &I::genSplit,
+ {{{"string", asAddr},
+ {"set", asAddr},
+ {"pos", asAddr},
+ {"back", asValue, handleDynamicOptional}}},
+ /*isElemental=*/false},
{"spread",
&I::genSpread,
{{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8538,6 +8545,39 @@ void IntrinsicLibrary::genSleep(llvm::ArrayRef<fir::ExtendedValue> args) {
fir::runtime::genSleep(builder, loc, fir::getBase(args[0]));
}
+// SPLIT
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+
+ mlir::Value stringBase = fir::getBase(args[0]);
+ mlir::Value stringLen = fir::getLen(args[0]);
+ mlir::Value setBase = fir::getBase(args[1]);
+ mlir::Value setLen = fir::getLen(args[1]);
+ mlir::Value posAddr = fir::getBase(args[2]);
+
+ fir::KindTy kind =
+ fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+ stringBase.getType());
+
+ // BACK is optional and defaults to .FALSE. when absent.
+ mlir::Value back =
+ isStaticallyAbsent(args[3])
+ ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+ : fir::getBase(args[3]);
+
+ mlir::Type posRefTy = fir::dyn_cast_ptrEleTy(posAddr.getType());
+ mlir::Value posValue = fir::LoadOp::create(builder, loc, posRefTy, posAddr);
+ mlir::Type indexTy = builder.getIndexType();
+ mlir::Value posIndex = builder.createConvert(loc, indexTy, posValue);
+
+ mlir::Value newPos =
+ fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen, setBase,
+ setLen, posIndex, back);
+
+ mlir::Value newPosConverted = builder.createConvert(loc, posRefTy, newPos);
+ fir::StoreOp::create(builder, loc, newPosConverted, posAddr);
+}
+
// TOKENIZE
void IntrinsicLibrary::genTokenize(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 4 && "TOKENIZE requires 3 or 4 arguments");
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 28e795b8de759..c77374986010c 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -339,3 +339,30 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
stringLen, setBase, setLen, back);
return fir::CallOp::create(builder, loc, func, args).getResult(0);
}
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+ mlir::Location loc, int kind,
+ mlir::Value stringBase,
+ mlir::Value stringLen, mlir::Value setBase,
+ mlir::Value setLen, mlir::Value pos,
+ mlir::Value back) {
+ mlir::func::FuncOp func;
+ switch (kind) {
+ case 1:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+ break;
+ case 2:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+ break;
+ case 4:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+ break;
+ default:
+ fir::emitFatalError(
+ loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+ }
+ auto fTy = func.getFunctionType();
+ auto args = fir::runtime::createArguments(
+ builder, loc, fTy, stringBase, stringLen, setBase, setLen, pos, back);
+ return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..88fdb7e9a400a
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,45 @@
+! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: split_basic
+subroutine split_basic()
+ implicit none
+ character(20) :: string
+ character(5) :: set
+ integer :: pos
+ string = "one,two,three"
+ set = ","
+ pos = 0
+ call split(string, set, pos)
+ ! CHECK: %[[BACK:.*]] = arith.constant false
+ ! CHECK: %[[POS:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+ ! CHECK: %[[POS_IDX:.*]] = fir.convert %[[POS]] : (i32) -> index
+ ! CHECK: %[[STRING:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,20>>) -> !fir.ref<i8>
+ ! CHECK: %[[SET:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
+ ! CHECK: %[[POS_I64:.*]] = fir.convert %[[POS_IDX]] : (index) -> i64
+ ! CHECK: %[[RESULT:.*]] = fir.call @_FortranASplit1(%[[STRING]], %{{.*}}, %[[SET]], %{{.*}}, %[[POS_I64]], %[[BACK]]) {{.*}} : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+ ! CHECK: %[[RESULT_I32:.*]] = fir.convert %[[RESULT]] : (i64) -> i32
+ ! CHECK: fir.store %[[RESULT_I32]] to %{{.*}} : !fir.ref<i32>
+end subroutine split_basic
+
+! CHECK-LABEL: split_back
+subroutine split_back()
+ implicit none
+ character(20) :: string
+ character(5) :: set
+ integer :: pos
+ logical :: back
+ string = "one,two,three"
+ set = ","
+ pos = 14
+ back = .true.
+ call split(string, set, pos, back)
+ ! CHECK: %[[POS:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+ ! CHECK: %[[POS_IDX:.*]] = fir.convert %[[POS]] : (i32) -> index
+ ! CHECK: %[[STRING:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,20>>) -> !fir.ref<i8>
+ ! CHECK: %[[SET:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
+ ! CHECK: %[[POS_I64:.*]] = fir.convert %[[POS_IDX]] : (index) -> i64
+ ! CHECK: %[[BACK_CVT:.*]] = fir.convert %{{.*}} : (!fir.logical<4>) -> i1
+ ! CHECK: %[[RESULT:.*]] = fir.call @_FortranASplit1(%[[STRING]], %{{.*}}, %[[SET]], %{{.*}}, %[[POS_I64]], %[[BACK_CVT]]) {{.*}} : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+ ! CHECK: %[[RESULT_I32:.*]] = fir.convert %[[RESULT]] : (i64) -> i32
+ ! CHECK: fir.store %[[RESULT_I32]] to %{{.*}} : !fir.ref<i32>
+end subroutine split_back
diff --git a/flang/test/Semantics/split.f90 b/flang/test/Semantics/split.f90
new file mode 100644
index 0000000000000..60e35753c6d40
--- /dev/null
+++ b/flang/test/Semantics/split.f90
@@ -0,0 +1,98 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+! Check for semantic errors in split() subroutine calls
+! Based on Fortran 2023 standard requirements
+
+program test_split_errors
+ implicit none
+
+ character(20) :: string
+ character(5) :: set
+ integer :: pos
+ logical :: back
+
+ ! Valid declarations for testing
+ integer :: int_scalar
+ real :: real_scalar
+ character(10) :: string_array(5)
+ character(5) :: set_array(5)
+ character(len=20, kind=2) :: string_k2
+ character(len=5, kind=2) :: set_k2
+ character(len=20, kind=4) :: string_k4
+ character(len=5, kind=4) :: set_k4
+
+ !========================================================================
+ ! Valid calls (reference)
+ !========================================================================
+
+ call split(string, set, pos)
+ call split(string, set, pos, back)
+ call split("hello world", " ", pos)
+ call split("hello world", " ", pos, .false.)
+
+ ! Valid calls with different character kinds
+ call split(string_k2, set_k2, pos)
+ call split(string_k2, set_k2, pos, back)
+ call split(string_k4, set_k4, pos)
+ call split(string_k4, set_k4, pos, back)
+
+ !========================================================================
+ ! Wrong types for STRING argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'string=' has bad type 'INTEGER(4)'
+ call split(int_scalar, set, pos)
+
+ !ERROR: Actual argument for 'string=' has bad type 'REAL(4)'
+ call split(real_scalar, set, pos)
+
+ !========================================================================
+ ! Wrong rank for STRING (must be scalar)
+ !========================================================================
+
+ !ERROR: 'string=' argument has unacceptable rank 1
+ call split(string_array, set, pos)
+
+ !========================================================================
+ ! Wrong types for SET argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'set=' has bad type 'INTEGER(4)'
+ call split(string, int_scalar, pos)
+
+ !ERROR: Actual argument for 'set=' has bad type 'REAL(4)'
+ call split(string, real_scalar, pos)
+
+ !========================================================================
+ ! Wrong types for POS argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'pos=' has bad type 'REAL(4)'
+ call split(string, set, real_scalar)
+
+ !========================================================================
+ ! Wrong types for BACK argument
+ !========================================================================
+
+ !ERROR: Actual argument for 'back=' has bad type 'INTEGER(4)'
+ call split(string, set, pos, int_scalar)
+
+ !========================================================================
+ ! Character kind mismatches between STRING and SET
+ !========================================================================
+
+ !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=1,LEN=5_8)'
+ call split(string_k2, set, pos)
+
+ !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=2,LEN=5_8)'
+ call split(string, set_k2, pos)
+
+ !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=1,LEN=5_8)'
+ call split(string_k4, set, pos)
+
+ !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=4,LEN=5_8)'
+ call split(string, set_k4, pos)
+
+ !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=4,LEN=5_8)'
+ call split(string_k2, set_k4, pos)
+
+end program test_split_errors
More information about the flang-commits
mailing list