[flang-commits] [flang] 5a14e4f - [flang] Implement SPLIT intrinsic subroutine with tests (#185584)

via flang-commits flang-commits at lists.llvm.org
Fri Mar 20 10:12:57 PDT 2026


Author: laoshd
Date: 2026-03-20T13:12:51-04:00
New Revision: 5a14e4f231bbffd1168747a13d28e82d6212e979

URL: https://github.com/llvm/llvm-project/commit/5a14e4f231bbffd1168747a13d28e82d6212e979
DIFF: https://github.com/llvm/llvm-project/commit/5a14e4f231bbffd1168747a13d28e82d6212e979.diff

LOG: [flang] Implement SPLIT intrinsic subroutine with tests (#185584)

This is the implementation of part of F2023 new feature US 03.
Extracting tokens from a string, SPLIT intrinsic.

It's section 16.9.196 SPLIT (STRING, SET, POS [, BACK]) of Fortran 2023
Standard.

It's part of Flang issue
[#178044](https://github.com/llvm/llvm-project/issues/178044). Note that
I work with @kwyatt-ext on this issue. He implemented the other part,
TOKENIZE.

A test will be added into
[llvm-test-suite](https://github.com/llvm/llvm-test-suite) later after
this PR is merged.

Added: 
    flang/test/Lower/Intrinsics/split.f90
    flang/test/Semantics/split.f90

Modified: 
    flang-rt/lib/runtime/character.cpp
    flang-rt/unittests/Runtime/CharacterTest.cpp
    flang/docs/F202X.md
    flang/docs/FortranStandardsSupport.md
    flang/docs/Intrinsics.md
    flang/include/flang/Optimizer/Builder/IntrinsicCall.h
    flang/include/flang/Optimizer/Builder/Runtime/Character.h
    flang/include/flang/Runtime/character.h
    flang/lib/Evaluate/intrinsics.cpp
    flang/lib/Optimizer/Builder/IntrinsicCall.cpp
    flang/lib/Optimizer/Builder/Runtime/Character.cpp

Removed: 
    


################################################################################
diff  --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index a663643fa18cc..8980077e79a0c 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -992,6 +992,42 @@ static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
   }
 }
 
+// SPLIT, implemented in terms of SCAN; positions are 1-based, like Fortran's.
+// BACK false: result is the position of the leftmost character of STRING that
+// is in SET and lies after POS, or LEN(STRING)+1 if there is none.
+// BACK true: result is the position of the rightmost character of STRING that
+// is in SET and lies before POS, or 0 if there is none.
+template <typename CHAR>
+static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
+    std::size_t stringLen, const CHAR *set, std::size_t setLen, std::size_t pos,
+    bool back) {
+  if (back) {
+    std::size_t scanLen{pos > 1 ? pos - 1 : std::size_t{0}}; // STRING(:POS-1)
+    if (scanLen > stringLen) { // clamp a POS beyond LEN(STRING)+1
+      scanLen = stringLen;
+    }
+    if constexpr (sizeof(CHAR) == 1) {
+      return ScanVerify<false>(string, scanLen, set, setLen, true);
+    } else {
+      return ScanVerify<CHAR, CharFunc::Scan>(
+          string, scanLen, set, setLen, true);
+    }
+  } else {
+    if (pos >= stringLen) { // nothing lies after POS
+      return stringLen + 1;
+    }
+    std::size_t npos; // match position within STRING(POS+1:); 0 means no match
+    if constexpr (sizeof(CHAR) == 1) {
+      npos =
+          ScanVerify<false>(string + pos, stringLen - pos, set, setLen, false);
+    } else {
+      npos = ScanVerify<CHAR, CharFunc::Scan>(
+          string + pos, stringLen - pos, set, setLen, false);
+    }
+    return npos != 0 ? pos + npos : stringLen + 1;
+  }
+}
+
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
@@ -1375,6 +1411,21 @@ void RTDEF(TokenizePositions)(Descriptor &first, Descriptor &last,
   TokenizePositionsImpl(first, last, string, set, terminator);
 }
 
+std::size_t RTDEF(Split1)(const char *string, std::size_t stringLen,
+    const char *set, std::size_t setLen, std::size_t pos, bool back) {
+  return SplitImpl(string, stringLen, set, setLen, pos, back); // CHAR=char
+}
+
+std::size_t RTDEF(Split2)(const char16_t *string, std::size_t stringLen,
+    const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
+  return SplitImpl(string, stringLen, set, setLen, pos, back); // CHAR=char16_t
+}
+
+std::size_t RTDEF(Split4)(const char32_t *string, std::size_t stringLen,
+    const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
+  return SplitImpl(string, stringLen, set, setLen, pos, back); // CHAR=char32_t
+}
+
 RT_EXT_API_GROUP_END
 }
 } // namespace Fortran::runtime

diff  --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 4b304a98ada1b..6577876057c3c 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -392,6 +392,91 @@ TYPED_TEST(SearchTests, VerifyTests) {
       "VERIFY", tests, std::get<SearchFunction<TypeParam>>(functions));
 }
 
+// Test SPLIT(); SplitFunction mirrors the Split<N> runtime signatures.
+template <typename CHAR>
+using SplitFunction = std::function<std::size_t(
+    const CHAR *, std::size_t, const CHAR *, std::size_t, std::size_t, bool)>;
+using SplitFunctions = std::tuple<SplitFunction<char>, SplitFunction<char16_t>,
+    SplitFunction<char32_t>>;
+struct SplitTestCase {
+  const char *string, *set; // narrow text, widened to each CHAR type
+  std::size_t pos; // POS passed to the runtime
+  bool back; // BACK passed to the runtime
+  std::size_t expect; // required return value
+};
+
+template <typename CHAR> // CHAR: one of the SplitFunctions element types
+void RunSplitTests(const char *which, // which: label for failure messages
+    const std::vector<SplitTestCase> &testCases,
+    const SplitFunction<CHAR> &function) {
+  for (const auto &t : testCases) {
+    std::size_t strLen{std::strlen(t.string)}, setLen{std::strlen(t.set)};
+    std::basic_string<CHAR> str{t.string, t.string + strLen}; // widen to CHAR
+    std::basic_string<CHAR> set{t.set, t.set + setLen};
+    auto got{function(str.data(), strLen, set.data(), setLen, t.pos, t.back)};
+    ASSERT_EQ(got, t.expect)
+        << which << "('" << t.string << "','" << t.set << "',pos=" << t.pos
+        << ",back=" << t.back << ") for CHARACTER(kind=" << sizeof(CHAR)
+        << "): got " << got << ", expected " << t.expect;
+  }
+}
+
+template <typename CHAR> struct SplitTests : public ::testing::Test {};
+TYPED_TEST_SUITE(SplitTests, CharacterTypes, );
+
+TYPED_TEST(SplitTests, SplitForward) { // BACK=false cases
+  static SplitFunctions functions{
+      RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)}; // one per CHAR type
+  static std::vector<SplitTestCase> tests{
+      // "one,two,three" with set=","
+      // Forward scanning: from pos=0, find first ',' at position 4
+      {"one,two,three", ",", 0, false, 4},
+      // From pos=4, find next ',' at position 8
+      {"one,two,three", ",", 4, false, 8},
+      // From pos=8, no more ',', return len+1=14
+      {"one,two,three", ",", 8, false, 14},
+      // Empty string: nothing to scan, return len+1=1
+      {"", ",", 0, false, 1},
+      // No delimiters in string: return len+1=4
+      {"abc", ",", 0, false, 4},
+      // String is all delimiters: each call advances by one position
+      {",,", ",", 0, false, 1},
+      {",,", ",", 1, false, 2},
+      {",,", ",", 2, false, 3}, // pos==len: return len+1
+      // pos at end of string: return len+1=4
+      {"abc", ",", 3, false, 4},
+      // Multiple delimiter characters in set
+      {"a,b;c", ",;", 0, false, 2},
+      {"a,b;c", ",;", 2, false, 4},
+      {"a,b;c", ",;", 4, false, 6},
+  };
+  RunSplitTests(
+      "SPLIT(forward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
+TYPED_TEST(SplitTests, SplitBackward) { // BACK=true cases
+  static SplitFunctions functions{
+      RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)}; // one per CHAR type
+  static std::vector<SplitTestCase> tests{
+      // "one,two,three" with set=","
+      // Backward scanning: from pos=14 (len+1), find last ',' at position 8
+      {"one,two,three", ",", 14, true, 8},
+      // From pos=8, find previous ',' at position 4
+      {"one,two,three", ",", 8, true, 4},
+      // From pos=4, no ',' before position 4, return 0
+      {"one,two,three", ",", 4, true, 0},
+      // Empty string: nothing precedes pos, return 0
+      {"", ",", 1, true, 0},
+      // pos=0 or pos=1 leaves nothing to scan, so the result is 0
+      {"abc", ",", 0, true, 0},
+      {"abc", ",", 1, true, 0},
+      // No delimiters in string: return 0
+      {"abc", ",", 4, true, 0},
+  };
+  RunSplitTests(
+      "SPLIT(backward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
 // Test REPEAT()
 template <typename CHAR> struct RepeatTests : public ::testing::Test {};
 TYPED_TEST_SUITE(RepeatTests, CharacterTypes, );

diff  --git a/flang/docs/F202X.md b/flang/docs/F202X.md
index d1940a1858db1..988c0e9f083e0 100644
--- a/flang/docs/F202X.md
+++ b/flang/docs/F202X.md
@@ -284,47 +284,18 @@ arguments or results with conversion factors.
 
 `SELECTED_LOGICAL_KIND` maps a bit size to a kind of `LOGICAL`
 
-There are two new character utility intrinsic
-functions whose implementations have very low priority: `SPLIT` and `TOKENIZE`.
-`TOKENIZE` requires memory allocation to return its results,
-and could and should have been implemented once in some Fortran utility
-library for those who need a slow tokenization facility rather than
-requiring implementations in each vendor's runtime support library with
-all the extra cost and compatibility risk that entails.
-
-`SPLIT` is worse -- not only could it, like `TOKENIZE`,
-have been supplied by a Fortran utility library rather than being
-added to the standard, it's redundant;
-it provides nothing that cannot be already accomplished by
-composing today's `SCAN` intrinsic function with substring indexing:
-
-```
-module m
-  interface split
-    module procedure :: split
-  end interface
-  !instantiate for all possible ck/ik/lk combinations
-  integer, parameter :: ck = kind(''), ik = kind(0), lk = kind(.true.)
- contains
-  simple elemental subroutine split(string, set, pos, back)
-    character(*, kind=ck), intent(in) :: string, set
-    integer(kind=ik), intent(in out) :: pos
-    logical(kind=lk), intent(in), optional :: back
-    if (present(back)) then
-      if (back) then
-        pos = scan(string(:pos-1), set, .true.)
-        return
-      end if
-    end if
-    npos = scan(string(pos+1:), set)
-    pos = merge(pos + npos, len(string) + 1, npos /= 0)
-  end
-end
-```
-
-(The code above isn't a proposed implementation for `SPLIT`, just a
-demonstration of how programs could use `SCAN` to accomplish the same
-results today.)
+There are two new character utility intrinsic subroutines,
+`SPLIT` and `TOKENIZE`, both of which are now implemented.
+
+`SPLIT` scans for separator characters in a string.
+When `BACK` is absent or false, it returns the position of the leftmost
+character in `SET` whose position in `STRING` is greater than `POS`,
+or `LEN(STRING)+1` if no such character exists.
+When `BACK` is true, it returns the position of the rightmost character
+in `SET` whose position in `STRING` is less than `POS`, or 0 if no
+such character exists.
+
+`TOKENIZE` extracts tokens from a string separated by characters in a set.
 
 ## Source limitations
 

diff  --git a/flang/docs/FortranStandardsSupport.md b/flang/docs/FortranStandardsSupport.md
index f57956cd6d6b8..8a04510918e62 100644
--- a/flang/docs/FortranStandardsSupport.md
+++ b/flang/docs/FortranStandardsSupport.md
@@ -39,7 +39,7 @@ status of all important Fortran 2023 features. The table entries are based on th
 | The specifiers typeof and classof                          | N      | |
 | Conditional expressions and arguments                      | N      | |
 | More use of boz constants                                  | P      | All usages other than enum are supported |
-| Intrinsics for extracting tokens from a string             | N      | |
+| Intrinsics for extracting tokens from a string             | Y      | |
 | Intrinsics for Trig functions that work in degrees         | Y      | |
 | Intrinsics for Trig functions that work in half revolutions| Y      | |
 | Changes to system_clock                                    | N      | |

diff  --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index 615d2746284ab..330fcf303de0e 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -361,6 +361,24 @@ that is present in `SET`, or zero if none is.
 `VERIFY` is essentially the opposite: it returns the index of the first (or last) character
 in `STRING` that is *not* present in `SET`, or zero if all are.
 
+### Character intrinsic subroutines (Fortran 2023)
+```
+CALL SPLIT(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER(any) POS, LOGICAL(any) BACK=.FALSE.)
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, CHARACTER(k,:) TOKENS(:) [, SEPARATOR])
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER FIRST(:), INTEGER LAST(:))
+```
+
+`SPLIT` scans for separator characters in `STRING` from the set `SET`.
+When `BACK` is absent or `.FALSE.`, it returns (in `POS`) the position of the
+leftmost character in `SET` whose position in `STRING` is greater than `POS`,
+or `LEN(STRING)+1` if no such character exists.
+When `BACK` is `.TRUE.`, it returns the position of the rightmost character in
+`SET` whose position in `STRING` is less than `POS`, or 0 if no such character exists.
+
+`TOKENIZE` extracts tokens from `STRING` delimited by characters in `SET`.
+In Form 1, it returns the tokens as an array of characters and optionally the separator characters.
+In Form 2, it returns the starting and ending positions of each token.
+
 ## Transformational intrinsic functions
 
 This category comprises a large collection of intrinsic functions that

diff  --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 3ef4045518cc4..ca9677a8cb2b1 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -390,6 +390,7 @@ struct IntrinsicLibrary {
   fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genSpacing(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
+  void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genStorageSize(mlir::Type,
                                     llvm::ArrayRef<fir::ExtendedValue>);

diff  --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index 684b7498e725a..b365b0bce31df 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -142,6 +142,15 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
                       mlir::Value setBase, mlir::Value setLen,
                       mlir::Value back);
 
+/// Generate call to the SPLIT runtime routine that is specialized on
+/// \p kind.
+/// The \p kind represents the kind of the elements in the strings.
+/// Returns the position computed by the runtime; \p pos is an input value only.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+                     mlir::Value stringBase, mlir::Value stringLen,
+                     mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+                     mlir::Value back);
+
 /// Generate call to TOKENIZE runtime (Form 1).
 /// Splits \p stringBox into tokens based on separator characters in \p setBox.
 /// \p tokensBox must be an unallocated allocatable array that receives the

diff  --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index 360418b7d5531..93f36d077a532 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -137,6 +137,15 @@ void RTDECL(Tokenize)(Descriptor &tokens, Descriptor *separator,
 void RTDECL(TokenizePositions)(Descriptor &first, Descriptor &last,
     const Descriptor &string, const Descriptor &set,
     const char *sourceFile = nullptr, int sourceLine = 0);
+// SPLIT, one entry point per CHARACTER kind; each returns the new position.
+std::size_t RTDECL(Split1)(const char *string, std::size_t stringLen,
+    const char *set, std::size_t setLen, std::size_t pos, bool back = false);
+std::size_t RTDECL(Split2)(const char16_t *string, std::size_t stringLen,
+    const char16_t *set, std::size_t setLen, std::size_t pos,
+    bool back = false);
+std::size_t RTDECL(Split4)(const char32_t *string, std::size_t stringLen,
+    const char32_t *set, std::size_t setLen, std::size_t pos,
+    bool back = false);
 }
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_CHARACTER_H_

diff  --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 5659c5ae7f2d6..84cd2288fcd0b 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1750,6 +1750,16 @@ static const IntrinsicInterface intrinsicSubroutine[]{
         {{"seconds", AnyInt, Rank::scalar, Optionality::required,
             common::Intent::In}},
         {}, Rank::elemental, IntrinsicClass::impureSubroutine},
+    {"split",
+        {{"string", SameCharNoLen, Rank::scalar, Optionality::required,
+             common::Intent::In},
+            {"set", SameCharNoLen, Rank::scalar, Optionality::required,
+                common::Intent::In},
+            {"pos", AnyInt, Rank::scalar, Optionality::required,
+                common::Intent::InOut},
+            {"back", AnyLogical, Rank::scalar, Optionality::optional,
+                common::Intent::In}},
+        {}, Rank::elemental, IntrinsicClass::pureSubroutine},
     {"tokenize",
         {{"string", SameCharNoLen, Rank::scalar, Optionality::required,
              common::Intent::In},

diff  --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index d57a2468dcffe..d6dee88f422e0 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -764,6 +764,13 @@ static constexpr IntrinsicHandler handlers[]{
      /*isElemental=*/false},
     {"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
     {"spacing", &I::genSpacing},
+    {"split",
+     &I::genSplit,
+     {{{"string", asAddr},
+       {"set", asAddr},
+       {"pos", asAddr},
+       {"back", asValue, handleDynamicOptional}}},
+     /*isElemental=*/false},
     {"spread",
      &I::genSpread,
      {{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8538,6 +8545,39 @@ void IntrinsicLibrary::genSleep(llvm::ArrayRef<fir::ExtendedValue> args) {
   fir::runtime::genSleep(builder, loc, fir::getBase(args[0]));
 }
 
+// SPLIT: load POS, call the kind-specific runtime, store the result into POS.
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 4);
+
+  mlir::Value stringBase = fir::getBase(args[0]);
+  mlir::Value stringLen = fir::getLen(args[0]);
+  mlir::Value setBase = fir::getBase(args[1]);
+  mlir::Value setLen = fir::getLen(args[1]);
+  mlir::Value posAddr = fir::getBase(args[2]);
+  // The character kind of STRING picks the Split1/Split2/Split4 runtime.
+  fir::KindTy kind =
+      fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+          stringBase.getType());
+
+  // BACK is optional and defaults to .FALSE. when absent.
+  mlir::Value back =
+      isStaticallyAbsent(args[3])
+          ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+          : fir::getBase(args[3]);
+  // Load POS through its address; posRefTy is the pointee type (e.g. i32).
+  mlir::Type posRefTy = fir::dyn_cast_ptrEleTy(posAddr.getType());
+  mlir::Value posValue = fir::LoadOp::create(builder, loc, posRefTy, posAddr);
+  mlir::Type indexTy = builder.getIndexType();
+  mlir::Value posIndex = builder.createConvert(loc, indexTy, posValue);
+
+  mlir::Value newPos =
+      fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen, setBase,
+                             setLen, posIndex, back);
+  // Convert the runtime result back to POS's type and store it in place.
+  mlir::Value newPosConverted = builder.createConvert(loc, posRefTy, newPos);
+  fir::StoreOp::create(builder, loc, newPosConverted, posAddr);
+}
+
 // TOKENIZE
 void IntrinsicLibrary::genTokenize(llvm::ArrayRef<fir::ExtendedValue> args) {
   assert(args.size() == 4 && "TOKENIZE requires 3 or 4 arguments");

diff  --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 28e795b8de759..c77374986010c 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -339,3 +339,30 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
                                             stringLen, setBase, setLen, back);
   return fir::CallOp::create(builder, loc, func, args).getResult(0);
 }
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+                                   mlir::Location loc, int kind,
+                                   mlir::Value stringBase,
+                                   mlir::Value stringLen, mlir::Value setBase,
+                                   mlir::Value setLen, mlir::Value pos,
+                                   mlir::Value back) {
+  mlir::func::FuncOp func; // runtime entry point for this kind
+  switch (kind) {
+  case 1:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+    break;
+  case 2:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+    break;
+  case 4:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+    break;
+  default: // only kinds 1, 2 and 4 have a Split runtime
+    fir::emitFatalError(
+        loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+  }
+  auto fTy = func.getFunctionType();
+  auto args = fir::runtime::createArguments(
+      builder, loc, fTy, stringBase, stringLen, setBase, setLen, pos, back);
+  return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}

diff  --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..88fdb7e9a400a
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,45 @@
+! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: split_basic
+subroutine split_basic()
+  implicit none
+  character(20) :: string
+  character(5) :: set
+  integer :: pos
+  string = "one,two,three"
+  set = ","
+  pos = 0
+  call split(string, set, pos)
+  ! CHECK: %[[BACK:.*]] = arith.constant false
+  ! CHECK: %[[POS:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: %[[POS_IDX:.*]] = fir.convert %[[POS]] : (i32) -> index
+  ! CHECK: %[[STRING:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,20>>) -> !fir.ref<i8>
+  ! CHECK: %[[SET:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
+  ! CHECK: %[[POS_I64:.*]] = fir.convert %[[POS_IDX]] : (index) -> i64
+  ! CHECK: %[[RESULT:.*]] = fir.call @_FortranASplit1(%[[STRING]], %{{.*}}, %[[SET]], %{{.*}}, %[[POS_I64]], %[[BACK]]) {{.*}} : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+  ! CHECK: %[[RESULT_I32:.*]] = fir.convert %[[RESULT]] : (i64) -> i32
+  ! CHECK: fir.store %[[RESULT_I32]] to %{{.*}} : !fir.ref<i32>
+end subroutine split_basic
+
+! CHECK-LABEL: split_back
+subroutine split_back()
+  implicit none
+  character(20) :: string
+  character(5) :: set
+  integer :: pos
+  logical :: back
+  string = "one,two,three"
+  set = ","
+  pos = 14
+  back = .true.
+  call split(string, set, pos, back)
+  ! CHECK: %[[POS:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: %[[POS_IDX:.*]] = fir.convert %[[POS]] : (i32) -> index
+  ! CHECK: %[[STRING:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,20>>) -> !fir.ref<i8>
+  ! CHECK: %[[SET:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
+  ! CHECK: %[[POS_I64:.*]] = fir.convert %[[POS_IDX]] : (index) -> i64
+  ! CHECK: %[[BACK_CVT:.*]] = fir.convert %{{.*}} : (!fir.logical<4>) -> i1
+  ! CHECK: %[[RESULT:.*]] = fir.call @_FortranASplit1(%[[STRING]], %{{.*}}, %[[SET]], %{{.*}}, %[[POS_I64]], %[[BACK_CVT]]) {{.*}} : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+  ! CHECK: %[[RESULT_I32:.*]] = fir.convert %[[RESULT]] : (i64) -> i32
+  ! CHECK: fir.store %[[RESULT_I32]] to %{{.*}} : !fir.ref<i32>
+end subroutine split_back

diff  --git a/flang/test/Semantics/split.f90 b/flang/test/Semantics/split.f90
new file mode 100644
index 0000000000000..60e35753c6d40
--- /dev/null
+++ b/flang/test/Semantics/split.f90
@@ -0,0 +1,98 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+! Check for semantic errors in split() subroutine calls
+! Based on Fortran 2023 standard requirements
+
+program test_split_errors
+  implicit none
+
+  character(20) :: string
+  character(5) :: set
+  integer :: pos
+  logical :: back
+
+  ! Valid declarations for testing
+  integer :: int_scalar
+  real :: real_scalar
+  character(10) :: string_array(5)
+  character(5) :: set_array(5)
+  character(len=20, kind=2) :: string_k2
+  character(len=5, kind=2) :: set_k2
+  character(len=20, kind=4) :: string_k4
+  character(len=5, kind=4) :: set_k4
+
+  !========================================================================
+  ! Valid calls (reference)
+  !========================================================================
+
+  call split(string, set, pos)
+  call split(string, set, pos, back)
+  call split("hello world", " ", pos)
+  call split("hello world", " ", pos, .false.)
+
+  ! Valid calls with different character kinds
+  call split(string_k2, set_k2, pos)
+  call split(string_k2, set_k2, pos, back)
+  call split(string_k4, set_k4, pos)
+  call split(string_k4, set_k4, pos, back)
+
+  !========================================================================
+  ! Wrong types for STRING argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'string=' has bad type 'INTEGER(4)'
+  call split(int_scalar, set, pos)
+
+  !ERROR: Actual argument for 'string=' has bad type 'REAL(4)'
+  call split(real_scalar, set, pos)
+
+  !========================================================================
+  ! Wrong rank for STRING (must be scalar)
+  !========================================================================
+
+  !ERROR: 'string=' argument has unacceptable rank 1
+  call split(string_array, set, pos)
+
+  !========================================================================
+  ! Wrong types for SET argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'set=' has bad type 'INTEGER(4)'
+  call split(string, int_scalar, pos)
+
+  !ERROR: Actual argument for 'set=' has bad type 'REAL(4)'
+  call split(string, real_scalar, pos)
+
+  !========================================================================
+  ! Wrong types for POS argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'pos=' has bad type 'REAL(4)'
+  call split(string, set, real_scalar)
+
+  !========================================================================
+  ! Wrong types for BACK argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'back=' has bad type 'INTEGER(4)'
+  call split(string, set, pos, int_scalar)
+
+  !========================================================================
+  ! Character kind mismatches between STRING and SET
+  !========================================================================
+
+  !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=1,LEN=5_8)'
+  call split(string_k2, set, pos)
+
+  !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=2,LEN=5_8)'
+  call split(string, set_k2, pos)
+
+  !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=1,LEN=5_8)'
+  call split(string_k4, set, pos)
+
+  !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=4,LEN=5_8)'
+  call split(string, set_k4, pos)
+
+  !ERROR: Actual argument for 'set=' has bad type or kind 'CHARACTER(KIND=4,LEN=5_8)'
+  call split(string_k2, set_k4, pos)
+
+end program test_split_errors


        


More information about the flang-commits mailing list