[flang] [llvm] [flang] Implement SPLIT intrinsic subroutine with tests (PR #185584)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 9 23:58:24 PDT 2026


https://github.com/laoshd updated https://github.com/llvm/llvm-project/pull/185584

>From 335b9515c2e41b78b44cd1221c9391c3a3fc9422 Mon Sep 17 00:00:00 2001
From: Shandong Lao <shandong.lao at hpe.com>
Date: Tue, 10 Mar 2026 01:07:14 -0500
Subject: [PATCH 1/2] [flang] Implement SPLIT intrinsic subroutine with tests

---
 flang-rt/lib/runtime/character.cpp            | 64 ++++++++++++++
 flang-rt/unittests/Runtime/CharacterTest.cpp  | 85 +++++++++++++++++++
 flang/docs/F202X.md                           | 53 +++---------
 flang/docs/FortranStandardsSupport.md         |  2 +-
 flang/docs/Intrinsics.md                      | 18 ++++
 .../flang/Optimizer/Builder/IntrinsicCall.h   |  1 +
 .../Optimizer/Builder/Runtime/Character.h     |  9 ++
 flang/include/flang/Runtime/character.h       |  9 ++
 flang/lib/Evaluate/intrinsics.cpp             | 10 +++
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 51 +++++++++++
 .../Optimizer/Builder/Runtime/Character.cpp   | 28 ++++++
 flang/test/Lower/Intrinsics/split.f90         | 37 ++++++++
 flang/test/Semantics/split.f90                | 69 +++++++++++++++
 13 files changed, 394 insertions(+), 42 deletions(-)
 create mode 100644 flang/test/Lower/Intrinsics/split.f90
 create mode 100644 flang/test/Semantics/split.f90

diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index a663643fa18cc..39b58465e9156 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -992,6 +992,55 @@ static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
   }
 }
 
+// SPLIT - scans for the next separator character in STRING.
+// When BACK is false (or absent), returns the position of the leftmost
+// character of STRING that is in SET and lies after POS, or
+// LEN(STRING)+1 if no such character exists.
+// When BACK is true, returns the position of the rightmost character of
+// STRING that is in SET and lies before POS, or 0 if no such
+// character exists.
+template <typename CHAR>
+static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
+    std::size_t stringLen, const CHAR *set, std::size_t setLen,
+    std::size_t pos, bool back) {
+  if (back) {
+    // Scan backwards from position pos-1 (1-indexed pos means index pos-2)
+    // looking for the rightmost separator at position < pos.
+    if (pos <= 1) { // nothing precedes position 1; keeps pos - 1 from wrapping
+      return 0;
+    }
+    std::size_t scanLen = pos - 1; // number of characters to scan
+    if (scanLen > stringLen) { // clamp so the scan stays inside STRING
+      scanLen = stringLen;
+    }
+    for (std::size_t i = scanLen; i > 0; --i) {
+      CHAR ch = string[i - 1];
+      for (std::size_t j = 0; j < setLen; ++j) {
+        if (set[j] == ch) {
+          return i; // 1-indexed position
+        }
+      }
+    }
+    return 0; // no separator found before POS
+  } else {
+    // Scan forward from position pos+1 (1-indexed) looking for the
+    // leftmost separator at position > pos.
+    if (pos >= stringLen) {
+      return stringLen + 1;
+    }
+    std::size_t startIdx = pos; // 0-indexed start = pos (since pos is 1-indexed and we want pos+1)
+    for (std::size_t i = startIdx; i < stringLen; ++i) {
+      CHAR ch = string[i];
+      for (std::size_t j = 0; j < setLen; ++j) {
+        if (set[j] == ch) {
+          return i + 1; // convert to 1-indexed
+        }
+      }
+    }
+    return stringLen + 1; // no separator found after POS
+  }
+}
+
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
@@ -1375,6 +1424,21 @@ void RTDEF(TokenizePositions)(Descriptor &first, Descriptor &last,
   TokenizePositionsImpl(first, last, string, set, terminator);
 }
 
+std::size_t RTDEF(Split1)(const char *string, std::size_t stringLen,
+    const char *set, std::size_t setLen, std::size_t pos, bool back) {
+  return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
+std::size_t RTDEF(Split2)(const char16_t *string, std::size_t stringLen,
+    const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
+  return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
+std::size_t RTDEF(Split4)(const char32_t *string, std::size_t stringLen,
+    const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
+  return SplitImpl(string, stringLen, set, setLen, pos, back);
+}
+
 RT_EXT_API_GROUP_END
 }
 } // namespace Fortran::runtime
diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 4b304a98ada1b..6577876057c3c 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -392,6 +392,91 @@ TYPED_TEST(SearchTests, VerifyTests) {
       "VERIFY", tests, std::get<SearchFunction<TypeParam>>(functions));
 }
 
+// Test SPLIT()
+template <typename CHAR>
+using SplitFunction = std::function<std::size_t(
+    const CHAR *, std::size_t, const CHAR *, std::size_t, std::size_t, bool)>;
+using SplitFunctions = std::tuple<SplitFunction<char>, SplitFunction<char16_t>,
+    SplitFunction<char32_t>>;
+struct SplitTestCase {
+  const char *string, *set;
+  std::size_t pos;
+  bool back;
+  std::size_t expect;
+};
+
+template <typename CHAR>
+void RunSplitTests(const char *which,
+    const std::vector<SplitTestCase> &testCases,
+    const SplitFunction<CHAR> &function) {
+  for (const auto &t : testCases) {
+    std::size_t strLen{std::strlen(t.string)}, setLen{std::strlen(t.set)};
+    std::basic_string<CHAR> str{t.string, t.string + strLen};
+    std::basic_string<CHAR> set{t.set, t.set + setLen};
+    auto got{function(str.data(), strLen, set.data(), setLen, t.pos, t.back)};
+    ASSERT_EQ(got, t.expect)
+        << which << "('" << t.string << "','" << t.set << "',pos=" << t.pos
+        << ",back=" << t.back << ") for CHARACTER(kind=" << sizeof(CHAR)
+        << "): got " << got << ", expected " << t.expect;
+  }
+}
+
+template <typename CHAR> struct SplitTests : public ::testing::Test {};
+TYPED_TEST_SUITE(SplitTests, CharacterTypes, );
+
+TYPED_TEST(SplitTests, SplitForward) {
+  static SplitFunctions functions{
+      RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+  static std::vector<SplitTestCase> tests{
+      // "one,two,three" with set=","
+      // Forward scanning: from pos=0, find first ',' at position 4
+      {"one,two,three", ",", 0, false, 4},
+      // From pos=4, find next ',' at position 8
+      {"one,two,three", ",", 4, false, 8},
+      // From pos=8, no more ',', return len+1=14
+      {"one,two,three", ",", 8, false, 14},
+      // Empty string
+      {"", ",", 0, false, 1},
+      // No delimiters in string
+      {"abc", ",", 0, false, 4},
+      // String is all delimiters
+      {",,", ",", 0, false, 1},
+      {",,", ",", 1, false, 2},
+      {",,", ",", 2, false, 3},
+      // pos at end of string
+      {"abc", ",", 3, false, 4},
+      // Multiple delimiter characters in set
+      {"a,b;c", ",;", 0, false, 2},
+      {"a,b;c", ",;", 2, false, 4},
+      {"a,b;c", ",;", 4, false, 6},
+  };
+  RunSplitTests(
+      "SPLIT(forward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
+TYPED_TEST(SplitTests, SplitBackward) {
+  static SplitFunctions functions{
+      RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+  static std::vector<SplitTestCase> tests{
+      // "one,two,three" with set=","
+      // Backward scanning: from pos=14 (len+1), find last ',' at position 8
+      {"one,two,three", ",", 14, true, 8},
+      // From pos=8, find previous ',' at position 4
+      {"one,two,three", ",", 8, true, 4},
+      // From pos=4, no ',' before position 4, return 0
+      {"one,two,three", ",", 4, true, 0},
+      // Empty string
+      {"", ",", 1, true, 0},
+      // pos=0 or pos=1 should return 0
+      {"abc", ",", 0, true, 0},
+      {"abc", ",", 1, true, 0},
+      // No delimiters in string
+      {"abc", ",", 4, true, 0},
+  };
+  RunSplitTests(
+      "SPLIT(backward)", tests, std::get<SplitFunction<TypeParam>>(functions));
+}
+
 // Test REPEAT()
 template <typename CHAR> struct RepeatTests : public ::testing::Test {};
 TYPED_TEST_SUITE(RepeatTests, CharacterTypes, );
diff --git a/flang/docs/F202X.md b/flang/docs/F202X.md
index d1940a1858db1..988c0e9f083e0 100644
--- a/flang/docs/F202X.md
+++ b/flang/docs/F202X.md
@@ -284,47 +284,18 @@ arguments or results with conversion factors.
 
 `SELECTED_LOGICAL_KIND` maps a bit size to a kind of `LOGICAL`
 
-There are two new character utility intrinsic
-functions whose implementations have very low priority: `SPLIT` and `TOKENIZE`.
-`TOKENIZE` requires memory allocation to return its results,
-and could and should have been implemented once in some Fortran utility
-library for those who need a slow tokenization facility rather than
-requiring implementations in each vendor's runtime support library with
-all the extra cost and compatibility risk that entails.
-
-`SPLIT` is worse -- not only could it, like `TOKENIZE`,
-have been supplied by a Fortran utility library rather than being
-added to the standard, it's redundant;
-it provides nothing that cannot be already accomplished by
-composing today's `SCAN` intrinsic function with substring indexing:
-
-```
-module m
-  interface split
-    module procedure :: split
-  end interface
-  !instantiate for all possible ck/ik/lk combinations
-  integer, parameter :: ck = kind(''), ik = kind(0), lk = kind(.true.)
- contains
-  simple elemental subroutine split(string, set, pos, back)
-    character(*, kind=ck), intent(in) :: string, set
-    integer(kind=ik), intent(in out) :: pos
-    logical(kind=lk), intent(in), optional :: back
-    if (present(back)) then
-      if (back) then
-        pos = scan(string(:pos-1), set, .true.)
-        return
-      end if
-    end if
-    npos = scan(string(pos+1:), set)
-    pos = merge(pos + npos, len(string) + 1, npos /= 0)
-  end
-end
-```
-
-(The code above isn't a proposed implementation for `SPLIT`, just a
-demonstration of how programs could use `SCAN` to accomplish the same
-results today.)
+There are two new character utility intrinsic subroutines,
+`SPLIT` and `TOKENIZE`, both of which are now implemented.
+
+`SPLIT` scans for separator characters in a string.
+When `BACK` is absent or false, it sets `POS` to the position of the leftmost
+character of `STRING` that is in `SET` and lies after the old `POS`,
+or to `LEN(STRING)+1` if no such character exists.
+When `BACK` is true, it sets `POS` to the position of the rightmost character
+of `STRING` that is in `SET` and lies before the old `POS`, or to 0 if no
+such character exists.
+
+`TOKENIZE` extracts tokens from a string separated by characters in a set.
 
 ## Source limitations
 
diff --git a/flang/docs/FortranStandardsSupport.md b/flang/docs/FortranStandardsSupport.md
index f57956cd6d6b8..db66df5670e65 100644
--- a/flang/docs/FortranStandardsSupport.md
+++ b/flang/docs/FortranStandardsSupport.md
@@ -39,7 +39,7 @@ status of all important Fortran 2023 features. The table entries are based on th
 | The specifiers typeof and classof                          | N      | |
 | Conditional expressions and arguments                      | N      | |
 | More use of boz constants                                  | P      | All usages other than enum are supported |
-| Intrinsics for extracting tokens from a string             | N      | |
+| Intrinsics for extracting tokens from a string             | Y      | SPLIT, TOKENIZE |
 | Intrinsics for Trig functions that work in degrees         | Y      | |
 | Intrinsics for Trig functions that work in half revolutions| Y      | |
 | Changes to system_clock                                    | N      | |
diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index 615d2746284ab..330fcf303de0e 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -361,6 +361,24 @@ that is present in `SET`, or zero if none is.
 `VERIFY` is essentially the opposite: it returns the index of the first (or last) character
 in `STRING` that is *not* present in `SET`, or zero if all are.
 
+### Character intrinsic subroutines (Fortran 2023)
+```
+CALL SPLIT(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER(any) POS, LOGICAL(any) BACK=.FALSE.)
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, CHARACTER(k,:) TOKENS(:) [, SEPARATOR])
+CALL TOKENIZE(CHARACTER(k,n) STRING, CHARACTER(k,m) SET, INTEGER FIRST(:), INTEGER LAST(:))
+```
+
+`SPLIT` scans `STRING` for separator characters drawn from the set `SET`.
+When `BACK` is absent or `.FALSE.`, it returns (in `POS`) the position of the
+leftmost character of `STRING` that is in `SET` and lies after the incoming `POS`,
+or `LEN(STRING)+1` if no such character exists.
+When `BACK` is `.TRUE.`, it returns (in `POS`) the position of the rightmost character
+of `STRING` that is in `SET` and lies before the incoming `POS`, or 0 if no such character exists.
+
+`TOKENIZE` extracts tokens from `STRING` delimited by characters in `SET`.
+In Form 1, it returns the tokens as an array of characters and optionally the separator characters.
+In Form 2, it returns the starting and ending positions of each token.
+
 ## Transformational intrinsic functions
 
 This category comprises a large collection of intrinsic functions that
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 3ef4045518cc4..ca9677a8cb2b1 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -390,6 +390,7 @@ struct IntrinsicLibrary {
   fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genSpacing(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
+  void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genStorageSize(mlir::Type,
                                     llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index 684b7498e725a..b365b0bce31df 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -142,6 +142,15 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
                       mlir::Value setBase, mlir::Value setLen,
                       mlir::Value back);
 
+/// Generate call to the SPLIT runtime routine that is specialized on
+/// \param kind.
+/// The \param kind represents the kind of the elements in the strings.
+/// Returns the next separator position; \p pos itself is passed by value.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+                     mlir::Value stringBase, mlir::Value stringLen,
+                     mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+                     mlir::Value back);
+
 /// Generate call to TOKENIZE runtime (Form 1).
 /// Splits \p stringBox into tokens based on separator characters in \p setBox.
 /// \p tokensBox must be an unallocated allocatable array that receives the
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index 360418b7d5531..93f36d077a532 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -137,6 +137,15 @@ void RTDECL(Tokenize)(Descriptor &tokens, Descriptor *separator,
 void RTDECL(TokenizePositions)(Descriptor &first, Descriptor &last,
     const Descriptor &string, const Descriptor &set,
     const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *string, std::size_t stringLen,
+    const char *set, std::size_t setLen, std::size_t pos, bool back = false);
+std::size_t RTDECL(Split2)(const char16_t *string, std::size_t stringLen,
+    const char16_t *set, std::size_t setLen, std::size_t pos,
+    bool back = false);
+std::size_t RTDECL(Split4)(const char32_t *string, std::size_t stringLen,
+    const char32_t *set, std::size_t setLen, std::size_t pos,
+    bool back = false);
 }
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 2ae1c478489c4..d94ec117ac18a 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1750,6 +1750,16 @@ static const IntrinsicInterface intrinsicSubroutine[]{
         {{"seconds", AnyInt, Rank::scalar, Optionality::required,
             common::Intent::In}},
         {}, Rank::elemental, IntrinsicClass::impureSubroutine},
+    {"split",
+        {{"string", SameCharNoLen, Rank::scalar, Optionality::required,
+             common::Intent::In},
+            {"set", SameCharNoLen, Rank::scalar, Optionality::required,
+                common::Intent::In},
+            {"pos", AnyInt, Rank::scalar, Optionality::required,
+                common::Intent::InOut},
+            {"back", AnyLogical, Rank::scalar, Optionality::optional,
+                common::Intent::In}},
+        {}, Rank::elemental, IntrinsicClass::pureSubroutine},
     {"tokenize",
         {{"string", SameCharNoLen, Rank::scalar, Optionality::required,
              common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index d67eebdd8c93c..45e5568e43ba7 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -764,6 +764,13 @@ static constexpr IntrinsicHandler handlers[]{
      /*isElemental=*/false},
     {"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
     {"spacing", &I::genSpacing},
+    {"split",
+     &I::genSplit,
+     {{{"string", asAddr},
+       {"set", asAddr},
+       {"pos", asAddr},
+       {"back", asValue, handleDynamicOptional}}},
+     /*isElemental=*/true},
     {"spread",
      &I::genSpread,
      {{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8538,6 +8545,50 @@ void IntrinsicLibrary::genSleep(llvm::ArrayRef<fir::ExtendedValue> args) {
   fir::runtime::genSleep(builder, loc, fir::getBase(args[0]));
 }
 
+// SPLIT
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 4 && "SPLIT requires 3 or 4 arguments");
+
+  // Handle required STRING base and length args
+  mlir::Value stringBase = fir::getBase(args[0]);
+  mlir::Value stringLen = fir::getLen(args[0]);
+
+  // Handle required SET string base and length args
+  mlir::Value setBase = fir::getBase(args[1]);
+  mlir::Value setLen = fir::getLen(args[1]);
+
+  // POS is INTENT(INOUT) - it's passed as an address
+  mlir::Value posAddr = fir::getBase(args[2]);
+
+  // Determine character kind
+  fir::KindTy kind =
+      fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+          stringBase.getType());
+
+  // Handle optional BACK argument; when statically absent it is false
+  mlir::Value back =
+      isStaticallyAbsent(args[3])
+          ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+          : fir::getBase(args[3]);
+
+  // Load current POS value (posRefTy is the type POS's address points to)
+  mlir::Type posRefTy = fir::dyn_cast_ptrEleTy(posAddr.getType());
+  mlir::Value posValue = fir::LoadOp::create(builder, loc, posRefTy, posAddr);
+
+  // Convert POS to std::size_t (index type) for the runtime call
+  mlir::Type indexTy = builder.getIndexType();
+  mlir::Value posIndex = builder.createConvert(loc, indexTy, posValue);
+
+  // Call the runtime
+  mlir::Value newPos = fir::runtime::genSplit(builder, loc, kind, stringBase,
+                                              stringLen, setBase, setLen,
+                                              posIndex, back);
+
+  // Convert result back to the POS integer type and store
+  mlir::Value newPosConverted = builder.createConvert(loc, posRefTy, newPos);
+  fir::StoreOp::create(builder, loc, newPosConverted, posAddr);
+}
+
 // TOKENIZE
 void IntrinsicLibrary::genTokenize(llvm::ArrayRef<fir::ExtendedValue> args) {
   assert(args.size() == 4 && "TOKENIZE requires 3 or 4 arguments");
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 28e795b8de759..d49d9f86c97b9 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -339,3 +339,31 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
                                             stringLen, setBase, setLen, back);
   return fir::CallOp::create(builder, loc, func, args).getResult(0);
 }
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+                                   mlir::Location loc, int kind,
+                                   mlir::Value stringBase,
+                                   mlir::Value stringLen, mlir::Value setBase,
+                                   mlir::Value setLen, mlir::Value pos,
+                                   mlir::Value back) {
+  mlir::func::FuncOp func;
+  switch (kind) {
+  case 1:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+    break;
+  case 2:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+    break;
+  case 4:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+    break;
+  default:
+    fir::emitFatalError(
+        loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+  }
+  auto fTy = func.getFunctionType();
+  auto args = fir::runtime::createArguments(builder, loc, fTy, stringBase,
+                                            stringLen, setBase, setLen, pos,
+                                            back);
+  return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..d8a55a547c9d0
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,37 @@
+! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: split_basic
+subroutine split_basic()
+  implicit none
+  character(20) :: string
+  character(5) :: set
+  integer :: pos
+  string = "one,two,three"
+  set = ","
+  pos = 0
+  call split(string, set, pos)
+  ! CHECK: %[[BACK:.*]] = arith.constant false
+  ! CHECK: %[[POS:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: %[[POS_IDX:.*]] = fir.convert %[[POS]] : (i32) -> index
+  ! CHECK: %[[STRING:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,20>>) -> !fir.ref<i8>
+  ! CHECK: %[[SET:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
+  ! CHECK: %[[POS_I64:.*]] = fir.convert %[[POS_IDX]] : (index) -> i64
+  ! CHECK: %[[RESULT:.*]] = fir.call @_FortranASplit1(%[[STRING]], %{{.*}}, %[[SET]], %{{.*}}, %[[POS_I64]], %[[BACK]]) {{.*}} : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+  ! CHECK: %[[RESULT_I32:.*]] = fir.convert %[[RESULT]] : (i64) -> i32
+  ! CHECK: fir.store %[[RESULT_I32]] to %{{.*}} : !fir.ref<i32>
+end subroutine split_basic
+
+! CHECK-LABEL: split_back
+subroutine split_back()
+  implicit none
+  character(20) :: string
+  character(5) :: set
+  integer :: pos
+  logical :: back
+  string = "one,two,three"
+  set = ","
+  pos = 14
+  back = .true.
+  call split(string, set, pos, back)
+  ! CHECK: fir.call @_FortranASplit1(
+end subroutine split_back
diff --git a/flang/test/Semantics/split.f90 b/flang/test/Semantics/split.f90
new file mode 100644
index 0000000000000..f0388584010f6
--- /dev/null
+++ b/flang/test/Semantics/split.f90
@@ -0,0 +1,69 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+! Check for semantic errors in split() subroutine calls
+! Based on Fortran 2023 standard requirements
+
+program test_split_errors
+  implicit none
+
+  character(20) :: string
+  character(5) :: set
+  integer :: pos
+  logical :: back
+
+  ! Valid declarations for testing
+  integer :: int_scalar
+  real :: real_scalar
+  character(10) :: string_array(5)
+  character(5) :: set_array(5)
+
+  !========================================================================
+  ! Valid calls (reference)
+  !========================================================================
+
+  call split(string, set, pos)
+  call split(string, set, pos, back)
+  call split("hello world", " ", pos)
+  call split("hello world", " ", pos, .false.)
+
+  !========================================================================
+  ! Wrong types for STRING argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'string=' has bad type 'INTEGER(4)'
+  call split(int_scalar, set, pos)
+
+  !ERROR: Actual argument for 'string=' has bad type 'REAL(4)'
+  call split(real_scalar, set, pos)
+
+  !========================================================================
+  ! Wrong rank for STRING (must be scalar)
+  !========================================================================
+
+  !ERROR: 'string=' argument has unacceptable rank 1
+  call split(string_array, set, pos)
+
+  !========================================================================
+  ! Wrong types for SET argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'set=' has bad type 'INTEGER(4)'
+  call split(string, int_scalar, pos)
+
+  !ERROR: Actual argument for 'set=' has bad type 'REAL(4)'
+  call split(string, real_scalar, pos)
+
+  !========================================================================
+  ! Wrong types for POS argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'pos=' has bad type 'REAL(4)'
+  call split(string, set, real_scalar)
+
+  !========================================================================
+  ! Wrong types for BACK argument
+  !========================================================================
+
+  !ERROR: Actual argument for 'back=' has bad type 'INTEGER(4)'
+  call split(string, set, pos, int_scalar)
+
+end program test_split_errors

>From 0ede8cdabb587686cb4e274b7c12dd2ef0cfc015 Mon Sep 17 00:00:00 2001
From: Shandong Lao <shandong.lao at hpe.com>
Date: Tue, 10 Mar 2026 01:58:04 -0500
Subject: [PATCH 2/2] Reformat code in character.cpp, IntrinsicCall.cpp and
 Character.cpp to follow clang-format.

---
 flang-rt/lib/runtime/character.cpp                | 7 ++++---
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp     | 6 +++---
 flang/lib/Optimizer/Builder/Runtime/Character.cpp | 5 ++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 39b58465e9156..cb801f41bdeb2 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -1001,8 +1001,8 @@ static RT_API_ATTRS void TokenizePositionsImpl(Descriptor &first,
 // character exists.
 template <typename CHAR>
 static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
-    std::size_t stringLen, const CHAR *set, std::size_t setLen,
-    std::size_t pos, bool back) {
+    std::size_t stringLen, const CHAR *set, std::size_t setLen, std::size_t pos,
+    bool back) {
   if (back) {
     // Scan backwards from position pos-1 (1-indexed pos means index pos-2)
     // looking for the rightmost separator at position < pos.
@@ -1028,7 +1028,8 @@ static RT_API_ATTRS std::size_t SplitImpl(const CHAR *string,
     if (pos >= stringLen) {
       return stringLen + 1;
     }
-    std::size_t startIdx = pos; // 0-indexed start = pos (since pos is 1-indexed and we want pos+1)
+    std::size_t startIdx =
+        pos; // 0-indexed start = pos (since pos is 1-indexed and we want pos+1)
     for (std::size_t i = startIdx; i < stringLen; ++i) {
       CHAR ch = string[i];
       for (std::size_t j = 0; j < setLen; ++j) {
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 45e5568e43ba7..acfd9cfe89d30 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -8580,9 +8580,9 @@ void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
   mlir::Value posIndex = builder.createConvert(loc, indexTy, posValue);
 
   // Call the runtime
-  mlir::Value newPos = fir::runtime::genSplit(builder, loc, kind, stringBase,
-                                              stringLen, setBase, setLen,
-                                              posIndex, back);
+  mlir::Value newPos =
+      fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen, setBase,
+                             setLen, posIndex, back);
 
   // Convert result back to the POS integer type and store
   mlir::Value newPosConverted = builder.createConvert(loc, posRefTy, newPos);
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index d49d9f86c97b9..c77374986010c 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -362,8 +362,7 @@ mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
         loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
   }
   auto fTy = func.getFunctionType();
-  auto args = fir::runtime::createArguments(builder, loc, fTy, stringBase,
-                                            stringLen, setBase, setLen, pos,
-                                            back);
+  auto args = fir::runtime::createArguments(
+      builder, loc, fTy, stringBase, stringLen, setBase, setLen, pos, back);
   return fir::CallOp::create(builder, loc, func, args).getResult(0);
 }



More information about the llvm-commits mailing list