[flang-commits] [flang] [llvm] [flang] implement `split` for fortran 2023 (PR #161484)

Connector Switch via flang-commits flang-commits at lists.llvm.org
Thu Oct 2 05:41:39 PDT 2025


https://github.com/c8ef updated https://github.com/llvm/llvm-project/pull/161484

>From e801c74be52dd396fb09a995762af1f48b07a944 Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Thu, 2 Oct 2025 20:41:28 +0800
Subject: [PATCH] implement split for f2023

---
 flang-rt/lib/runtime/character.cpp            | 47 +++++++++++++++++
 flang-rt/unittests/Runtime/CharacterTest.cpp  | 50 +++++++++++++++++++
 .../flang/Optimizer/Builder/IntrinsicCall.h   |  1 +
 .../Optimizer/Builder/Runtime/Character.h     |  8 +++
 flang/include/flang/Runtime/character.h       | 10 ++++
 flang/lib/Evaluate/intrinsics.cpp             |  4 ++
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 42 ++++++++++++++++
 .../Optimizer/Builder/Runtime/Character.cpp   | 31 ++++++++++++
 flang/test/Lower/Intrinsics/split.f90         | 43 ++++++++++++++++
 .../Builder/Runtime/CharacterTest.cpp         | 23 +++++++++
 10 files changed, 259 insertions(+)
 create mode 100644 flang/test/Lower/Intrinsics/split.f90

diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 98a225dbec9f9..e26f9ef4c5310 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -570,6 +570,35 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
   }
 }
 
+template <typename CHAR>
+inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen,
+    const CHAR *set, std::size_t setLen, std::size_t pos, bool back,
+    const char *sourceFile, int sourceLine) {
+  Terminator terminator{sourceFile, sourceLine};
+
+  if (!back) {
+    RUNTIME_CHECK(terminator, pos <= xLen);
+    for (std::size_t i{pos + 1}; i <= xLen; ++i) {
+      for (std::size_t j{0}; j < setLen; ++j) {
+        if (x[i - 1] == set[j]) {
+          return i;
+        }
+      }
+    }
+    return xLen + 1;
+  } else {
+    RUNTIME_CHECK(terminator, pos >= 1 && pos <= xLen + 1);
+    for (std::size_t i{pos - 1}; i != 0; --i) {
+      for (std::size_t j{0}; j < setLen; ++j) {
+        if (x[i - 1] == set[j]) {
+          return i;
+        }
+      }
+    }
+    return 0;
+  }
+}
+
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
@@ -917,6 +946,24 @@ void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x,
   MaxMin<true>(accumulator, x, sourceFile, sourceLine);
 }
 
+std::size_t RTDEF(Split1)(const char *x, std::size_t xLen, const char *set,
+    std::size_t setLen, std::size_t pos, bool back, const char *sourceFile,
+    int sourceLine) {
+  return Split<char>(x, xLen, set, setLen, pos, back, sourceFile, sourceLine);
+}
+std::size_t RTDEF(Split2)(const char16_t *x, std::size_t xLen,
+    const char16_t *set, std::size_t setLen, std::size_t pos, bool back,
+    const char *sourceFile, int sourceLine) {
+  return Split<char16_t>(
+      x, xLen, set, setLen, pos, back, sourceFile, sourceLine);
+}
+std::size_t RTDEF(Split4)(const char32_t *x, std::size_t xLen,
+    const char32_t *set, std::size_t setLen, std::size_t pos, bool back,
+    const char *sourceFile, int sourceLine) {
+  return Split<char32_t>(
+      x, xLen, set, setLen, pos, back, sourceFile, sourceLine);
+}
+
 RT_EXT_API_GROUP_END
 }
 } // namespace Fortran::runtime
diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 2c7af27b9da77..4c2d8b3ecad5f 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -430,3 +430,53 @@ TYPED_TEST(RepeatTests, Repeat) {
     RunRepeatTest<TypeParam>(t.ncopies, t.input, t.output);
   }
 }
+
+// Test SPLIT()
+template <typename CHAR>
+using SplitFunction = std::function<std::size_t(const CHAR *, std::size_t,
+    const CHAR *, std::size_t, std::size_t, bool, const char *, int)>;
+using SplitFunctions = CharTypedFunctions<SplitFunction>;
+template <typename CHAR> struct SplitTests : public ::testing::Test {};
+TYPED_TEST_SUITE(SplitTests, CharacterTypes, );
+
+struct SplitTestCase {
+  const char *x, *y;
+  std::size_t pos;
+  bool back;
+  std::size_t expect;
+};
+
+template <typename CHAR>
+void RunSplitTests(const std::vector<SplitTestCase> &testCases,
+    const SplitFunction<CHAR> &function) {
+  for (const auto &t : testCases) {
+    // Convert default character to desired kind
+    std::size_t xLen{std::strlen(t.x)}, yLen{std::strlen(t.y)};
+    std::basic_string<CHAR> x{t.x, t.x + xLen};
+    std::basic_string<CHAR> y{t.y, t.y + yLen};
+    auto got{function(x.data(), xLen, y.data(), yLen, t.pos, t.back, "", 0)};
+    ASSERT_EQ(got, t.expect)
+        << "SPLIT('" << t.x << "','" << t.y << "',pos=" << t.pos
+        << ",back=" << t.back << ") for CHARACTER(kind=" << sizeof(CHAR)
+        << "): got " << got << ", expected " << t.expect;
+  }
+}
+
+TYPED_TEST(SplitTests, Split) {
+  static SplitFunctions functions{
+      RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)};
+  static std::vector<SplitTestCase> testcases{
+      {" one,last example,", ", ", 0, false, 1},
+      {" one,last example,", ", ", 1, false, 5},
+      {" one,last example,", ", ", 5, false, 10},
+      {" one,last example,", ", ", 10, false, 18},
+      {" one,last example,", ", ", 18, false, 19},
+      {" one,last example,", ", ", 19, true, 18},
+      {" one,last example,", ", ", 18, true, 10},
+      {" one,last example,", ", ", 10, true, 5},
+      {" one,last example,", ", ", 5, true, 1},
+      {" one,last example,", ", ", 1, true, 0},
+  };
+  RunSplitTests<TypeParam>(
+      testcases, std::get<SplitFunction<TypeParam>>(functions));
+}
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 320f913858956..8b539b164726c 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -436,6 +436,7 @@ struct IntrinsicLibrary {
   fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genSpacing(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
+  void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genStorageSize(mlir::Type,
                                     llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index 261ac348a4024..2ab0652ee0f7d 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -128,6 +128,14 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
                       mlir::Value setBase, mlir::Value setLen,
                       mlir::Value back);
 
+/// Generate call to the split runtime routine that is specialized on
+/// \param kind.
+/// The \param kind represents the kind of the elements in the strings.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+                     mlir::Value stringBase, mlir::Value stringLen,
+                     mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+                     mlir::Value back);
+
 } // namespace fir::runtime
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CHARACTER_H
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index dd47686fe858f..f9bbd6d727d4e 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -127,6 +127,16 @@ std::size_t RTDECL(Verify4)(const char32_t *, std::size_t, const char32_t *set,
 void RTDECL(Verify)(Descriptor &result, const Descriptor &string,
     const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
     const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *, std::size_t, const char *set,
+    std::size_t, std::size_t, bool back = false,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+std::size_t RTDECL(Split2)(const char16_t *, std::size_t, const char16_t *set,
+    std::size_t, std::size_t, bool back = false,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+std::size_t RTDECL(Split4)(const char32_t *, std::size_t, const char32_t *set,
+    std::size_t, std::size_t, bool back = false,
+    const char *sourceFile = nullptr, int sourceLine = 0);
 }
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index fe679da4ff98b..310c1a3cd3957 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1725,6 +1725,10 @@ static const IntrinsicInterface intrinsicSubroutine[]{
         {{"seconds", AnyInt, Rank::scalar, Optionality::required,
             common::Intent::In}},
         {}, Rank::elemental, IntrinsicClass::impureSubroutine},
+    {"split",
+        {{"string", SameCharNoLen}, {"set", SameCharNoLen}, {"pos", AnyInt},
+            {"back", AnyLogical, Rank::elemental, Optionality::optional}},
+        {}, Rank::elemental, IntrinsicClass::pureSubroutine},
     {"unlink",
         {{"path", DefaultChar, Rank::scalar, Optionality::required,
              common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 71d35e37bbe94..06aab4b3c466d 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -958,6 +958,12 @@ static constexpr IntrinsicHandler handlers[]{
      /*isElemental=*/false},
     {"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
     {"spacing", &I::genSpacing},
+    {"split",
+     &I::genSplit,
+     {{{"string", asAddr},
+       {"set", asAddr},
+       {"pos", asAddr},
+       {"back", asValue, handleDynamicOptional}}}},
     {"spread",
      &I::genSpread,
      {{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8763,6 +8769,42 @@ mlir::Value IntrinsicLibrary::genSpacing(mlir::Type resultType,
       fir::runtime::genSpacing(builder, loc, fir::getBase(args[0])));
 }
 
+// SPLIT
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 4);
+
+  // Handle required string base arg
+  mlir::Value stringBase = fir::getBase(args[0]);
+
+  // Handle required set string base arg
+  mlir::Value setBase = fir::getBase(args[1]);
+
+  // Handle kind argument; it is the kind of character in this case
+  fir::KindTy kind =
+      fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+          stringBase.getType());
+
+  // Handle string length argument
+  mlir::Value stringLen = fir::getLen(args[0]);
+
+  // Handle set string length argument
+  mlir::Value setLen = fir::getLen(args[1]);
+
+  // Handle pos argument
+  mlir::Value posAddr = fir::getBase(args[2]);
+  mlir::Value pos = fir::LoadOp::create(builder, loc, posAddr);
+
+  // Handle optional back argument
+  mlir::Value back =
+      isStaticallyAbsent(args[3])
+          ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+          : fir::getBase(args[3]);
+
+  pos = fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen,
+                               setBase, setLen, pos, back);
+  builder.createStoreWithConvert(loc, pos, posAddr);
+}
+
 // SPREAD
 fir::ExtendedValue
 IntrinsicLibrary::genSpread(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 540ecba299dc3..6d4942e65d77a 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -290,3 +290,34 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
                                             stringLen, setBase, setLen, back);
   return fir::CallOp::create(builder, loc, func, args).getResult(0);
 }
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+                                   mlir::Location loc, int kind,
+                                   mlir::Value stringBase,
+                                   mlir::Value stringLen, mlir::Value setBase,
+                                   mlir::Value setLen, mlir::Value pos,
+                                   mlir::Value back) {
+  mlir::func::FuncOp func;
+  switch (kind) {
+  case 1:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+    break;
+  case 2:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+    break;
+  case 4:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+    break;
+  default:
+    fir::emitFatalError(
+        loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+  }
+  auto fTy = func.getFunctionType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(7));
+  auto args = fir::runtime::createArguments(builder, loc, fTy, stringBase,
+                                            stringLen, setBase, setLen, pos,
+                                            back, sourceFile, sourceLine);
+  return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..6d643aa54452a
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,43 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsplit_test1(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test1(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %false = arith.constant false
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %false, {{.*}}) {{.*}}: (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1, !fir.ref<i8>, i32) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p)
+end subroutine split_test1
+
+! CHECK-LABEL: func @_QPsplit_test2(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test2(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %true = arith.constant true
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %true, {{.*}}) {{.*}}: (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1, !fir.ref<i8>, i32) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p, .true.)
+end subroutine split_test2
diff --git a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
index f3b0fde175bac..d2344ad64e810 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
+++ b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
@@ -209,3 +209,26 @@ TEST_F(RuntimeCallTest, genVerifyTest) {
   checkGenVerify(*firBuilder, "_FortranAVerify2", 2);
   checkGenVerify(*firBuilder, "_FortranAVerify4", 4);
 }
+
+void checkGenSplit(
+    fir::FirOpBuilder &builder, llvm::StringRef fctName, unsigned kind) {
+  auto loc = builder.getUnknownLoc();
+  mlir::Type charTy = fir::CharacterType::get(builder.getContext(), kind, 10);
+  mlir::Type boxTy = fir::BoxType::get(charTy);
+  mlir::Type i32Ty = IntegerType::get(builder.getContext(), 32);
+  mlir::Value stringBase = fir::UndefOp::create(builder, loc, boxTy);
+  mlir::Value stringLen = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value setBase = fir::UndefOp::create(builder, loc, boxTy);
+  mlir::Value setLen = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value pos = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value back = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value res = fir::runtime::genSplit(
+      builder, loc, kind, stringBase, stringLen, setBase, setLen, pos, back);
+  checkCallOp(res.getDefiningOp(), fctName, 6);
+}
+
+TEST_F(RuntimeCallTest, genSplitTest) {
+  checkGenSplit(*firBuilder, "_FortranASplit1", 1);
+  checkGenSplit(*firBuilder, "_FortranASplit2", 2);
+  checkGenSplit(*firBuilder, "_FortranASplit4", 4);
+}



More information about the flang-commits mailing list