[flang] [llvm] [flang] implement `split` for fortran 2023 (PR #161484)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 1 00:14:20 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-semantics

Author: Connector Switch (c8ef)

<details>
<summary>Changes</summary>

ref: F2023, 16.9.196

---
Full diff: https://github.com/llvm/llvm-project/pull/161484.diff


9 Files Affected:

- (modified) flang-rt/lib/runtime/character.cpp (+37) 
- (modified) flang/include/flang/Optimizer/Builder/IntrinsicCall.h (+1) 
- (modified) flang/include/flang/Optimizer/Builder/Runtime/Character.h (+8) 
- (modified) flang/include/flang/Runtime/character.h (+7) 
- (modified) flang/lib/Evaluate/intrinsics.cpp (+4) 
- (modified) flang/lib/Optimizer/Builder/IntrinsicCall.cpp (+42) 
- (modified) flang/lib/Optimizer/Builder/Runtime/Character.cpp (+27) 
- (added) flang/test/Lower/Intrinsics/split.f90 (+43) 
- (modified) flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp (+23) 


``````````diff
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 98a225dbec9f9..64aa8a24c0e64 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -570,6 +570,30 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
   }
 }
 
+template <typename CHAR>
+inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen,
+    const CHAR *set, std::size_t setLen, std::size_t pos, bool back) {
+  if (!back) {
+    for (std::size_t i{pos + 1}; i <= xLen; ++i) {
+      for (std::size_t j{0}; j < setLen; ++j) {
+        if (x[i - 1] == set[j]) {
+          return i;
+        }
+      }
+    }
+    return xLen + 1;
+  } else {
+    for (std::size_t i{pos - 1}; i != 0; --i) {
+      for (std::size_t j{0}; j < setLen; ++j) {
+        if (x[i - 1] == set[j]) {
+          return i;
+        }
+      }
+    }
+    return 0;
+  }
+}
+
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
@@ -917,6 +941,19 @@ void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x,
   MaxMin<true>(accumulator, x, sourceFile, sourceLine);
 }
 
+std::size_t RTDEF(Split1)(const char *x, std::size_t xLen, const char *set,
+    std::size_t setLen, std::size_t pos, bool back) {
+  return Split<char>(x, xLen, set, setLen, pos, back);
+}
+std::size_t RTDEF(Split2)(const char16_t *x, std::size_t xLen,
+    const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
+  return Split<char16_t>(x, xLen, set, setLen, pos, back);
+}
+std::size_t RTDEF(Split4)(const char32_t *x, std::size_t xLen,
+    const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
+  return Split<char32_t>(x, xLen, set, setLen, pos, back);
+}
+
 RT_EXT_API_GROUP_END
 }
 } // namespace Fortran::runtime
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index cd73798d71262..6c668af9adf2c 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -430,6 +430,7 @@ struct IntrinsicLibrary {
   fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genSpacing(mlir::Type resultType,
                          llvm::ArrayRef<mlir::Value> args);
+  void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genStorageSize(mlir::Type,
                                     llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index d1c521de94438..a5ac3e175fe64 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -120,6 +120,14 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
                       mlir::Value setBase, mlir::Value setLen,
                       mlir::Value back);
 
+/// Generate call to the split runtime routine that is specialized on
+/// \param kind.
+/// The \param kind represents the kind of the elements in the strings.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+                     mlir::Value stringBase, mlir::Value stringLen,
+                     mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+                     mlir::Value back);
+
 } // namespace fir::runtime
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CHARACTER_H
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index dd47686fe858f..2b94b3cc2d1d9 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -127,6 +127,13 @@ std::size_t RTDECL(Verify4)(const char32_t *, std::size_t, const char32_t *set,
 void RTDECL(Verify)(Descriptor &result, const Descriptor &string,
     const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
     const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *, std::size_t, const char *set,
+    std::size_t, std::size_t, bool back = false);
+std::size_t RTDECL(Split2)(const char16_t *, std::size_t, const char16_t *set,
+    std::size_t, std::size_t, bool back = false);
+std::size_t RTDECL(Split4)(const char32_t *, std::size_t, const char32_t *set,
+    std::size_t, std::size_t, bool back = false);
 }
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index abe53c31210d0..c80778ccaa547 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1721,6 +1721,10 @@ static const IntrinsicInterface intrinsicSubroutine[]{
         {{"seconds", AnyInt, Rank::scalar, Optionality::required,
             common::Intent::In}},
         {}, Rank::elemental, IntrinsicClass::impureSubroutine},
+    {"split",
+        {{"string", SameCharNoLen}, {"set", SameCharNoLen}, {"pos", AnyInt},
+            {"back", AnyLogical, Rank::elemental, Optionality::optional}},
+        {}, Rank::elemental, IntrinsicClass::pureSubroutine},
     {"unlink",
         {{"path", DefaultChar, Rank::scalar, Optionality::required,
              common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index e1c9520592de6..a9d2ccc1ff868 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -926,6 +926,12 @@ static constexpr IntrinsicHandler handlers[]{
      /*isElemental=*/false},
     {"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
     {"spacing", &I::genSpacing},
+    {"split",
+     &I::genSplit,
+     {{{"string", asAddr},
+       {"set", asAddr},
+       {"pos", asAddr},
+       {"back", asValue, handleDynamicOptional}}}},
     {"spread",
      &I::genSpread,
      {{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8655,6 +8661,42 @@ mlir::Value IntrinsicLibrary::genSpacing(mlir::Type resultType,
       fir::runtime::genSpacing(builder, loc, fir::getBase(args[0])));
 }
 
+// SPLIT
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 4);
+
+  // Handle required string base arg
+  mlir::Value stringBase = fir::getBase(args[0]);
+
+  // Handle required set string base arg
+  mlir::Value setBase = fir::getBase(args[1]);
+
+  // Handle kind argument; it is the kind of character in this case
+  fir::KindTy kind =
+      fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+          stringBase.getType());
+
+  // Handle string length argument
+  mlir::Value stringLen = fir::getLen(args[0]);
+
+  // Handle set string length argument
+  mlir::Value setLen = fir::getLen(args[1]);
+
+  // Handle pos argument
+  mlir::Value posAddr = fir::getBase(args[2]);
+  mlir::Value pos = fir::LoadOp::create(builder, loc, posAddr);
+
+  // Handle optional back argument
+  mlir::Value back =
+      isStaticallyAbsent(args[3])
+          ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+          : fir::getBase(args[3]);
+
+  pos = fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen,
+                               setBase, setLen, pos, back);
+  builder.createStoreWithConvert(loc, pos, posAddr);
+}
+
 // SPREAD
 fir::ExtendedValue
 IntrinsicLibrary::genSpread(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 57fb0cccf6863..bd112082711d6 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -276,3 +276,30 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
                                             stringLen, setBase, setLen, back);
   return fir::CallOp::create(builder, loc, func, args).getResult(0);
 }
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+                                   mlir::Location loc, int kind,
+                                   mlir::Value stringBase,
+                                   mlir::Value stringLen, mlir::Value setBase,
+                                   mlir::Value setLen, mlir::Value pos,
+                                   mlir::Value back) {
+  mlir::func::FuncOp func;
+  switch (kind) {
+  case 1:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+    break;
+  case 2:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+    break;
+  case 4:
+    func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+    break;
+  default:
+    fir::emitFatalError(
+        loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+  }
+  auto fTy = func.getFunctionType();
+  auto args = fir::runtime::createArguments(
+      builder, loc, fTy, stringBase, stringLen, setBase, setLen, pos, back);
+  return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..d0d2f48cddf16
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,43 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsplit_test1(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test1(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %false = arith.constant false
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %false) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p)
+end subroutine split_test1
+
+! CHECK-LABEL: func @_QPsplit_test2(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test2(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %true = arith.constant true
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %true) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p, .true.)
+end subroutine split_test2
diff --git a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
index f3b0fde175bac..f8db671ad845c 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
+++ b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
@@ -209,3 +209,26 @@ TEST_F(RuntimeCallTest, genVerifyTest) {
   checkGenVerify(*firBuilder, "_FortranAVerify2", 2);
   checkGenVerify(*firBuilder, "_FortranAVerify4", 4);
 }
+
+void checkGenSplit(
+    fir::FirOpBuilder &builder, llvm::StringRef fctName, unsigned kind) {
+  auto loc = builder.getUnknownLoc();
+  mlir::Type charTy = fir::CharacterType::get(builder.getContext(), kind, 10);
+  mlir::Type boxTy = fir::BoxType::get(charTy);
+  mlir::Type i32Ty = IntegerType::get(builder.getContext(), 32);
+  mlir::Value stringBase = fir::UndefOp::create(builder, loc, boxTy);
+  mlir::Value stringLen = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value setBase = fir::UndefOp::create(builder, loc, boxTy);
+  mlir::Value setLen = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value pos = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value back = fir::UndefOp::create(builder, loc, i32Ty);
+  mlir::Value res = fir::runtime::genSplit(
+      builder, loc, kind, stringBase, stringLen, setBase, setLen, pos, back);
+  checkCallOp(res.getDefiningOp(), fctName, 6, /*addLocArgs=*/false);
+}
+
+TEST_F(RuntimeCallTest, genSplitTest) {
+  checkGenSplit(*firBuilder, "_FortranASplit1", 1);
+  checkGenSplit(*firBuilder, "_FortranASplit2", 2);
+  checkGenSplit(*firBuilder, "_FortranASplit4", 4);
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/161484


More information about the llvm-commits mailing list