From 4f8a1e1cf275586c3ce783a67a4cc7d559616ac7 Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 3 Nov 2025 18:31:18 +0400 Subject: [PATCH 01/12] Optimize parser performance by introducing `ConcatenatedListView` - Replace direct list concatenation in `ParserStructure.append()` with `ConcatenatedListView` for improved efficiency. - Add `ConcatenatedListView` implementation to lazily combine two lists without creating a new collection. --- .../internal/format/parser/ConcatenatedListView.kt | 13 +++++++++++++ core/common/src/internal/format/parser/Parser.kt | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 core/common/src/internal/format/parser/ConcatenatedListView.kt diff --git a/core/common/src/internal/format/parser/ConcatenatedListView.kt b/core/common/src/internal/format/parser/ConcatenatedListView.kt new file mode 100644 index 000000000..b7f44b2a1 --- /dev/null +++ b/core/common/src/internal/format/parser/ConcatenatedListView.kt @@ -0,0 +1,13 @@ +/* + * Copyright 2019-2025 JetBrains s.r.o. and contributors. + * Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file. + */ + +package kotlinx.datetime.internal.format.parser + +internal class ConcatenatedListView(val list1: List, val list2: List) : AbstractList() { + override val size: Int + get() = list1.size + list2.size + + override fun get(index: Int): T = if (index < list1.size) list1[index] else list2[index - list1.size] +} diff --git a/core/common/src/internal/format/parser/Parser.kt b/core/common/src/internal/format/parser/Parser.kt index 9958e3fb9..27fc9d277 100644 --- a/core/common/src/internal/format/parser/Parser.kt +++ b/core/common/src/internal/format/parser/Parser.kt @@ -44,7 +44,7 @@ internal class ParserStructure( // TODO: O(size of the resulting parser ^ 2), but can be O(size of the resulting parser) internal fun List>.concat(): ParserStructure { fun ParserStructure.append(other: ParserStructure): ParserStructure = if (followedBy.isEmpty()) { - ParserStructure(operations + other.operations, other.followedBy) + ParserStructure(ConcatenatedListView(operations, other.operations), other.followedBy) } else { ParserStructure(operations, followedBy.map { it.append(other) }) } From d8fae29ef4acdfcebfd9db9e8ee2b1e3a64864d9 Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Thu, 6 Nov 2025 17:29:33 +0400 Subject: [PATCH 02/12] Add JMH benchmark for parser structure formatting --- .../kotlin/ParserStructureConcatBenchmark.kt | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt diff --git a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt new file mode 100644 index 000000000..b70def048 --- /dev/null +++ b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt @@ -0,0 +1,47 @@ +/* + * Copyright 2019-2025 JetBrains s.r.o. and contributors. + * Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file. + */ + +@file:Suppress("unused") + +package kotlinx.datetime + +import kotlinx.datetime.format.char +import kotlinx.datetime.format.optional +import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.infra.Blackhole +import java.util.concurrent.* + +@Warmup(iterations = 5, time = 1) +@Measurement(iterations = 5, time = 1) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +@Fork(1) +open class ParserStructureConcatBenchmark { + + @Benchmark + fun buildFormat(blackhole: Blackhole) { + val v = LocalDateTime.Format { + year() + char('-') + monthNumber() + char('-') + day() + char(' ') + hour() + char(':') + minute() + optional { + char(':') + second() + optional { + char('.') + secondFraction() + } + } + } + blackhole.consume(v) + } +} From 71d545de92fe05b55e2aa5c72028c696a7cab9a2 Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 13:30:34 +0400 Subject: [PATCH 03/12] Rename `buildFormat` to `buildPythonDateTimeFormat` for improved readability and specificity. --- benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt index b70def048..2f7a914a1 100644 --- a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt @@ -22,7 +22,7 @@ import java.util.concurrent.* open class ParserStructureConcatBenchmark { @Benchmark - fun buildFormat(blackhole: Blackhole) { + fun buildPythonDateTimeFormat(blackhole: Blackhole) { val v = LocalDateTime.Format { year() char('-') From 4e59e09f61d6f869b30366fee4fd34c5ebb39e3e Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 14:12:00 +0400 Subject: [PATCH 04/12] Add parameterized benchmark `largeSerialFormat` to test parser structure formatting scalability. --- .../kotlin/ParserStructureConcatBenchmark.kt | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt index 2f7a914a1..8fbd6bb9c 100644 --- a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt @@ -21,6 +21,30 @@ import java.util.concurrent.* @Fork(1) open class ParserStructureConcatBenchmark { + @Param("1", "2", "4", "8", "16") + var n = 0 + + @Benchmark + fun largeSerialFormat(blackhole: Blackhole) { + val format = LocalDateTime.Format { + repeat(n) { + year() + char('-') + monthNumber() + char('-') + day() + char(' ') + hour() + char(':') + minute() + char(':') + second() + char('_') + } + } + blackhole.consume(format) + } + @Benchmark fun buildPythonDateTimeFormat(blackhole: Blackhole) { val v = LocalDateTime.Format { From 5630daf07d0ae22167c46e73e1b639a5cddd5bf5 Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 16:28:24 +0400 Subject: [PATCH 05/12] Add iterator implementation for `ConcatenatedListView` - Introduce `ConcatenatedListViewIterator` to enable iteration without materializing combined lists. - Optimize nested list handling by directly traversing inner `ConcatenatedListView` instances. --- .../format/parser/ConcatenatedListView.kt | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/core/common/src/internal/format/parser/ConcatenatedListView.kt b/core/common/src/internal/format/parser/ConcatenatedListView.kt index b7f44b2a1..3f11c3826 100644 --- a/core/common/src/internal/format/parser/ConcatenatedListView.kt +++ b/core/common/src/internal/format/parser/ConcatenatedListView.kt @@ -10,4 +10,32 @@ internal class ConcatenatedListView(val list1: List, val list2: List) : get() = list1.size + list2.size override fun get(index: Int): T = if (index < list1.size) list1[index] else list2[index - list1.size] + + override fun iterator(): Iterator = ConcatenatedListViewIterator() + + private inner class ConcatenatedListViewIterator : Iterator { + private val iterators: List> = buildList { + collectIterators(list1) + collectIterators(list2) + } + private var index = 0 + + private fun MutableList>.collectIterators(list: List) { + if (list is ConcatenatedListView) { + collectIterators(list.list1) + collectIterators(list.list2) + } else { + add(list.iterator()) + } + } + + override fun hasNext(): Boolean { + while (index < iterators.size && !iterators[index].hasNext()) { + index++ + } + return index < iterators.size + } + + override fun next(): T = iterators[index].next() + } } From 39dd4570e7a2b0502b356f4c023d68722e7ccc3c Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 16:29:28 +0400 Subject: [PATCH 06/12] Expand `largeSerialFormat` benchmark parameters and update character tokens in format definition --- .../jmh/kotlin/ParserStructureConcatBenchmark.kt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt index 8fbd6bb9c..80a622cfd 100644 --- a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt @@ -21,25 +21,24 @@ import java.util.concurrent.* @Fork(1) open class ParserStructureConcatBenchmark { - @Param("1", "2", "4", "8", "16") + @Param("1", "2", "4", "8", "16", "32", "64", "128", "256", "512", "1024") var n = 0 @Benchmark fun largeSerialFormat(blackhole: Blackhole) { val format = LocalDateTime.Format { repeat(n) { - year() - char('-') + char('^') monthNumber() - char('-') + char('&') day() - char(' ') + char('!') hour() - char(':') + char('$') minute() - char(':') + char('#') second() - char('_') + char('@') } } blackhole.consume(format) From bc2a745c6dbbde653a5cfd9d09132eed886aa44a Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 17:52:45 +0400 Subject: [PATCH 07/12] Move largeSerialFormat benchmark to the separate file --- ...rk.kt => PythonDateTimeFormatBenchmark.kt} | 25 +---------- .../src/jmh/kotlin/SerialFormatBenchmark.kt | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 24 deletions(-) rename benchmarks/src/jmh/kotlin/{ParserStructureConcatBenchmark.kt => PythonDateTimeFormatBenchmark.kt} (64%) create mode 100644 benchmarks/src/jmh/kotlin/SerialFormatBenchmark.kt diff --git a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt b/benchmarks/src/jmh/kotlin/PythonDateTimeFormatBenchmark.kt similarity index 64% rename from benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt rename to benchmarks/src/jmh/kotlin/PythonDateTimeFormatBenchmark.kt index 80a622cfd..24e3bbfcb 100644 --- a/benchmarks/src/jmh/kotlin/ParserStructureConcatBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/PythonDateTimeFormatBenchmark.kt @@ -19,30 +19,7 @@ import java.util.concurrent.* @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Benchmark) @Fork(1) -open class ParserStructureConcatBenchmark { - - @Param("1", "2", "4", "8", "16", "32", "64", "128", "256", "512", "1024") - var n = 0 - - @Benchmark - fun largeSerialFormat(blackhole: Blackhole) { - val format = LocalDateTime.Format { - repeat(n) { - char('^') - monthNumber() - char('&') - day() - char('!') - hour() - char('$') - minute() - char('#') - second() - char('@') - } - } - blackhole.consume(format) - } +open class PythonDateTimeFormatBenchmark { @Benchmark fun buildPythonDateTimeFormat(blackhole: Blackhole) { diff --git a/benchmarks/src/jmh/kotlin/SerialFormatBenchmark.kt b/benchmarks/src/jmh/kotlin/SerialFormatBenchmark.kt new file mode 100644 index 000000000..fb63f577e --- /dev/null +++ b/benchmarks/src/jmh/kotlin/SerialFormatBenchmark.kt @@ -0,0 +1,45 @@ +/* + * Copyright 2019-2025 JetBrains s.r.o. and contributors. + * Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file. + */ + +@file:Suppress("unused") + +package kotlinx.datetime + +import kotlinx.datetime.format.char +import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.infra.Blackhole +import java.util.concurrent.TimeUnit + +@Warmup(iterations = 5, time = 1) +@Measurement(iterations = 5, time = 1) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +@Fork(1) +open class SerialFormatBenchmark { + + @Param("1", "2", "4", "8", "16", "32", "64", "128", "256", "512", "1024") + var n = 0 + + @Benchmark + fun largeSerialFormat(blackhole: Blackhole) { + val format = LocalDateTime.Format { + repeat(n) { + char('^') + monthNumber() + char('&') + day() + char('!') + hour() + char('$') + minute() + char('#') + second() + char('@') + } + } + blackhole.consume(format) + } +} From 707e0c021bed6e6c29befe1329c893cd51be58a7 Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 19:19:05 +0400 Subject: [PATCH 08/12] Add `ParallelFormatBenchmark` for testing multi-iteration parser structure formatting performance --- .../src/jmh/kotlin/ParallelFormatBenchmark.kt | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt diff --git a/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt new file mode 100644 index 000000000..3de77e317 --- /dev/null +++ b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt @@ -0,0 +1,44 @@ +/* + * Copyright 2019-2025 JetBrains s.r.o. and contributors. + * Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file. + */ + +@file:Suppress("unused") + +package kotlinx.datetime + +import kotlinx.datetime.format.alternativeParsing +import kotlinx.datetime.format.char +import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.infra.Blackhole +import java.util.concurrent.TimeUnit + +@Warmup(iterations = 5, time = 1) +@Measurement(iterations = 5, time = 1) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +@Fork(1) +open class ParallelFormatBenchmark { + + @Param("2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12") + var n = 0 + + @Benchmark + fun foo(blackhole: Blackhole) { + val format = LocalDateTime.Format { + repeat(n) { + alternativeParsing( + { monthNumber() }, + { day() }, + primaryFormat = { hour() } + ) + char('@') + minute() + char('#') + second() + } + } + blackhole.consume(format) + } +} From 792ed80b29f3a93ecc3d31bc3c70ca069a00f1cf Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 19:45:13 +0400 Subject: [PATCH 09/12] Rename `foo` benchmark to `formatCreationWithAlternativeParsing` for clarity --- benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt index 3de77e317..24feae45a 100644 --- a/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt @@ -25,7 +25,7 @@ open class ParallelFormatBenchmark { var n = 0 @Benchmark - fun foo(blackhole: Blackhole) { + fun formatCreationWithAlternativeParsing(blackhole: Blackhole) { val format = LocalDateTime.Format { repeat(n) { alternativeParsing( From 9681c6ec8ff78e3145615210388512d410034b4a Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 20:00:26 +0400 Subject: [PATCH 10/12] Add `formatCreationWithNestedAlternativeParsing` benchmark to test complex parser structures --- .../src/jmh/kotlin/ParallelFormatBenchmark.kt | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt index 24feae45a..4f42566b1 100644 --- a/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt @@ -41,4 +41,48 @@ open class ParallelFormatBenchmark { } blackhole.consume(format) } + + @Benchmark + fun formatCreationWithNestedAlternativeParsing(blackhole: Blackhole) { + val format = LocalDateTime.Format { + repeat(n) { index -> + alternativeParsing( + { monthNumber(); char('-'); day() }, + { day(); char('/'); monthNumber() }, + primaryFormat = { year(); char('-'); monthNumber(); char('-'); day() } + ) + + if (index % 2 == 0) { + alternativeParsing( + { + alternativeParsing( + { hour(); char(':'); minute() }, + { minute(); char(':'); second() }, + primaryFormat = { hour(); char(':'); minute(); char(':'); second() } + ) + }, + primaryFormat = { + year(); char('-'); monthNumber(); char('-'); day() + char('T') + hour(); char(':'); minute(); char(':'); second() + } + ) + } + + char('|') + if (index % 3 == 0) { + char('|') + } + + if (index < 8) { + alternativeParsing( + { char('Z') }, + { char('+'); hour(); char(':'); minute() }, + primaryFormat = { char('-'); hour(); char(':'); minute() } + ) + } + } + } + blackhole.consume(format) + } } From a32c6656e3073a013f978df600b0490bdbe7662b Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 20:12:19 +0400 Subject: [PATCH 11/12] Optimize conditionals in `ParallelFormatBenchmark` by replacing modulus checks with bitwise operations --- benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt index 4f42566b1..07bc7eed7 100644 --- a/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt +++ b/benchmarks/src/jmh/kotlin/ParallelFormatBenchmark.kt @@ -52,7 +52,7 @@ open class ParallelFormatBenchmark { primaryFormat = { year(); char('-'); monthNumber(); char('-'); day() } ) - if (index % 2 == 0) { + if (index and 1 == 0) { alternativeParsing( { alternativeParsing( @@ -74,7 +74,7 @@ open class ParallelFormatBenchmark { char('|') } - if (index < 8) { + if (index and 2 == 0) { alternativeParsing( { char('Z') }, { char('+'); hour(); char(':'); minute() }, From 96397404ffd371fb081c14a493c806a1580f43a9 Mon Sep 17 00:00:00 2001 From: Dmitry Nekrasov Date: Mon, 10 Nov 2025 20:19:55 +0400 Subject: [PATCH 12/12] Add caching to `ParserStructure.simplify` to improve performance --- .../src/internal/format/parser/Parser.kt | 129 +++++++++--------- 1 file changed, 66 insertions(+), 63 deletions(-) diff --git a/core/common/src/internal/format/parser/Parser.kt b/core/common/src/internal/format/parser/Parser.kt index 27fc9d277..eb9de566b 100644 --- a/core/common/src/internal/format/parser/Parser.kt +++ b/core/common/src/internal/format/parser/Parser.kt @@ -49,78 +49,81 @@ internal fun List>.concat(): ParserStructure { ParserStructure(operations, followedBy.map { it.append(other) }) } - fun ParserStructure.simplify(unconditionalModifications: List>): ParserStructure { - val newOperations = mutableListOf>() - var currentNumberSpan: MutableList>? = null - val unconditionalModificationsForTails = unconditionalModifications.toMutableList() - // joining together the number consumers in this parser before the first alternative; - // collecting the unconditional modifications to push them to the end of all the parser's branches. - for (op in operations) { - if (op is NumberSpanParserOperation) { - if (currentNumberSpan != null) { - currentNumberSpan.addAll(op.consumers) + val cache = hashMapOf, List>>, ParserStructure>() + + fun ParserStructure.simplify(unconditionalModifications: List>): ParserStructure = + cache.getOrPut(this to unconditionalModifications) { + val newOperations = mutableListOf>() + var currentNumberSpan: MutableList>? = null + val unconditionalModificationsForTails = unconditionalModifications.toMutableList() + // joining together the number consumers in this parser before the first alternative; + // collecting the unconditional modifications to push them to the end of all the parser's branches. + for (op in operations) { + if (op is NumberSpanParserOperation) { + if (currentNumberSpan != null) { + currentNumberSpan.addAll(op.consumers) + } else { + currentNumberSpan = op.consumers.toMutableList() + } + } else if (op is UnconditionalModification) { + unconditionalModificationsForTails.add(op) } else { - currentNumberSpan = op.consumers.toMutableList() - } - } else if (op is UnconditionalModification) { - unconditionalModificationsForTails.add(op) - } else { - if (currentNumberSpan != null) { - newOperations.add(NumberSpanParserOperation(currentNumberSpan)) - currentNumberSpan = null + if (currentNumberSpan != null) { + newOperations.add(NumberSpanParserOperation(currentNumberSpan)) + currentNumberSpan = null + } + newOperations.add(op) } - newOperations.add(op) } - } - val mergedTails = followedBy.flatMap { - val simplified = it.simplify(unconditionalModificationsForTails) - // parser `ParserStructure(emptyList(), p)` is equivalent to `p`, - // unless `p` is empty. For example, ((a|b)|(c|d)) is equivalent to (a|b|c|d). - // As a special case, `ParserStructure(emptyList(), emptyList())` represents a parser that recognizes an empty - // string. For example, (|a|b) is not equivalent to (a|b). - if (simplified.operations.isEmpty()) - simplified.followedBy.ifEmpty { listOf(simplified) } - else - listOf(simplified) - }.ifEmpty { - // preserving the invariant that `mergedTails` contains all unconditional modifications - listOf(ParserStructure(unconditionalModificationsForTails, emptyList())) - } - return if (currentNumberSpan == null) { - // the last operation was not a number span, or it was a number span that we are allowed to interrupt - ParserStructure(newOperations, mergedTails) - } else if (mergedTails.none { - it.operations.firstOrNull()?.let { it is NumberSpanParserOperation } == true - }) { - // the last operation was a number span, but there are no alternatives that start with a number span. - newOperations.add(NumberSpanParserOperation(currentNumberSpan)) - ParserStructure(newOperations, mergedTails) - } else { - val newTails = mergedTails.map { - when (val firstOperation = it.operations.firstOrNull()) { - is NumberSpanParserOperation -> { - ParserStructure( - listOf(NumberSpanParserOperation(currentNumberSpan + firstOperation.consumers)) + it.operations.drop( - 1 - ), + val mergedTails = followedBy.flatMap { + val simplified = it.simplify(unconditionalModificationsForTails) + // parser `ParserStructure(emptyList(), p)` is equivalent to `p`, + // unless `p` is empty. For example, ((a|b)|(c|d)) is equivalent to (a|b|c|d). + // As a special case, `ParserStructure(emptyList(), emptyList())` represents a parser that recognizes an empty + // string. For example, (|a|b) is not equivalent to (a|b). + if (simplified.operations.isEmpty()) + simplified.followedBy.ifEmpty { listOf(simplified) } + else + listOf(simplified) + }.ifEmpty { + // preserving the invariant that `mergedTails` contains all unconditional modifications + listOf(ParserStructure(unconditionalModificationsForTails, emptyList())) + } + if (currentNumberSpan == null) { + // the last operation was not a number span, or it was a number span that we are allowed to interrupt + ParserStructure(newOperations, mergedTails) + } else if (mergedTails.none { + it.operations.firstOrNull()?.let { it is NumberSpanParserOperation } == true + }) { + // the last operation was a number span, but there are no alternatives that start with a number span. + newOperations.add(NumberSpanParserOperation(currentNumberSpan)) + ParserStructure(newOperations, mergedTails) + } else { + val newTails = mergedTails.map { + when (val firstOperation = it.operations.firstOrNull()) { + is NumberSpanParserOperation -> { + ParserStructure( + listOf(NumberSpanParserOperation(currentNumberSpan + firstOperation.consumers)) + it.operations.drop( + 1 + ), + it.followedBy + ) + } + + null -> ParserStructure( + listOf(NumberSpanParserOperation(currentNumberSpan)), it.followedBy ) - } - - null -> ParserStructure( - listOf(NumberSpanParserOperation(currentNumberSpan)), - it.followedBy - ) - else -> ParserStructure( - listOf(NumberSpanParserOperation(currentNumberSpan)) + it.operations, - it.followedBy - ) + else -> ParserStructure( + listOf(NumberSpanParserOperation(currentNumberSpan)) + it.operations, + it.followedBy + ) + } } + ParserStructure(newOperations, newTails) } - ParserStructure(newOperations, newTails) } - } val naiveParser = foldRight(ParserStructure(emptyList(), emptyList())) { parser, acc -> parser.append(acc) } return naiveParser.simplify(emptyList()) }