Skip to content
15 changes: 13 additions & 2 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.jetbrains.kotlinx.dataframe.util.CHUNKED_IMPL_IMPORT
import org.jetbrains.kotlinx.dataframe.util.CREATE
Expand Down Expand Up @@ -216,8 +217,18 @@ public interface DataColumn<out T> : BaseColumn<T> {
infer: Infer = Infer.None,
): DataColumn<T> = createByType(name, values, typeOf<T>(), infer)

/** Creates an empty [DataColumn] with given [name]. */
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
/**
 * Builds a [DataColumn] called [name] that holds no values and whose type is [Nothing].
 * For an empty column of any other type, use [`emptyOf<T>()`][emptyOf] instead.
 *
 * @param name the name of the resulting column; an empty string by default.
 * @return an empty [DataColumn] typed as [Nothing].
 * @see emptyOf
 */
public fun empty(name: String = ""): DataColumn<Nothing> {
    // The element type of the backing list is irrelevant because the list is empty;
    // the column's reported type comes from `nothingType`.
    val noValues = emptyList<Unit>()
    return createValueColumn(name, noValues, nothingType).cast()
}

/**
 * Builds a [DataColumn] called [name] that holds no values and whose type is [T].
 *
 * @param T the type the empty column is created with.
 * @param name the name of the resulting column; an empty string by default.
 * @return an empty [DataColumn] of type [T].
 */
public inline fun <reified T> emptyOf(name: String = ""): DataColumn<T> {
    val noValues = emptyList<T>()
    return createValueColumn(name, noValues, typeOf<T>()).cast()
}

// region deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.onResolve
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.impl.owner
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KProperty
Expand All @@ -40,7 +41,10 @@ import kotlin.reflect.KProperty
public fun <T> DataColumn<T>.all(predicate: Predicate<T>): Boolean {
    // Delegates to the `all` check over this column's values.
    return values.all(predicate)
}

/** Returns `true` if all [values] are `null` or [values] is empty. */
public fun <C> DataColumn<C>.allNulls(): Boolean = size == 0 || all { it == null }
public fun <C> DataColumn<C>.allNulls(): Boolean =
    when {
        // An empty column counts as "all nulls".
        size == 0 -> true

        // A column declared as `Nothing?` is accepted by its type alone, without scanning its values.
        type() == nullableNothingType -> true

        // Otherwise every value has to be checked.
        else -> all { it == null }
    }

// endregion

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ public interface ColsOfColumnsSelectionDsl {
*
* This function operates solely on columns at the top-level.
*
* __NOTE:__ Null-filled columns of type [Nothing?][Nothing] will be included when selecting [`colsOf`][colsOf]`<T?>()`.
* This is because [Nothing][Nothing] is considered a subtype of all other types in Kotlin.
* To exclude these columns, call `.`[filter][ColumnsSelectionDsl.filter]` { !it.`[allNulls][DataColumn.allNulls]`() }`
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why isn't `colsOf<T?>() except colsOf<Nothing>()` an option here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`Nothing` cannot be used as a reified type argument.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just have a look at what we have to do to achieve `typeOf<Nothing>()` in dataframe XD

internal val nothingType: KType = typeOf<List<Nothing>>().arguments.first().type!!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would need a shortcut for that, `nothingCols()` for instance, but I'm not sure it's common enough.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or, you know, they could type:

colsOf<T?>() except colsOf(typeOf<List<Nothing>>().arguments.first().type!!)

you know, elegant

* after it.
*
* ### Check out: [Grammar]
*
* #### For example:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@ package org.jetbrains.kotlinx.dataframe.impl
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.concat
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubclassOf
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure
import kotlin.reflect.typeOf

public interface DataCollector<T> {

Expand All @@ -38,17 +40,29 @@ internal abstract class DataCollectorBase<T>(initCapacity: Int) : DataCollector<
data.add(value)
}

protected fun createColumn(name: String, type: KType): DataColumn<T> {
val classifier = type.classifier as KClass<*>
if (classifier.isSubclassOf(DataFrame::class) && !hasNulls) {
return DataColumn.createFrameColumn(name, data as List<AnyFrame>) as DataColumn<T>
}
if (classifier.isSubclassOf(DataRow::class) && !hasNulls) {
val mergedDf = (data as List<AnyRow>).map { it.toDataFrame() }.concat()
return DataColumn.createColumnGroup(name, mergedDf) as DataColumn<T>
}
return DataColumn.createValueColumn(name, data, type.withNullability(hasNulls)) as DataColumn<T>
}
/**
 * Turns the collected values into a [DataColumn] called [name], choosing the column kind from [type].
 *
 * The cases are tried in this order:
 * 1. [type] is `nothingType`: no values may have been collected; the result is [DataColumn.empty].
 * 2. [type] is `nullableNothingType`: every collected value must be `null`; the result is a value column
 *    of that type.
 * 3. [type] is a subtype of [AnyFrame]`?` and no nulls were collected: the result is a frame column.
 * 4. [type] is a subtype of [AnyRow]`?` and no nulls were collected: each row is converted to a data frame,
 *    the frames are concatenated, and the result is a column group.
 * 5. Anything else: the result is a value column whose type nullability follows `hasNulls`.
 *
 * @param name the name of the resulting column.
 * @param type the type to create the column for.
 * @throws IllegalArgumentException if [type] is `Nothing` but values were collected, or if [type] is
 *   `Nothing?` but a non-null value was collected.
 */
@Suppress("UNCHECKED_CAST")
protected fun createColumn(name: String, type: KType): DataColumn<T> {
    val column: DataColumn<*> =
        when (type) {
            // NOTE(review): the two Nothing cases are presumably handled first because `Nothing` would
            // also pass the subtype checks below - confirm before reordering.
            nothingType -> {
                require(values.isEmpty()) { "Cannot create non-empty DataColumn of type Nothing" }
                DataColumn.empty(name)
            }

            nullableNothingType -> {
                require(values.all { it == null }) { "Cannot create DataColumn of type Nothing? with non-null values" }
                DataColumn.createValueColumn(name, values, nullableNothingType)
            }

            else ->
                when {
                    type.isSubtypeOf(typeOf<AnyFrame?>()) && !hasNulls ->
                        DataColumn.createFrameColumn(name, data as List<AnyFrame>)

                    type.isSubtypeOf(typeOf<AnyRow?>()) && !hasNulls -> {
                        val rowsAsFrames = (data as List<AnyRow>).map { it.toDataFrame() }
                        DataColumn.createColumnGroup(name, rowsAsFrames.concat()).asDataColumn()
                    }

                    else -> DataColumn.createValueColumn(name, data, type.withNullability(hasNulls))
                }
        }
    return column.cast()
}
}

internal open class ColumnDataCollector(initCapacity: Int = 0, val typeOf: (KClass<*>) -> KType) :
Expand All @@ -65,7 +79,7 @@ internal class TypedColumnDataCollector<T>(initCapacity: Int = 0, val type: KTyp
override fun add(value: T?) {
if (checkTypes && value != null && !value.javaClass.kotlin.isSubclassOf(kclass)) {
throw IllegalArgumentException(
"Can not add value of class ${value.javaClass.kotlin.qualifiedName} to column of type $type. Value = $value",
"Cannot add a value of class ${value.javaClass.kotlin.qualifiedName} to a column of type $type. Value: '$value'.",
)
}
super.add(value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ internal fun <T> concatImpl(name: String, columns: List<DataColumn<T>>): DataCol
internal fun <T> concatImpl(name: String, columns: List<DataColumn<T>?>, columnSizes: List<Int>): DataColumn<T> {
when (columns.size) {
0 -> return DataColumn.empty(name).cast()
1 -> return columns[0] ?: DataColumn.empty(name).cast()
1 -> return columns.single() ?: DataColumn.empty(name).cast()
}

if (columns.all { it == null || it.isColumnGroup() }) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,21 +101,25 @@ internal fun <T, C> DataColumn<C>.updateImpl(
): DataColumn<C> {
val collector = createDataCollector<C>(size, type)
val src = this
if (filter == null) {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
collector.add(expression(row, src, src[rowIndex]))
}
} else {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
val currentValue = row[src]
val newValue =
if (filter.invoke(row, currentValue)) expression(row, src, currentValue) else currentValue
collector.add(newValue)
try {
if (filter == null) {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
collector.add(expression(row, src, src[rowIndex]))
}
} else {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
val currentValue = row[src]
val newValue =
if (filter.invoke(row, currentValue)) expression(row, src, currentValue) else currentValue
collector.add(newValue)
}
}
return collector.toColumn(src.name).cast()
} catch (e: Throwable) {
throw IllegalStateException("Could not update column '${src.name}': ${e.message}", e)
}
return collector.toColumn(src.name).cast()
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import org.jetbrains.kotlinx.dataframe.ColumnsContainer
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.Selector
import org.jetbrains.kotlinx.dataframe.api.AddDataRow
import org.jetbrains.kotlinx.dataframe.api.AddExpression
Expand Down Expand Up @@ -43,9 +42,9 @@ import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.util.CREATE_COLUMN
import org.jetbrains.kotlinx.dataframe.util.GUESS_COLUMN_TYPE
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.starProjectedType
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf

Expand Down Expand Up @@ -262,10 +261,10 @@ internal fun <T> createColumnGuessingType(
return { value -> if (value != null && value is Number) converter(value) else value }
}

return when (type.classifier!! as KClass<*>) {
return when (type.classifier?.starProjectedType) {
// guessValueType can only return DataRow if all values are `AnyRow?`
// or allColsMakesColGroup == true, and all values are `AnyCol`
DataRow::class ->
typeOf<AnyRow>() ->
if (allColsMakesColGroup && values.firstOrNull() is AnyCol) {
val df = dataFrameOf(values as Iterable<AnyCol>)
DataColumn.createColumnGroup(name, df)
Expand All @@ -276,7 +275,7 @@ internal fun <T> createColumnGuessingType(
DataColumn.createColumnGroup(name, df)
}.asDataColumn().cast()

DataFrame::class -> {
typeOf<AnyFrame>() -> {
val frames = values.map {
when (it) {
null -> DataFrame.empty()
Expand All @@ -289,7 +288,7 @@ internal fun <T> createColumnGuessingType(
DataColumn.createFrameColumn(name, frames).asDataColumn().cast()
}

List::class -> {
typeOf<List<*>>() -> {
val nullable = type.isMarkedNullable
var isListOfRows: Boolean? = null
val subType = type.arguments.first().type!! // List<T> -> T
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.junit.Test

class ConcatTests {
Expand All @@ -9,7 +11,7 @@ class ConcatTests {
fun `different types`() {
val a by columnOf(1, 2)
val b by columnOf(3.0, null)
a.concat(b) shouldBe columnOf(1, 2, 3.0, null).named("a")
a.concat(b) shouldBe columnOf<Number?>(1, 2, 3.0, null).named("a")
}

@Test
Expand All @@ -23,4 +25,28 @@ class ConcatTests {

dfWithCategory.columnNames() shouldBe listOf("value", "type", "category")
}

@Test
fun `concat empty DataFrames no rows`() {
    // A frame created with `emptyOf`: concatenating it with itself must keep both content and schema.
    val dfWithSchema = DataFrame.emptyOf<Pair<Int, String>>()
    val schemaConcat = dfWithSchema concat dfWithSchema
    schemaConcat shouldBe dfWithSchema
    schemaConcat.schema() shouldBe dfWithSchema.schema()

    // A frame built from `DataColumn.empty()` columns: same expectations.
    val dfNothingCols = dataFrameOf(
        "a" to DataColumn.empty(),
        "b" to DataColumn.empty(),
    )
    val nothingConcat = dfNothingCols concat dfNothingCols
    nothingConcat shouldBe dfNothingCols
    nothingConcat.schema() shouldBe dfNothingCols.schema()
}

@Test
fun `concat empty DataFrames no cols`() {
    // Concatenating `DataFrame.empty(5)` with itself is expected to equal `DataFrame.empty(10)`.
    val dfNoCols = DataFrame.empty(5)
    val concatenated = dfNoCols concat dfNoCols
    concatenated shouldBe DataFrame.empty(10)
}
}
25 changes: 25 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.size
import org.junit.Test

Expand Down Expand Up @@ -79,4 +83,25 @@ class UpdateTests {

df.fillNA(SchemaB::i).with { 42 }
}

@Test
fun `update Nothing columns`() {
    // Column of type `Nothing`: the update expression must never run, and the schema stays the same.
    val emptyDf = dataFrameOf("a" to DataColumn.empty())
    emptyDf["a"].type() shouldBe nothingType

    val updatedEmpty = emptyDf.update { "a"<Nothing>() }.with { error("should not happen") }
    updatedEmpty.schema() shouldBe emptyDf.schema()

    // Column of type `Nothing?`, filled with nulls.
    val nullFilledDf = dataFrameOf("a" to columnOf(null))
    nullFilledDf["a"].type() shouldBe nullableNothingType

    // can only update with null
    val updatedWithNull = nullFilledDf.update { "a"<Nothing?>() }.with { null }
    updatedWithNull.schema() shouldBe nullFilledDf.schema()

    // or 'Nothing', aka, return early/throw exception
    val thrown = shouldThrow<IllegalStateException> {
        nullFilledDf.update { "a"<Nothing?>() }.with { error("Nothing") }
    }
    // The exception raised inside the expression is expected as the cause of the thrown one.
    thrown.cause!!.message shouldBe "Nothing"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1712,7 +1712,7 @@ class DataFrameTests : BaseTest() {
df.update("name").at(0).with { "ALICE" }
}

@Test(expected = IllegalArgumentException::class)
@Test(expected = IllegalStateException::class)
fun `update with wrong type`() {
// Writes a String into the "age" column and expects the update to fail with IllegalStateException.
// NOTE(review): presumably "age" is not a String column and the failure is rethrown by `updateImpl`
// as IllegalStateException("Could not update column ...") - confirm against the fixture and call path.
typed.update("age").with { "string" }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.jupyter
import kotlinx.serialization.ExperimentalSerializationApi
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
import org.jetbrains.kotlinx.dataframe.api.allNulls
import org.jetbrains.kotlinx.dataframe.api.colsOf
import org.jetbrains.kotlinx.dataframe.api.getColumns
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
Expand Down Expand Up @@ -101,7 +102,7 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
}

internal fun AnyFrame.hasFormattedColumns() =
this.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?>() }.isNotEmpty()
this.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?> { !it.allNulls() } }.isNotEmpty()

private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDynamicNestedTables() =
this != null && majorVersion >= MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.columnOf
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.format
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.DATA
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.KOTLIN_DATAFRAME
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.METADATA
Expand Down Expand Up @@ -607,6 +612,21 @@ class RenderingTests : JupyterReplTestCase() {
json.extractColumn<String>(4, "mixed") shouldBe "1"
}

// Issue #1546
@Test
fun `hasFormattedFrame false positive`() {
    // Column "b" holds only nulls; it must not make the frame count as having formatted columns.
    val df = dataFrameOf(
        "a" to columnOf(1, 2, 3, null),
        "b" to DataColumn.createByInference("", listOf(null, null, null, null)),
        "c" to columnOf(7, 3, 2, 65),
    )
    val plainResult = df.hasFormattedColumns()
    plainResult shouldBe false

    // A frame whose column actually holds the result of `format { ... }.with { ... }` must still be detected.
    val formattedFrame = df.format { "c"() }.with { background(black) }
    val formatted = dataFrameOf("a" to columnOf(formattedFrame))
    val formattedResult = formatted.hasFormattedColumns()
    formattedResult shouldBe true
}

companion object {
/**
* Set the system property for the IDE version needed for specific serialization testing purposes.
Expand Down