diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 30227ba5fcf1c..4ef53ade1507f 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -3728,6 +3728,19 @@ def timestamp_add(unit: str, quantity: "ColumnOrName", ts: "ColumnOrName") -> Co timestamp_add.__doc__ = pysparkfuncs.timestamp_add.__doc__ +def time_bucket( + bucket_size: "ColumnOrName", + ts: "ColumnOrName", + origin: Optional["ColumnOrName"] = None, +) -> Column: + if origin is None: + return _invoke_function_over_columns("time_bucket", bucket_size, ts) + return _invoke_function_over_columns("time_bucket", bucket_size, ts, origin) + + +time_bucket.__doc__ = pysparkfuncs.time_bucket.__doc__ + + def window( timeColumn: "ColumnOrName", windowDuration: str, diff --git a/python/pyspark/sql/functions/__init__.py b/python/pyspark/sql/functions/__init__.py index 7bdbc0cbbd09f..c1ecaeb8032fa 100644 --- a/python/pyspark/sql/functions/__init__.py +++ b/python/pyspark/sql/functions/__init__.py @@ -248,6 +248,7 @@ "timestamp_micros", "timestamp_millis", "timestamp_seconds", + "time_bucket", "time_diff", "time_from_micros", "time_from_millis", diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 67812846cf057..4eea8d56984bf 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -13125,6 +13125,74 @@ def timestamp_add(unit: str, quantity: "ColumnOrName", ts: "ColumnOrName") -> Co ) +@_try_remote_functions +def time_bucket( + bucket_size: "ColumnOrName", + ts: "ColumnOrName", + origin: Optional["ColumnOrName"] = None, +) -> Column: + """ + Aligns a timestamp to the start of a fixed-size interval bucket. + + Returns the start of the bucket that ``ts`` falls into, where buckets are defined by + the given ``bucket_size`` interval aligned to ``origin``. 
All bucketing is performed on + UTC micros; the session time zone does not affect bucket alignment. For local wall-clock + alignment in a DST zone, cast the TIMESTAMP to TIMESTAMP_NTZ. + + .. versionadded:: 4.2.0 + + Parameters + ---------- + bucket_size : :class:`~pyspark.sql.Column` or column name + A day-time or year-month interval defining the bucket size. Must be positive + and foldable. + ts : :class:`~pyspark.sql.Column` or column name + A TIMESTAMP or TIMESTAMP_NTZ value to bucket. + origin : :class:`~pyspark.sql.Column` or column name, optional + Alignment anchor. Defaults to 1970-01-01 00:00:00 (UTC for TIMESTAMP). Must be + the same type as ``ts`` and must be foldable. + + Returns + ------- + :class:`~pyspark.sql.Column` + The start of the bucket containing ``ts``, as the same type as ``ts``. + + Examples + -------- + >>> spark.conf.set("spark.sql.session.timeZone", "UTC") + >>> import datetime + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame( + ... [(datetime.datetime(2024, 1, 1, 11, 27, 0),)], ['ts']) + >>> df.select( + ... sf.time_bucket(sf.expr("INTERVAL '15' MINUTE"), 'ts').alias("bucket") + ... ).collect() + [Row(bucket=datetime.datetime(2024, 1, 1, 11, 15))] + + Shift the grid with an explicit origin: buckets run at :05, :20, :35, :50: + + >>> df.select( + ... sf.time_bucket( + ... sf.expr("INTERVAL '15' MINUTE"), + ... 'ts', + ... sf.expr("TIMESTAMP '1970-01-01 00:05:00'") + ... ).alias("bucket") + ... 
).collect() + [Row(bucket=datetime.datetime(2024, 1, 1, 11, 20))] + >>> spark.conf.unset("spark.sql.session.timeZone") + """ + from pyspark.sql.classic.column import _to_java_column + + if origin is None: + return _invoke_function("time_bucket", _to_java_column(bucket_size), _to_java_column(ts)) + return _invoke_function( + "time_bucket", + _to_java_column(bucket_size), + _to_java_column(ts), + _to_java_column(origin), + ) + + @_try_remote_functions def window( timeColumn: "ColumnOrName", diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index b3bd22e6323b5..a2edfc3838738 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -8486,6 +8486,26 @@ object functions { def timestamp_add(unit: String, quantity: Column, ts: Column): Column = Column.internalFn("timestampadd", lit(unit), quantity, ts) + /** + * Returns the start of the fixed-size bucket of `bucketSize` that contains `ts`, with buckets + * aligned to the epoch (1970-01-01 00:00:00). All computation is in UTC. + * + * @group datetime_funcs + * @since 4.2.0 + */ + def time_bucket(bucketSize: Column, ts: Column): Column = + Column.fn("time_bucket", bucketSize, ts) + + /** + * Returns the start of the fixed-size bucket of `bucketSize` that contains `ts`, with buckets + * aligned to `origin`. All computation is in UTC. + * + * @group datetime_funcs + * @since 4.2.0 + */ + def time_bucket(bucketSize: Column, ts: Column, origin: Column): Column = + Column.fn("time_bucket", bucketSize, ts, origin) + /** * Returns the difference between two times, measured in specified units. Throws a * SparkIllegalArgumentException, in case the specified unit is not supported. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index ef7b25208928c..b77cdc15f8455 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -777,6 +777,7 @@ object FunctionRegistry { expression[UnixMillis]("unix_millis"), expression[UnixMicros]("unix_micros"), expression[ConvertTimezone]("convert_timezone"), + expressionBuilder("time_bucket", TimeBucketExpressionBuilder), // collection functions expression[CreateArray]("array"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 226e098165b82..5e1225f3269c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -24,9 +24,11 @@ import java.util.Locale import org.apache.commons.text.StringEscapeUtils -import org.apache.spark.{SparkDateTimeException, SparkIllegalArgumentException} +import org.apache.spark.{SparkDateTimeException, SparkException, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry} +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult} +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} +import org.apache.spark.sql.catalyst.expressions.Cast.{ordinalNumber, toSQLExpr, toSQLId, toSQLType, toSQLValue} import org.apache.spark.sql.catalyst.expressions.codegen._ import 
org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke @@ -3897,3 +3899,178 @@ case class TimestampDiff( copy(startTimestamp = newLeft, endTimestamp = newRight) } } + +/** + * Aligns a timestamp to the start of a fixed-size interval bucket. + * + * Returns the start of the half-open bucket [start, start + bucketSize) containing ts. + * All computation is performed on UTC values. + */ +case class TimeBucket( + bucketSize: Expression, + ts: Expression, + originTs: Expression) + extends TernaryExpression with ExpectsInputTypes { + + override def nullIntolerant: Boolean = true + + override def first: Expression = bucketSize + override def second: Expression = ts + override def third: Expression = originTs + + override def inputTypes: Seq[AbstractDataType] = Seq( + TypeCollection(DayTimeIntervalType, YearMonthIntervalType), + AnyTimestampType, + AnyTimestampType) + + override def dataType: DataType = ts.dataType + + override def checkInputDataTypes(): TypeCheckResult = { + val defaultCheck = super.checkInputDataTypes() + if (defaultCheck.isFailure) return defaultCheck + + if (!bucketSize.foldable) { + return DataTypeMismatch( + errorSubClass = "NON_FOLDABLE_INPUT", + messageParameters = Map( + "inputName" -> toSQLId("bucketSize"), + "inputType" -> toSQLType(bucketSize.dataType), + "inputExpr" -> toSQLExpr(bucketSize))) + } + + val bucketSizeValue = bucketSize.eval() + if (bucketSizeValue != null) { + val isNonPositive = bucketSize.dataType match { + case _: DayTimeIntervalType => bucketSizeValue.asInstanceOf[Long] <= 0 + case _: YearMonthIntervalType => bucketSizeValue.asInstanceOf[Int] <= 0 + case other => throw SparkException.internalError( + s"Unexpected bucketSize type: $other") + } + if (isNonPositive) { + return DataTypeMismatch( + errorSubClass = "VALUE_OUT_OF_RANGE", + messageParameters = Map( + "exprName" -> "time_bucket", + "valueRange" -> "(0, inf)", + "currentValue" -> 
toSQLValue(bucketSizeValue, bucketSize.dataType))) + } + } + + if (!originTs.foldable) { + return DataTypeMismatch( + errorSubClass = "NON_FOLDABLE_INPUT", + messageParameters = Map( + "inputName" -> toSQLId("origin"), + "inputType" -> toSQLType(originTs.dataType), + "inputExpr" -> toSQLExpr(originTs))) + } + + if (ts.dataType != originTs.dataType) { + return DataTypeMismatch( + errorSubClass = "UNEXPECTED_INPUT_TYPE", + messageParameters = Map( + "paramIndex" -> ordinalNumber(2), + "requiredType" -> toSQLType(ts.dataType), + "inputSql" -> toSQLExpr(originTs), + "inputType" -> toSQLType(originTs.dataType))) + } + + TypeCheckSuccess + } + + override def nullSafeEval(bucketSizeVal: Any, tsVal: Any, originVal: Any): Any = { + first.dataType match { + case _: DayTimeIntervalType => + DateTimeUtils.timeBucketDTInterval( + bucketSizeVal.asInstanceOf[Long], tsVal.asInstanceOf[Long], + originVal.asInstanceOf[Long]) + case _: YearMonthIntervalType => + DateTimeUtils.timeBucketYMInterval( + bucketSizeVal.asInstanceOf[Int], tsVal.asInstanceOf[Long], + originVal.asInstanceOf[Long]) + case other => throw SparkException.internalError( + s"Unexpected bucketSize type: $other") + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + first.dataType match { + case _: DayTimeIntervalType => + defineCodeGen(ctx, ev, (bucketSizeCode, tsCode, originCode) => + s"$dtu.timeBucketDTInterval($bucketSizeCode, $tsCode, $originCode)") + case _: YearMonthIntervalType => + defineCodeGen(ctx, ev, (bucketSizeCode, tsCode, originCode) => + s"$dtu.timeBucketYMInterval($bucketSizeCode, $tsCode, $originCode)") + case other => throw SparkException.internalError( + s"Unexpected bucketSize type: $other") + } + } + + override def prettyName: String = "time_bucket" + + override protected def withNewChildrenInternal( + newFirst: Expression, newSecond: Expression, newThird: Expression): TimeBucket = + copy(bucketSize = 
newFirst, ts = newSecond, originTs = newThird) +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(bucketSize, ts[, origin]) - Returns the start of the bucket that `ts` falls into, + where buckets are defined by the given `bucketSize` interval aligned to `origin`. All + bucketing is performed on UTC micros, the session time zone does not affect bucket + alignment. For local wall-clock alignment in a DST zone, cast the TIMESTAMP to + TIMESTAMP_NTZ. + """, + arguments = """ + Arguments: + * bucketSize - A day-time or year-month interval defining the bucket size. Must be positive and foldable. + * ts - A TIMESTAMP or TIMESTAMP_NTZ value to bucket. + * origin - Optional TIMESTAMP or TIMESTAMP_NTZ alignment anchor. Defaults to 1970-01-01 00:00:00 (UTC for TIMESTAMP). Must be the same type as ts and must be foldable. + """, + examples = """ + Examples: + > SELECT _FUNC_(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00'); + 2024-01-01 11:15:00 + > SELECT _FUNC_(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00'); + 2024-01-01 11:00:00 + > SELECT _FUNC_(INTERVAL '1' MONTH, TIMESTAMP '2024-07-20 14:30:00', TIMESTAMP '2024-06-15 09:00:00'); + 2024-07-15 09:00:00 + """, + since = "4.2.0", + group = "datetime_funcs") +// scalastyle:on line.size.limit +object TimeBucketExpressionBuilder extends ExpressionBuilder { + private def retypeNull(e: Expression, dt: DataType): Expression = e match { + case Literal(null, NullType) => Literal(null, dt) + case _ => e + } + + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + expressions match { + case Seq(rawBucketSize, rawTs) => + val bucketSize = retypeNull(rawBucketSize, DayTimeIntervalType()) + // Fall back to TimestampType for bad ts types; ExpectsInputTypes will report it. 
+ val tsType = rawTs.dataType match { + case t if AnyTimestampType.acceptsType(t) => t + case _ => TimestampType + } + val ts = retypeNull(rawTs, tsType) + TimeBucket(bucketSize, ts, Literal(0L, tsType)) + case Seq(rawBucketSize, rawTs, rawOrigin) => + val bucketSize = retypeNull(rawBucketSize, DayTimeIntervalType()) + val tsType = (rawTs.dataType, rawOrigin.dataType) match { + case (NullType, t) if AnyTimestampType.acceptsType(t) => t + case (NullType, _) => TimestampType + case (t, _) => t + } + val ts = retypeNull(rawTs, tsType) + val originTs = retypeNull(rawOrigin, tsType) + TimeBucket(bucketSize, ts, originTs) + case _ => + throw QueryCompilationErrors.wrongNumArgsError( + funcName, Seq(2, 3), expressions.length) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 82072443ec0ac..006847d89fd11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1059,4 +1059,59 @@ object DateTimeUtils extends SparkDateTimeUtils { time, timePrecision, interval, intervalEndField) } } + + /** + * DayTimeInterval bucketing: microsecond floor division against `originMicros`. + * Returns `originMicros + floorDiv(tsMicros - originMicros, bucketMicros) * bucketMicros`. + * + * `bucketMicros` must be positive; `TimeBucket.checkInputDataTypes` enforces + * this at analysis time. + * + * @param bucketMicros bucket size in microseconds. + * @param tsMicros timestamp to bucket, in microseconds since the epoch (UTC). + * @param originMicros grid alignment anchor, in microseconds since the epoch (UTC). 
+ */ + def timeBucketDTInterval(bucketMicros: Long, tsMicros: Long, originMicros: Long): Long = { + val diff = Math.subtractExact(tsMicros, originMicros) + val bucketOffset = Math.multiplyExact(Math.floorDiv(diff, bucketMicros), bucketMicros) + Math.addExact(originMicros, bucketOffset) + } + + /** + * YearMonthInterval bucketing: month arithmetic with end-of-month capping and step-back. + * The origin's day-of-month and time-of-day determine the bucket boundaries. + * + * `bucketMonths` must be positive; `TimeBucket.checkInputDataTypes` enforces + * this at analysis time. + * + * @param bucketMonths bucket size in months. + * @param tsMicros timestamp to bucket, in microseconds since the epoch (UTC). + * @param originMicros grid alignment anchor, in microseconds since the epoch (UTC). + */ + def timeBucketYMInterval(bucketMonths: Int, tsMicros: Long, originMicros: Long): Long = { + val tsDays = microsToDays(tsMicros, ZoneOffset.UTC) + val originDays = microsToDays(originMicros, ZoneOffset.UTC) + val originTodMicros = + Math.subtractExact(originMicros, daysToMicros(originDays, ZoneOffset.UTC)) + + val tsDate = daysToLocalDate(tsDays) + val originDate = daysToLocalDate(originDays) + val rawMonthDiff = (tsDate.getYear.toLong * 12 + tsDate.getMonthValue) - + (originDate.getYear.toLong * 12 + originDate.getMonthValue) + + var k = Math.floorDiv(rawMonthDiff, bucketMonths.toLong) + var candidateDays = dateAddMonths(originDays, + Math.toIntExact(Math.multiplyExact(k, bucketMonths.toLong))) + var candidate = Math.addExact(daysToMicros(candidateDays, ZoneOffset.UTC), originTodMicros) + + // End-of-month capping in dateAddMonths can overshoot; step back one bucket if so. 
+ if (candidate > tsMicros) { + k -= 1 + candidateDays = dateAddMonths(originDays, + Math.toIntExact(Math.multiplyExact(k, bucketMonths.toLong))) + candidate = Math.addExact(daysToMicros(candidateDays, ZoneOffset.UTC), originTodMicros) + } + + candidate + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 540c9830deb44..b29ffcb739afd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -29,7 +29,9 @@ import scala.reflect.ClassTag import scala.util.Random import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkUpgradeException} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ @@ -2313,4 +2315,186 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { null ) } + + test("time_bucket: day-time interval") { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) + sdf.setTimeZone(TimeZone.getTimeZone(UTC)) + Seq(TimestampType, TimestampNTZType).foreach { dt => + // 15-minute bucket with epoch origin + checkEvaluation( + TimeBucket( + Literal(Duration.ofMinutes(15)), + timestampLiteral("2024-01-01 11:27:00.000", sdf, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + timestampAnswer("2024-01-01 11:15:00.000", sdf, dt)) + // 1-hour bucket with 
custom origin (:05 alignment) + checkEvaluation( + TimeBucket( + Literal(Duration.ofHours(1)), + timestampLiteral("2024-01-01 11:27:00.000", sdf, dt), + timestampLiteral("1970-01-01 00:05:00.000", sdf, dt)), + timestampAnswer("2024-01-01 11:05:00.000", sdf, dt)) + // Pre-epoch ts + checkEvaluation( + TimeBucket( + Literal(Duration.ofDays(1)), + timestampLiteral("1969-12-31 23:30:00.000", sdf, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + timestampAnswer("1969-12-31 00:00:00.000", sdf, dt)) + // NULL ts -> NULL + checkEvaluation( + TimeBucket( + Literal(Duration.ofHours(1)), + Literal.create(null, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + null) + // NULL bucketSize -> NULL + checkEvaluation( + TimeBucket( + Literal.create(null, DayTimeIntervalType()), + timestampLiteral("2024-01-01 11:27:00.000", sdf, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + null) + // NULL origin -> NULL + checkEvaluation( + TimeBucket( + Literal(Duration.ofHours(1)), + timestampLiteral("2024-01-01 11:27:00.000", sdf, dt), + Literal.create(null, dt)), + null) + } + } + + test("time_bucket: year-month interval") { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) + sdf.setTimeZone(TimeZone.getTimeZone(UTC)) + Seq(TimestampType, TimestampNTZType).foreach { dt => + // 1-month bucket + checkEvaluation( + TimeBucket( + Literal(Period.ofMonths(1)), + timestampLiteral("2024-03-15 11:27:00.000", sdf, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + timestampAnswer("2024-03-01 00:00:00.000", sdf, dt)) + // 3-month (quarterly) bucket + checkEvaluation( + TimeBucket( + Literal(Period.ofMonths(3)), + timestampLiteral("2024-05-15 10:00:00.000", sdf, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + timestampAnswer("2024-04-01 00:00:00.000", sdf, dt)) + // End-of-month capping with step-back: origin on 1970-01-31, 1-month bucket, + // ts in early March of a leap year -> 2024-02-29. 
+ checkEvaluation( + TimeBucket( + Literal(Period.ofMonths(1)), + timestampLiteral("2024-03-01 12:00:00.000", sdf, dt), + timestampLiteral("1970-01-31 00:00:00.000", sdf, dt)), + timestampAnswer("2024-02-29 00:00:00.000", sdf, dt)) + // NULL bucketSize (YM) -> NULL + checkEvaluation( + TimeBucket( + Literal.create(null, YearMonthIntervalType()), + timestampLiteral("2024-03-15 11:27:00.000", sdf, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + null) + // NULL ts (YM) -> NULL + checkEvaluation( + TimeBucket( + Literal(Period.ofMonths(1)), + Literal.create(null, dt), + timestampLiteral("1970-01-01 00:00:00.000", sdf, dt)), + null) + // NULL origin (YM) -> NULL + checkEvaluation( + TimeBucket( + Literal(Period.ofMonths(1)), + timestampLiteral("2024-03-15 11:27:00.000", sdf, dt), + Literal.create(null, dt)), + null) + } + } + + test("time_bucket: checkInputDataTypes") { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) + sdf.setTimeZone(TimeZone.getTimeZone(UTC)) + val tsLit = timestampLiteral("2024-01-01 00:00:00.000", sdf, TimestampType) + val originLit = tsLit + val hour = Literal(Duration.ofHours(1)) + + // Non-foldable bucketSize + val nonFoldableBucket = AttributeReference("bs", DayTimeIntervalType())() + val expr1 = TimeBucket(nonFoldableBucket, tsLit, originLit) + val r1 = expr1.checkInputDataTypes().asInstanceOf[DataTypeMismatch] + assert(r1.errorSubClass == "NON_FOLDABLE_INPUT") + assert(r1.messageParameters("inputName") == "`bucketSize`") + + // Non-foldable origin + val nonFoldableOrigin = AttributeReference("o", TimestampType)() + val expr2 = TimeBucket(hour, tsLit, nonFoldableOrigin) + val r2 = expr2.checkInputDataTypes().asInstanceOf[DataTypeMismatch] + assert(r2.errorSubClass == "NON_FOLDABLE_INPUT") + assert(r2.messageParameters("inputName") == "`origin`") + + // Non-positive DT bucketSize + val expr3 = TimeBucket(Literal(Duration.ofMinutes(0)), tsLit, originLit) + val r3 = 
expr3.checkInputDataTypes().asInstanceOf[DataTypeMismatch] + assert(r3.errorSubClass == "VALUE_OUT_OF_RANGE") + + // Non-positive YM bucketSize + val expr4 = TimeBucket(Literal(Period.ofMonths(-1)), tsLit, originLit) + val r4 = expr4.checkInputDataTypes().asInstanceOf[DataTypeMismatch] + assert(r4.errorSubClass == "VALUE_OUT_OF_RANGE") + + // ts/origin type mismatch: TIMESTAMP ts vs TIMESTAMP_NTZ origin + val ntzOrigin = Literal(LocalDateTime.of(1970, 1, 1, 0, 0, 0)) + val expr5 = TimeBucket(hour, tsLit, ntzOrigin) + val r5 = expr5.checkInputDataTypes().asInstanceOf[DataTypeMismatch] + assert(r5.errorSubClass == "UNEXPECTED_INPUT_TYPE") + } + + test("time_bucket: ExpressionBuilder") { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) + sdf.setTimeZone(TimeZone.getTimeZone(UTC)) + val hour = Literal(Duration.ofHours(1)) + val ts = timestampLiteral("2024-01-01 11:27:00.000", sdf, TimestampType) + val tsNtz = timestampLiteral("2024-01-01 11:27:00.000", sdf, TimestampNTZType) + val ntzOrigin = timestampLiteral("1970-01-01 00:00:00.000", sdf, TimestampNTZType) + + // 2-arg: default origin is epoch with ts's type (TIMESTAMP) + val built1 = TimeBucketExpressionBuilder.build("time_bucket", Seq(hour, ts)) + .asInstanceOf[TimeBucket] + assert(built1.originTs == Literal(0L, TimestampType)) + + // 2-arg with TIMESTAMP_NTZ ts: default origin is epoch with TIMESTAMP_NTZ + val built2 = TimeBucketExpressionBuilder.build("time_bucket", Seq(hour, tsNtz)) + .asInstanceOf[TimeBucket] + assert(built2.originTs == Literal(0L, TimestampNTZType)) + + // NULL ts + TIMESTAMP_NTZ origin: ts retyped to TIMESTAMP_NTZ to match origin + val built3 = TimeBucketExpressionBuilder.build( + "time_bucket", Seq(hour, Literal(null, NullType), ntzOrigin)) + .asInstanceOf[TimeBucket] + assert(built3.ts.dataType == TimestampNTZType) + + // NULL origin + TIMESTAMP_NTZ ts: origin retyped to TIMESTAMP_NTZ to match ts + val built4 = TimeBucketExpressionBuilder.build( + "time_bucket", 
Seq(hour, tsNtz, Literal(null, NullType))) + .asInstanceOf[TimeBucket] + assert(built4.originTs.dataType == TimestampNTZType) + + // Bare NULL as bucketSize: retyped to DayTimeIntervalType + val built5 = TimeBucketExpressionBuilder.build( + "time_bucket", Seq(Literal(null, NullType), ts)) + .asInstanceOf[TimeBucket] + assert(built5.bucketSize.dataType == DayTimeIntervalType()) + + // Wrong arg count + intercept[AnalysisException] { + TimeBucketExpressionBuilder.build("time_bucket", Seq(hour)) + } + intercept[AnalysisException] { + TimeBucketExpressionBuilder.build("time_bucket", Seq(hour, ts, ts, ts)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 605a6ffafe8c4..6b3d2e9402a25 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -1525,4 +1525,87 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(result === expected) } } + + test("timeBucketDTInterval") { + // 15-minute bucket with default (epoch) origin + assert(timeBucketDTInterval(15 * MICROS_PER_MINUTE, + date(2024, 1, 1, 11, 27, 0), 0L) === date(2024, 1, 1, 11, 15, 0)) + // 1-hour bucket + assert(timeBucketDTInterval(MICROS_PER_HOUR, + date(2024, 1, 1, 11, 27, 0), 0L) === date(2024, 1, 1, 11, 0, 0)) + // Custom origin shifts alignment: grid anchored at :05 + assert(timeBucketDTInterval(MICROS_PER_HOUR, + date(2024, 1, 1, 11, 27, 0), date(1970, 1, 1, 0, 5, 0)) + === date(2024, 1, 1, 11, 5, 0)) + // 7-day weekly bucket (epoch = Thursday, so buckets run Thu-Wed) + assert(timeBucketDTInterval(7 * MICROS_PER_DAY, + date(2024, 1, 10, 11, 27, 0), 0L) === date(2024, 1, 4, 0, 0, 0)) + // ts exactly on boundary returns same instant + assert(timeBucketDTInterval(15 * MICROS_PER_MINUTE, + 
date(2024, 1, 1, 11, 15, 0), 0L) === date(2024, 1, 1, 11, 15, 0)) + // Origin AFTER ts: floorDiv must handle negative diff correctly + assert(timeBucketDTInterval(MICROS_PER_HOUR, + date(2024, 1, 1, 11, 27, 0), date(2025, 1, 1, 0, 30, 0)) + === date(2024, 1, 1, 10, 30, 0)) + // Pre-epoch ts + assert(timeBucketDTInterval(MICROS_PER_DAY, + date(1969, 12, 31, 23, 30, 0), 0L) === date(1969, 12, 31, 0, 0, 0)) + // 1-microsecond bucket preserves exact value + assert(timeBucketDTInterval(1L, + date(2024, 6, 20, 10, 0, 0, 123456), 0L) + === date(2024, 6, 20, 10, 0, 0, 123456)) + // Overflow in subtractExact (ts - origin underflows below Long.MinValue) + intercept[ArithmeticException] { + timeBucketDTInterval(1L, Long.MinValue, Long.MaxValue) + } + // Overflow in subtractExact (ts - origin overflows above Long.MaxValue) + intercept[ArithmeticException] { + timeBucketDTInterval(1L, Long.MaxValue, -1L) + } + // Overflow in multiplyExact (floorDiv * bucketMicros) + intercept[ArithmeticException] { + timeBucketDTInterval(3L, Long.MinValue, 0L) + } + // Overflow in addExact (origin + bucketOffset) + intercept[ArithmeticException] { + timeBucketDTInterval(Long.MaxValue, -6L, -5L) + } + } + + test("timeBucketYMInterval") { + // 1-month bucket default origin + assert(timeBucketYMInterval(1, + date(2024, 3, 15, 11, 27, 0), 0L) === date(2024, 3, 1, 0, 0, 0)) + // 3-month (quarterly) bucket + assert(timeBucketYMInterval(3, + date(2024, 5, 15, 10, 0, 0), 0L) === date(2024, 4, 1, 0, 0, 0)) + // 12-month (yearly) bucket + assert(timeBucketYMInterval(12, + date(2024, 5, 15, 10, 0, 0), 0L) === date(2024, 1, 1, 0, 0, 0)) + // Monthly with origin on 15th: grid anchored at day-of-month = 15 + assert(timeBucketYMInterval(1, + date(2024, 3, 20, 9, 0, 0), date(1970, 1, 15, 0, 0, 0)) + === date(2024, 3, 15, 0, 0, 0)) + // End-of-month capping with step-back: origin on 1970-01-31, 1-month bucket. 
+ // dateAddMonths(1970-01-31, 650) gives 2024-03-31, which is after ts; step back one + // bucket to February, where day 31 is capped to 2024-02-29 (leap year). + assert(timeBucketYMInterval(1, + date(2024, 3, 1, 12, 0, 0), date(1970, 1, 31, 0, 0, 0)) + === date(2024, 2, 29, 0, 0, 0)) + // Leap-year capping: origin on Feb 29, 1-year bucket, non-leap target. + assert(timeBucketYMInterval(12, + date(2025, 3, 1, 0, 0, 0), date(2024, 2, 29, 0, 0, 0)) + === date(2025, 2, 28, 0, 0, 0)) + // Pre-epoch ts + assert(timeBucketYMInterval(1, + date(1968, 7, 15, 10, 0, 0), 0L) === date(1968, 7, 1, 0, 0, 0)) + // Extreme ts: daysToMicros on the resulting day count overflows via multiplyExact. + intercept[ArithmeticException] { + timeBucketYMInterval(1, Long.MinValue, 0L) + } + // Extreme origin: daysToMicros on originMicros's day count overflows via multiplyExact. + intercept[ArithmeticException] { + timeBucketYMInterval(1, 0L, Long.MinValue) + } + } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 14f36cbae055b..4a43bbcf853c5 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -363,6 +363,7 @@ | org.apache.spark.sql.catalyst.expressions.ThetaIntersection | theta_intersection | SELECT theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | | org.apache.spark.sql.catalyst.expressions.ThetaSketchEstimate | theta_sketch_estimate | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.ThetaUnion | theta_union | SELECT theta_sketch_estimate(theta_union(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct | +| 
org.apache.spark.sql.catalyst.expressions.TimeBucketExpressionBuilder | time_bucket | SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00') | struct | | org.apache.spark.sql.catalyst.expressions.TimeDiff | time_diff | SELECT time_diff('HOUR', TIME'20:30:29', TIME'21:30:28') | struct | | org.apache.spark.sql.catalyst.expressions.TimeFromMicros | time_from_micros | SELECT time_from_micros(0) | struct | | org.apache.spark.sql.catalyst.expressions.TimeFromMillis | time_from_millis | SELECT time_from_millis(0) | struct | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/time-bucket.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/time-bucket.sql.out new file mode 100644 index 0000000000000..818bdef2bf55e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/time-bucket.sql.out @@ -0,0 +1,890 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET TIME ZONE 'UTC' +-- !query analysis +SetCommand (spark.sql.session.timeZone,Some(UTC)) + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '7' DAY, TIMESTAMP '2024-01-10 11:27:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1 00:30' DAY TO MINUTE, TIMESTAMP '2024-06-20 10:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '0.000001' SECOND, TIMESTAMP '2024-06-20 10:00:00.123456') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00') 
+-- !query analysis +Project [time_bucket(INTERVAL '15' MINUTE, 2024-01-01 11:27:00, 1970-01-01 00:00:00) AS time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP_NTZ '1970-01-01 00:00:00')#x] ++- OneRowRelation + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:05:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:15:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 11:27:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2025-01-01 00:30:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00') +-- !query analysis +Project [time_bucket(INTERVAL '01' HOUR, 2024-01-15 10:23:00, 2024-01-15 00:30:00) AS time_bucket(INTERVAL '01' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00')#x] ++- OneRowRelation + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 11:27:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-05-15 10:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2024-05-15 10:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1-3' YEAR TO MONTH, TIMESTAMP '2024-06-20 10:00:00') +-- !query analysis +[Analyzer test output redacted due to 
nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00') +-- !query analysis +Project [time_bucket(INTERVAL '1' MONTH, 2024-03-15 11:27:00, 1970-01-01 00:00:00) AS time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00', TIMESTAMP_NTZ '1970-01-01 00:00:00')#x] ++- OneRowRelation + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 00:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-20 09:00:00', TIMESTAMP '1970-01-15 00:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 12:00:00', TIMESTAMP '1970-01-31 00:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2025-03-01 00:00:00', TIMESTAMP '2024-02-29 00:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-02-15 10:00:00', TIMESTAMP '2024-08-01 00:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00') +-- !query analysis +Project [time_bucket(INTERVAL '3' MONTH, 2024-08-20 14:30:00, 2024-01-01 00:00:00) AS time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')#x] ++- OneRowRelation + + +-- !query +SELECT time_bucket(INTERVAL '1' DAY, TIMESTAMP '1969-12-31 23:30:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '1969-12-31 23:30:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT 
time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1960-06-15 00:30:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '1968-07-15 10:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '10' MINUTE + INTERVAL '5' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '2' MONTH + INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '2024-01-01 00:00:00' + INTERVAL '5' MINUTE) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00' + INTERVAL '30' MINUTE) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT t, time_bucket(INTERVAL '1' HOUR, t) AS bucket + FROM VALUES (TIMESTAMP '2024-01-15 10:23:00'), (TIMESTAMP '2024-01-15 14:45:00'), (CAST(NULL AS TIMESTAMP)) tab(t) + ORDER BY t +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT t, time_bucket(INTERVAL '15' MINUTE, t) AS bucket + FROM VALUES (TIMESTAMP_NTZ '2024-01-15 10:23:00'), (TIMESTAMP_NTZ '2024-01-15 14:07:00') tab(t) + ORDER BY t +-- !query analysis +Sort [t#x ASC NULLS FIRST], true ++- Project [t#x, time_bucket(INTERVAL '15' MINUTE, t#x, 1970-01-01 00:00:00) AS bucket#x] + +- SubqueryAlias tab + +- LocalRelation [t#x] + + +-- !query +SELECT t, time_bucket(INTERVAL '1' 
MONTH, t) AS bucket + FROM VALUES (TIMESTAMP '2024-03-15 10:23:00'), (TIMESTAMP '2024-06-01 00:00:00') tab(t) + ORDER BY t +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SET TIME ZONE 'America/Los_Angeles' +-- !query analysis +SetCommand (spark.sql.session.timeZone,Some(America/Los_Angeles)) + + +-- !query +SELECT t, time_bucket(INTERVAL '1' MONTH, CAST(t AS TIMESTAMP_NTZ)) AS bucket + FROM VALUES + (TIMESTAMP '2024-02-15 10:00:00'), + (TIMESTAMP '2024-03-15 10:00:00'), + (TIMESTAMP '2024-04-15 10:00:00') tab(t) + ORDER BY t +-- !query analysis +Sort [t#x ASC NULLS FIRST], true ++- Project [t#x, time_bucket(INTERVAL '1' MONTH, cast(t#x as timestamp_ntz), 1970-01-01 00:00:00) AS bucket#x] + +- SubqueryAlias tab + +- LocalRelation [t#x] + + +-- !query +SET TIME ZONE 'UTC' +-- !query analysis +SetCommand (spark.sql.session.timeZone,Some(UTC)) + + +-- !query +SELECT time_bucket(NULL, TIMESTAMP '2024-01-01 11:27:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, NULL) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', NULL) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, NULL, NULL) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP '2024-01-01 00:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00') +-- !query analysis +Project [time_bucket(INTERVAL '01' HOUR, null, 2024-01-01 00:00:00) AS time_bucket(INTERVAL '01' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00')#x] ++- OneRowRelation + + +-- !query +SELECT time_bucket(INTERVAL '0' SECOND, 
TIMESTAMP '2024-01-01 11:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '00' SECOND", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '00' SECOND, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 72, + "fragment" : "time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '0' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 71, + "fragment" : "time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '0' YEAR", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 70, + "fragment" : "time_bucket(INTERVAL '0' YEAR, TIMESTAMP 
'2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-15' MINUTE", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 74, + "fragment" : "time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-1' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 72, + "fragment" : "time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-1' YEAR", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", 
+ "startIndex" : 8, + "stopIndex" : 71, + "fragment" : "time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '00' MINUTE", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '15' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 96, + "fragment" : "time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-10' MINUTE", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '05' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 95, + "fragment" : "time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : 
"INTERVAL '0' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '3' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 92, + "fragment" : "time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-2' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '1' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 92, + "fragment" : "time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"TIMESTAMP_NTZ '2024-01-01 00:00:00'\"", + "inputType" : "\"TIMESTAMP_NTZ\"", + "paramIndex" : "third", + "requiredType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 107, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', 
TIMESTAMP_NTZ '2024-01-01 00:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"TIMESTAMP '2024-01-01 00:00:00'\"", + "inputType" : "\"TIMESTAMP\"", + "paramIndex" : "third", + "requiredType" : "\"TIMESTAMP_NTZ\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 107, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')" + } ] +} + + +-- !query +SELECT time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"15 minutes\"", + "inputType" : "\"STRING\"", + "paramIndex" : "first", + "requiredType" : "(\"INTERVAL DAY TO SECOND\" or \"INTERVAL YEAR TO MONTH\")", + "sqlExpr" : "\"time_bucket(15 minutes, TIMESTAMP '2024-01-15 10:23:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 65, + "fragment" : "time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"DATE '2024-01-15'\"", + "inputType" : "\"DATE\"", + "paramIndex" : "second", + "requiredType" 
: "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 59, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"2024-01-15 10:23:00\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, 2024-01-15 10:23:00, TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 63, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"DATE '2024-01-01'\"", + "inputType" : "\"DATE\"", + "paramIndex" : "third", + "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 92, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00') +-- !query 
analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"2024-01-01 00:00:00\"", + "inputType" : "\"STRING\"", + "paramIndex" : "third", + "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', 2024-01-01 00:00:00)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 96, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' HOUR) tab(bs) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"bs\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL HOUR\"", + "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 55, + "fragment" : "time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' MONTH) tab(bs) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"bs\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL MONTH\"", + "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 55, + "fragment" : 
"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"scalarsubquery()\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL HOUR\"", + "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 79, + "fragment" : "time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"scalarsubquery()\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL MONTH\"", + "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 80, + "fragment" : "time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"o\"", + "inputName" : "`origin`", + "inputType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', 
o)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 73, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o)" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"o\"", + "inputName" : "`origin`", + "inputType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 74, + "fragment" : "time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00')) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"scalarsubquery()\"", + "inputName" : "`origin`", + "inputType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', scalarsubquery())\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 112, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00'))" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "[2, 
3]", + "functionName" : "`time_bucket`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 37, + "fragment" : "time_bucket(INTERVAL '1' HOUR)" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "4", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "[2, 3]", + "functionName" : "`time_bucket`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 136, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00')" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/time-bucket.sql b/sql/core/src/test/resources/sql-tests/inputs/time-bucket.sql new file mode 100644 index 0000000000000..d933cbec685c5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/time-bucket.sql @@ -0,0 +1,224 @@ +-- time_bucket function tests + +-- Pin session timezone to UTC so rendered TIMESTAMP values in the golden file +-- are stable across CI and developer machines. time_bucket itself always +-- computes in UTC regardless of session timezone. 
+SET TIME ZONE 'UTC'; + + +-- DayTimeInterval buckets: default (epoch) origin + +-- 15-minute bucket +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00'); + +-- 1-hour bucket +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00'); + +-- 7-day (weekly) bucket (epoch is Thursday, so buckets run Thu-Wed) +SELECT time_bucket(INTERVAL '7' DAY, TIMESTAMP '2024-01-10 11:27:00'); + +-- Compound DayTimeInterval (1 day 30 minutes) +SELECT time_bucket(INTERVAL '1 00:30' DAY TO MINUTE, TIMESTAMP '2024-06-20 10:00:00'); + +-- 1-microsecond bucket (finest supported precision) +SELECT time_bucket(INTERVAL '0.000001' SECOND, TIMESTAMP '2024-06-20 10:00:00.123456'); + +-- DayTimeInterval bucket on TIMESTAMP_NTZ +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00'); + + +-- DayTimeInterval buckets: explicit origin + +-- Custom origin at :05 shifts the grid so ts 11:27 lands in [11:05, 12:05) +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:05:00'); + +-- ts exactly on a bucket boundary returns ts +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:15:00'); + +-- ts exactly equal to origin returns origin +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 11:27:00'); + +-- Origin after ts (exercises floorDiv on negative diff) +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2025-01-01 00:30:00'); + +-- DayTimeInterval 3-arg with TIMESTAMP_NTZ +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00'); + + +-- YearMonthInterval buckets: default (epoch) origin + +-- 1-month bucket +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 11:27:00'); + +-- 3-month (quarterly) bucket +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-05-15 10:00:00'); + +-- 1-year bucket +SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP 
'2024-05-15 10:00:00'); + +-- Compound YearMonthInterval (1 year 3 months = 15 months) +SELECT time_bucket(INTERVAL '1-3' YEAR TO MONTH, TIMESTAMP '2024-06-20 10:00:00'); + +-- YearMonthInterval bucket on TIMESTAMP_NTZ +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00'); + +-- ts exactly on a bucket boundary +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 00:00:00'); + + +-- YearMonthInterval buckets: explicit origin + +-- Origin on 15th aligns grid to the 15th of each month +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-20 09:00:00', TIMESTAMP '1970-01-15 00:00:00'); + +-- End-of-month capping + step-back: origin Jan 31, 1-month bucket -> 2024-02-29 (leap year) +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 12:00:00', TIMESTAMP '1970-01-31 00:00:00'); + +-- Leap-year capping: origin Feb 29, 1-year bucket -> 2025-02-28 (non-leap target) +SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2025-03-01 00:00:00', TIMESTAMP '2024-02-29 00:00:00'); + +-- Origin after ts (negative month diff) +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-02-15 10:00:00', TIMESTAMP '2024-08-01 00:00:00'); + +-- YearMonthInterval 3-arg with TIMESTAMP_NTZ and custom origin +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00'); + + +-- Pre-epoch timestamps and origins + +-- Pre-epoch ts, 1-day bucket +SELECT time_bucket(INTERVAL '1' DAY, TIMESTAMP '1969-12-31 23:30:00'); + +-- Pre-epoch ts, 1-hour bucket +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '1969-12-31 23:30:00'); + +-- Pre-epoch origin with post-epoch ts +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1960-06-15 00:30:00'); + +-- Pre-epoch ts, YearMonthInterval bucket +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '1968-07-15 10:00:00'); + + +-- Foldable expressions (bucket_size, origin, and ts are folded at analysis time) + +-- Foldable DayTimeInterval 
arithmetic in bucket_size +SELECT time_bucket(INTERVAL '10' MINUTE + INTERVAL '5' MINUTE, TIMESTAMP '2024-06-20 09:47:00'); +SELECT time_bucket(INTERVAL '1' HOUR - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00'); + +-- Foldable YearMonthInterval arithmetic in bucket_size +SELECT time_bucket(INTERVAL '2' MONTH + INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00'); + +-- Foldable arithmetic in origin +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '2024-01-01 00:00:00' + INTERVAL '5' MINUTE); + +-- Foldable arithmetic in ts +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00' + INTERVAL '30' MINUTE); + + +-- Column reference as ts + +-- DayTimeInterval bucket over TIMESTAMP column (with one NULL row) +SELECT t, time_bucket(INTERVAL '1' HOUR, t) AS bucket + FROM VALUES (TIMESTAMP '2024-01-15 10:23:00'), (TIMESTAMP '2024-01-15 14:45:00'), (CAST(NULL AS TIMESTAMP)) tab(t) + ORDER BY t; + +-- DayTimeInterval bucket over TIMESTAMP_NTZ column +SELECT t, time_bucket(INTERVAL '15' MINUTE, t) AS bucket + FROM VALUES (TIMESTAMP_NTZ '2024-01-15 10:23:00'), (TIMESTAMP_NTZ '2024-01-15 14:07:00') tab(t) + ORDER BY t; + +-- YearMonthInterval bucket over TIMESTAMP column +SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket + FROM VALUES (TIMESTAMP '2024-03-15 10:23:00'), (TIMESTAMP '2024-06-01 00:00:00') tab(t) + ORDER BY t; + + +-- NTZ-cast pattern: monthly buckets stay on local month starts across a DST +-- boundary (America/Los_Angeles springs forward 2024-03-10). 
+SET TIME ZONE 'America/Los_Angeles'; +SELECT t, time_bucket(INTERVAL '1' MONTH, CAST(t AS TIMESTAMP_NTZ)) AS bucket + FROM VALUES + (TIMESTAMP '2024-02-15 10:00:00'), + (TIMESTAMP '2024-03-15 10:00:00'), + (TIMESTAMP '2024-04-15 10:00:00') tab(t) + ORDER BY t; +SET TIME ZONE 'UTC'; + + +-- NULL propagation + +-- Untyped NULL literal for bucket_size, ts, or origin +SELECT time_bucket(NULL, TIMESTAMP '2024-01-01 11:27:00'); +SELECT time_bucket(INTERVAL '1' HOUR, NULL); +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', NULL); + +-- Both ts and origin NULL +SELECT time_bucket(INTERVAL '1' HOUR, NULL, NULL); + +-- NULL ts with explicit typed origin drives ts retyping via the builder +SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP '2024-01-01 00:00:00'); +SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00'); + + +-- Error: bucket_size must be positive + +-- Zero literal (DT, YM MONTH, YM YEAR) +SELECT time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00'); +SELECT time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00'); +SELECT time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00'); + +-- Negative literal (DT, YM MONTH, YM YEAR) +SELECT time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00'); +SELECT time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00'); +SELECT time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00'); + +-- Foldable arithmetic producing zero or negative +SELECT time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00'); +SELECT time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00'); +SELECT time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00'); +SELECT time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00'); + + +-- Error: argument types + +-- ts and origin must be the same TIMESTAMP flavor (both TIMESTAMP or both
TIMESTAMP_NTZ) +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00'); +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00'); + +-- bucket_size must be an interval (not a string) +SELECT time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00'); + +-- ts must be TIMESTAMP or TIMESTAMP_NTZ (not DATE or string) +SELECT time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15'); +SELECT time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00'); + +-- origin must be TIMESTAMP or TIMESTAMP_NTZ (not DATE or string) +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01'); +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00'); + + +-- Error: bucket_size and origin must be foldable + +-- Non-foldable bucket_size via column reference (DT and YM) +SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' HOUR) tab(bs); +SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' MONTH) tab(bs); + +-- Non-foldable bucket_size via scalar subquery (DT and YM) +SELECT time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00'); +SELECT time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00'); + +-- Non-foldable origin via column reference (DT and YM bucket) +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o); +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o); + +-- Non-foldable origin via scalar subquery +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00')); + + +-- Error: wrong number of arguments + +-- 1-arg (too few) +SELECT time_bucket(INTERVAL '1' HOUR); + +-- 4-arg (too many) +SELECT time_bucket(INTERVAL 
'1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00'); diff --git a/sql/core/src/test/resources/sql-tests/results/time-bucket.sql.out b/sql/core/src/test/resources/sql-tests/results/time-bucket.sql.out new file mode 100644 index 0000000000000..714accadabf4b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/time-bucket.sql.out @@ -0,0 +1,1025 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET TIME ZONE 'UTC' +-- !query schema +struct +-- !query output +spark.sql.session.timeZone UTC + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:27:00') +-- !query schema +struct +-- !query output +2024-01-01 11:15:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00') +-- !query schema +struct +-- !query output +2024-01-01 11:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '7' DAY, TIMESTAMP '2024-01-10 11:27:00') +-- !query schema +struct +-- !query output +2024-01-04 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1 00:30' DAY TO MINUTE, TIMESTAMP '2024-06-20 10:00:00') +-- !query schema +struct +-- !query output +2024-06-20 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '0.000001' SECOND, TIMESTAMP '2024-06-20 10:00:00.123456') +-- !query schema +struct +-- !query output +2024-06-20 10:00:00.123456 + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP_NTZ '2024-01-01 11:27:00') +-- !query schema +struct +-- !query output +2024-01-01 11:15:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:05:00') +-- !query schema +struct +-- !query output +2024-01-01 11:05:00 + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-01 11:15:00') +-- !query schema +struct +-- !query output +2024-01-01 11:15:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 11:27:00') +-- 
!query schema +struct +-- !query output +2024-01-01 11:27:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '2025-01-01 00:30:00') +-- !query schema +struct +-- !query output +2024-01-01 10:30:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-15 10:23:00', TIMESTAMP_NTZ '2024-01-15 00:30:00') +-- !query schema +struct +-- !query output +2024-01-15 09:30:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-15 11:27:00') +-- !query schema +struct +-- !query output +2024-03-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP '2024-05-15 10:00:00') +-- !query schema +struct +-- !query output +2024-04-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2024-05-15 10:00:00') +-- !query schema +struct +-- !query output +2024-01-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1-3' YEAR TO MONTH, TIMESTAMP '2024-06-20 10:00:00') +-- !query schema +struct +-- !query output +2023-10-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP_NTZ '2024-03-15 11:27:00') +-- !query schema +struct +-- !query output +2024-03-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 00:00:00') +-- !query schema +struct +-- !query output +2024-03-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-20 09:00:00', TIMESTAMP '1970-01-15 00:00:00') +-- !query schema +struct +-- !query output +2024-03-15 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-03-01 12:00:00', TIMESTAMP '1970-01-31 00:00:00') +-- !query schema +struct +-- !query output +2024-02-29 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' YEAR, TIMESTAMP '2025-03-01 00:00:00', TIMESTAMP '2024-02-29 00:00:00') +-- !query schema +struct +-- !query output +2025-02-28 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP 
'2024-02-15 10:00:00', TIMESTAMP '2024-08-01 00:00:00') +-- !query schema +struct +-- !query output +2024-02-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH, TIMESTAMP_NTZ '2024-08-20 14:30:00', TIMESTAMP_NTZ '2024-01-01 00:00:00') +-- !query schema +struct +-- !query output +2024-07-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' DAY, TIMESTAMP '1969-12-31 23:30:00') +-- !query schema +struct +-- !query output +1969-12-31 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '1969-12-31 23:30:00') +-- !query schema +struct +-- !query output +1969-12-31 23:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1960-06-15 00:30:00') +-- !query schema +struct +-- !query output +2024-01-01 10:30:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '1968-07-15 10:00:00') +-- !query schema +struct +-- !query output +1968-07-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '10' MINUTE + INTERVAL '5' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct +-- !query output +2024-06-20 09:45:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct +-- !query output +2024-06-20 09:45:00 + + +-- !query +SELECT time_bucket(INTERVAL '2' MONTH + INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct +-- !query output +2024-04-01 00:00:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '2024-01-01 00:00:00' + INTERVAL '5' MINUTE) +-- !query schema +struct +-- !query output +2024-06-20 09:05:00 + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00' + INTERVAL '30' MINUTE) +-- !query schema +struct +-- !query output +2024-06-20 10:00:00 + + +-- !query +SELECT t, time_bucket(INTERVAL '1' HOUR, t) AS bucket + FROM VALUES (TIMESTAMP '2024-01-15 10:23:00'), (TIMESTAMP 
'2024-01-15 14:45:00'), (CAST(NULL AS TIMESTAMP)) tab(t) + ORDER BY t +-- !query schema +struct +-- !query output +NULL NULL +2024-01-15 10:23:00 2024-01-15 10:00:00 +2024-01-15 14:45:00 2024-01-15 14:00:00 + + +-- !query +SELECT t, time_bucket(INTERVAL '15' MINUTE, t) AS bucket + FROM VALUES (TIMESTAMP_NTZ '2024-01-15 10:23:00'), (TIMESTAMP_NTZ '2024-01-15 14:07:00') tab(t) + ORDER BY t +-- !query schema +struct +-- !query output +2024-01-15 10:23:00 2024-01-15 10:15:00 +2024-01-15 14:07:00 2024-01-15 14:00:00 + + +-- !query +SELECT t, time_bucket(INTERVAL '1' MONTH, t) AS bucket + FROM VALUES (TIMESTAMP '2024-03-15 10:23:00'), (TIMESTAMP '2024-06-01 00:00:00') tab(t) + ORDER BY t +-- !query schema +struct +-- !query output +2024-03-15 10:23:00 2024-03-01 00:00:00 +2024-06-01 00:00:00 2024-06-01 00:00:00 + + +-- !query +SET TIME ZONE 'America/Los_Angeles' +-- !query schema +struct +-- !query output +spark.sql.session.timeZone America/Los_Angeles + + +-- !query +SELECT t, time_bucket(INTERVAL '1' MONTH, CAST(t AS TIMESTAMP_NTZ)) AS bucket + FROM VALUES + (TIMESTAMP '2024-02-15 10:00:00'), + (TIMESTAMP '2024-03-15 10:00:00'), + (TIMESTAMP '2024-04-15 10:00:00') tab(t) + ORDER BY t +-- !query schema +struct +-- !query output +2024-02-15 10:00:00 2024-02-01 00:00:00 +2024-03-15 10:00:00 2024-03-01 00:00:00 +2024-04-15 10:00:00 2024-04-01 00:00:00 + + +-- !query +SET TIME ZONE 'UTC' +-- !query schema +struct +-- !query output +spark.sql.session.timeZone UTC + + +-- !query +SELECT time_bucket(NULL, TIMESTAMP '2024-01-01 11:27:00') +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, NULL) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', NULL) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, NULL, NULL) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT 
time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP '2024-01-01 00:00:00') +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, NULL, TIMESTAMP_NTZ '2024-01-01 00:00:00') +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '00' SECOND", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '00' SECOND, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 72, + "fragment" : "time_bucket(INTERVAL '0' SECOND, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '0' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 71, + "fragment" : "time_bucket(INTERVAL '0' MONTH, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + 
"messageParameters" : { + "currentValue" : "INTERVAL '0' YEAR", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 70, + "fragment" : "time_bucket(INTERVAL '0' YEAR, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-15' MINUTE", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 74, + "fragment" : "time_bucket(INTERVAL '-15' MINUTE, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-1' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 72, + "fragment" : "time_bucket(INTERVAL '-1' MONTH, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00') +-- !query 
schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-1' YEAR", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 71, + "fragment" : "time_bucket(INTERVAL '-1' YEAR, TIMESTAMP '2024-01-01 11:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '00' MINUTE", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '15' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 96, + "fragment" : "time_bucket(INTERVAL '15' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-10' MINUTE", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '05' MINUTE - INTERVAL '15' MINUTE), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + 
"queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 95, + "fragment" : "time_bucket(INTERVAL '5' MINUTE - INTERVAL '15' MINUTE, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '0' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '3' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 92, + "fragment" : "time_bucket(INTERVAL '3' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.VALUE_OUT_OF_RANGE", + "sqlState" : "42K09", + "messageParameters" : { + "currentValue" : "INTERVAL '-2' MONTH", + "exprName" : "time_bucket", + "sqlExpr" : "\"time_bucket((INTERVAL '1' MONTH - INTERVAL '3' MONTH), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"", + "valueRange" : "(0, inf)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 92, + "fragment" : "time_bucket(INTERVAL '1' MONTH - INTERVAL '3' MONTH, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00') +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"TIMESTAMP_NTZ '2024-01-01 00:00:00'\"", + "inputType" : "\"TIMESTAMP_NTZ\"", + "paramIndex" : "third", + "requiredType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 107, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP_NTZ '2024-01-01 00:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"TIMESTAMP '2024-01-01 00:00:00'\"", + "inputType" : "\"TIMESTAMP\"", + "paramIndex" : "third", + "requiredType" : "\"TIMESTAMP_NTZ\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 107, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP_NTZ '2024-01-01 11:27:00', TIMESTAMP '2024-01-01 00:00:00')" + } ] +} + + +-- !query +SELECT time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"15 minutes\"", + "inputType" : "\"STRING\"", + "paramIndex" : "first", + "requiredType" : "(\"INTERVAL DAY TO SECOND\" or \"INTERVAL YEAR TO 
MONTH\")", + "sqlExpr" : "\"time_bucket(15 minutes, TIMESTAMP '2024-01-15 10:23:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 65, + "fragment" : "time_bucket('15 minutes', TIMESTAMP '2024-01-15 10:23:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"DATE '2024-01-15'\"", + "inputType" : "\"DATE\"", + "paramIndex" : "second", + "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 59, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, DATE '2024-01-15')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"2024-01-15 10:23:00\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, 2024-01-15 10:23:00, TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 63, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, '2024-01-15 10:23:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01') +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"DATE '2024-01-01'\"", + "inputType" : "\"DATE\"", + "paramIndex" : "third", + "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 92, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', DATE '2024-01-01')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"2024-01-01 00:00:00\"", + "inputType" : "\"STRING\"", + "paramIndex" : "third", + "requiredType" : "\"(TIMESTAMP OR TIMESTAMP WITHOUT TIME ZONE)\"", + "sqlExpr" : "\"time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', 2024-01-01 00:00:00)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 96, + "fragment" : "time_bucket(INTERVAL '15' MINUTE, TIMESTAMP '2024-01-15 10:23:00', '2024-01-01 00:00:00')" + } ] +} + + +-- !query +SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' HOUR) tab(bs) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"bs\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL HOUR\"", + "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 
00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 55, + "fragment" : "time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00') FROM VALUES (INTERVAL '1' MONTH) tab(bs) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"bs\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL MONTH\"", + "sqlExpr" : "\"time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 55, + "fragment" : "time_bucket(bs, TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"scalarsubquery()\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL HOUR\"", + "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 79, + "fragment" : "time_bucket((SELECT INTERVAL '1' HOUR), TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : 
"\"scalarsubquery()\"", + "inputName" : "`bucketSize`", + "inputType" : "\"INTERVAL MONTH\"", + "sqlExpr" : "\"time_bucket(scalarsubquery(), TIMESTAMP '2024-06-20 09:47:00', TIMESTAMP '1970-01-01 00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 80, + "fragment" : "time_bucket((SELECT INTERVAL '1' MONTH), TIMESTAMP '2024-06-20 09:47:00')" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"o\"", + "inputName" : "`origin`", + "inputType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', o)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 73, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', o)" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o) FROM VALUES (TIMESTAMP '2024-01-01 00:00:00') tab(o) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"o\"", + "inputName" : "`origin`", + "inputType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 74, + "fragment" : "time_bucket(INTERVAL '1' MONTH, TIMESTAMP '2024-06-20 09:47:00', o)" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP 
'2024-01-01 00:00:00')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"scalarsubquery()\"", + "inputName" : "`origin`", + "inputType" : "\"TIMESTAMP\"", + "sqlExpr" : "\"time_bucket(INTERVAL '01' HOUR, TIMESTAMP '2024-06-20 09:47:00', scalarsubquery())\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 112, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-06-20 09:47:00', (SELECT TIMESTAMP '2024-01-01 00:00:00'))" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "[2, 3]", + "functionName" : "`time_bucket`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 37, + "fragment" : "time_bucket(INTERVAL '1' HOUR)" + } ] +} + + +-- !query +SELECT time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "4", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "[2, 3]", + "functionName" : "`time_bucket`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 136, + "fragment" : "time_bucket(INTERVAL '1' HOUR, TIMESTAMP '2024-01-01 11:27:00', TIMESTAMP '1970-01-01 00:00:00', TIMESTAMP '1970-01-01 00:00:00')" + } ] +}