Convert a String column into a Date/Datetime/Time column.
Description
Similar to the strptime() function.
Usage
<Expr>$str$strptime(
dtype,
format = NULL,
...,
strict = TRUE,
exact = TRUE,
cache = TRUE,
ambiguous = "raise"
)
Arguments
dtype
|
The data type to convert into. Can be either pl$Date,
pl$Datetime(), or pl$Time.
|
format
|
Format to use for conversion. Refer to the chrono crate documentation
for the full specification. Example: “%Y-%m-%d %H:%M:%S”. If NULL
(default), the format is inferred from the data. Note that the time
zone specifier %Z is not supported: time zones given that way are
simply ignored. Numeric time zones like %z or %:z are supported.
|
…
|
Not used. |
strict
|
If TRUE (default), raise an error if any string cannot be parsed.
If FALSE, strings that cannot be parsed produce a polars null.
|
exact
|
If TRUE (default), require an exact format match. If
FALSE, allow the format to match anywhere in the target
string. Conversion to the Time type is always exact. Note that using
exact = FALSE introduces a performance penalty - cleaning
your data beforehand will almost certainly be more performant.
|
cache
|
Use a cache of unique, converted dates to apply the datetime conversion. |
ambiguous
|
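Determine how to deal with ambiguous datetimes:
- "raise" (default): throw an error
- "earliest": use the earliest datetime
- "latest": use the latest datetime
- "null": return a null value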
|
Details
When parsing a Datetime the column precision will be inferred from the
format string, if given, e.g.: “%F %T%.3f” =>
pl$Datetime("ms"). If no fractional second component is
found then the default is “us” (microsecond).
Value
Expr of Date, Datetime or Time type
See Also
- <Expr>$str$to_date()
- <Expr>$str$to_datetime()
- <Expr>$str$to_time()
Examples
library("polars0")
# Dealing with a consistent format
s = as_polars_series(c("2020-01-01 01:00Z", "2020-01-01 02:00Z"))

# Parse with an explicit format string
s$str$strptime(pl$Datetime(), "%Y-%m-%d %H:%M%#z")
#> polars Series: shape: (2,)
#> Series: '' [datetime[μs, UTC]]
#> [
#>   2020-01-01 01:00:00 UTC
#>   2020-01-01 02:00:00 UTC
#> ]

# Auto infer format
s$str$strptime(pl$Datetime())
#> polars Series: shape: (2,)
#> Series: '' [datetime[μs, UTC]]
#> [
#>   2020-01-01 01:00:00 UTC
#>   2020-01-01 02:00:00 UTC
#> ]
# A datetime string carrying a time zone offset is converted to UTC
s_offset = as_polars_series("2020-01-01T01:00:00+09:00")
s_offset$str$strptime(pl$Datetime())
#> polars Series: shape: (1,)
#> Series: '' [datetime[μs, UTC]]
#> [
#>   2019-12-31 16:00:00 UTC
#> ]
# Dealing with different formats: parse the column once per candidate
# format with strict = FALSE (failures become null), then coalesce the
# attempts so each row keeps the first format that parsed.
s = as_polars_series(
  c(
    "2021-04-22",
    "2022-01-04 00:00:00",
    "01/31/22",
    "Sun Jul 8 00:34:60 2001"
  ),
  "date"
)

date_col = pl$col("date")

s$to_frame()$select(
  pl$coalesce(
    date_col$str$strptime(pl$Date, "%F", strict = FALSE),
    date_col$str$strptime(pl$Date, "%F %T", strict = FALSE),
    date_col$str$strptime(pl$Date, "%D", strict = FALSE),
    date_col$str$strptime(pl$Date, "%c", strict = FALSE)
  )
)
#> shape: (4, 1)
#> ┌────────────┐
#> │ date       │
#> │ ---        │
#> │ date       │
#> ╞════════════╡
#> │ 2021-04-22 │
#> │ 2022-01-04 │
#> │ 2022-01-31 │
#> │ 2001-07-08 │
#> └────────────┘
# Ignore invalid time: with strict = FALSE the unparseable entry becomes
# null instead of raising an error
raw_times = c(
  "2023-01-01 11:22:33 -0100",
  "2023-01-01 11:22:33 +0300",
  "invalid time"
)
s = as_polars_series(raw_times)

fmt = "%Y-%m-%d %H:%M:%S %z"
s$str$strptime(dtype = pl$Datetime("ns"), format = fmt, strict = FALSE)
#> polars Series: shape: (3,)
#> Series: '' [datetime[ns, UTC]]
#> [
#>   2023-01-01 12:22:33 UTC
#>   2023-01-01 08:22:33 UTC
#>   null
#> ]