Replace values by different values
Description
This allows one to recode values in a column.
Usage
<Expr>$replace(old, new, default = NULL, return_dtype = NULL)
Arguments
old
|
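Can be several things:
- a vector of the values to replace;
- a named list (only when new is missing) whose names are the values to replace and whose values are the replacements. Numeric names must be wrapped in backticks, e.g. list(`2` = 100);
- an Expr.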
|
new
|
Either a vector of length 1, a vector of the same length as old,
or an Expr. If missing, old must be a named list.
|
default
|
The default replacement if the value is not in old. Can be
an Expr. If NULL (default), then the value doesn’t change.
|
return_dtype
|
The data type of the resulting expression. If set to NULL
(default), the data type is determined automatically based on the other
inputs.
|
Value
Expr
Examples
library(polars)
df = pl$DataFrame(a = c(1, 2, 2, 3))
# "old" and "new" can take vectors of length 1 or of same length
df$with_columns(replaced = pl$col("a")$replace(2, 100))
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a ┆ replaced │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1.0 │
#> │ 2.0 ┆ 100.0 │
#> │ 2.0 ┆ 100.0 │
#> │ 3.0 ┆ 3.0 │
#> └─────┴──────────┘
df$with_columns(replaced = pl$col("a")$replace(c(2, 3), c(100, 200)))
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a ┆ replaced │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1.0 │
#> │ 2.0 ┆ 100.0 │
#> │ 2.0 ┆ 100.0 │
#> │ 3.0 ┆ 200.0 │
#> └─────┴──────────┘
# "old" can be a named list where names are values to replace, and values are
# the replacements
mapping = list(`2` = 100, `3` = 200)
df$with_columns(replaced = pl$col("a")$replace(mapping, default = -1))
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a ┆ replaced │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ -1.0 │
#> │ 2.0 ┆ 100.0 │
#> │ 2.0 ┆ 100.0 │
#> │ 3.0 ┆ 200.0 │
#> └─────┴──────────┘
df = pl$DataFrame(a = c("x", "y", "z"))
mapping = list(x = 1, y = 2, z = 3)
df$with_columns(replaced = pl$col("a")$replace(mapping))
#> shape: (3, 2)
#> ┌─────┬──────────┐
#> │ a ┆ replaced │
#> │ --- ┆ --- │
#> │ str ┆ str │
#> ╞═════╪══════════╡
#> │ x ┆ 1.0 │
#> │ y ┆ 2.0 │
#> │ z ┆ 3.0 │
#> └─────┴──────────┘
# one can specify the data type to return instead of automatically inferring it
df$with_columns(replaced = pl$col("a")$replace(mapping, return_dtype = pl$Int8))
#> shape: (3, 2)
#> ┌─────┬──────────┐
#> │ a ┆ replaced │
#> │ --- ┆ --- │
#> │ str ┆ i8 │
#> ╞═════╪══════════╡
#> │ x ┆ 1 │
#> │ y ┆ 2 │
#> │ z ┆ 3 │
#> └─────┴──────────┘
# "old", "new", and "default" can take Expr
df = pl$DataFrame(a = c(1, 2, 2, 3), b = c(1.5, 2.5, 5, 1))
df$with_columns(
replaced = pl$col("a")$replace(
old = pl$col("a")$max(),
new = pl$col("b")$sum(),
default = pl$col("b"),
)
)
#> shape: (4, 3)
#> ┌─────┬─────┬──────────┐
#> │ a ┆ b ┆ replaced │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪══════════╡
#> │ 1.0 ┆ 1.5 ┆ 1.5 │
#> │ 2.0 ┆ 2.5 ┆ 2.5 │
#> │ 2.0 ┆ 5.0 ┆ 5.0 │
#> │ 3.0 ┆ 1.0 ┆ 10.0 │
#> └─────┴─────┴──────────┘