Uncomment these code chunks
Opened this issue · 0 comments
EmilHvitfeldt commented
feature-engineering-az/categorical-cleaning.qmd
Lines 126 to 192 in 53646e2
```{r}
#| label: elevators-dv_manufacturer-stringdist
#| eval: false
#| echo: false
library(tidyverse) | |
#' Pairwise string distances between the distinct values of a vector
#'
#' @param x A vector of strings. Duplicates and `NA`s are dropped before
#'   distances are computed.
#' @return A data frame with one row per ordered pair of distinct values:
#'   `Var1` and `Var2` hold the two strings and `value` holds the result of
#'   `stringdist::stringdistmatrix()` for that pair. Pairs with a distance of
#'   zero (each value compared with itself) are omitted.
self_stringdist <- function(x) {
  # A missing string has no usable distance; left in, its NA distances would
  # turn into all-NA rows when the result is subset below.
  x <- unique(x[!is.na(x)])

  dists <- stringdist::stringdistmatrix(x, x)

  # Flatten the square matrix column by column (row index varying fastest),
  # which is the layout reshape2::melt() produced, using base R only.
  out <- data.frame(
    Var1 = x[as.vector(row(dists))],
    Var2 = x[as.vector(col(dists))],
    value = as.vector(dists)
  )

  # which() skips NA comparisons instead of propagating them into the subset.
  out[which(out$value != 0), ]
}
# Rank every pair of distinct, non-missing manufacturer names by string
# distance, closest pairs first.
elevators::elevators_raw |>
  drop_na(DV_MANUFACTURER) |>
  pull(DV_MANUFACTURER) |>
  self_stringdist() |>
  arrange(value)
```
```{r}
#| label: elevators-dv_manufacturer-table
#| eval: false
#| echo: false
library(tidyverse) | |
# Tabulate the manufacturer names that start with "O" but do not contain
# "OLD", ignoring missing names.
elevators::elevators_raw |>
  filter(
    !is.na(DV_MANUFACTURER),
    str_detect(DV_MANUFACTURER, "^O"),
    !str_detect(DV_MANUFACTURER, "OLD")
  ) |>
  pull(DV_MANUFACTURER) |>
  table()
```
```{r}
#| label: wine_ratings
#| eval: false
#| echo: false
# Read the wine ratings CSV from the TidyTuesday repository on GitHub.
wine_ratings <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-28/winemag-data-130k-v2.csv"
)

library(tidyverse)

# Open the per-value counts of region_1 in the data viewer.
View(dplyr::count(wine_ratings, region_1))
library(tidyverse) | |
#' Pairwise string distances between the distinct values of a vector
#'
#' @param x A vector of strings. Duplicates and `NA`s are dropped before
#'   distances are computed.
#' @return A data frame with one row per ordered pair of distinct values:
#'   `Var1` and `Var2` hold the two strings and `value` holds the result of
#'   `stringdist::stringdistmatrix()` for that pair. Pairs with a distance of
#'   zero (each value compared with itself) are omitted.
self_stringdist <- function(x) {
  # A missing string has no usable distance; left in, its NA distances would
  # turn into all-NA rows when the result is subset below.
  x <- unique(x[!is.na(x)])

  dists <- stringdist::stringdistmatrix(x, x)

  # Flatten the square matrix column by column (row index varying fastest),
  # which is the layout reshape2::melt() produced, using base R only.
  out <- data.frame(
    Var1 = x[as.vector(row(dists))],
    Var2 = x[as.vector(col(dists))],
    value = as.vector(dists)
  )

  # which() skips NA comparisons instead of propagating them into the subset.
  out[which(out$value != 0), ]
}
# Rank every pair of distinct region_1 values by string distance, closest
# pairs first, returned as a tibble.
wine_ratings |>
  pull(region_1) |>
  self_stringdist() |>
  as_tibble() |>
  arrange(value)
```