CrumpLabExampleBlog - Data Tidying

look at untidy data

untidy_data <- read_csv("data/untidy_data.csv", show_col_types = FALSE)

knitr::kable(untidy_data)

customer_id	itemsprice_2018	itemsprice_2019	itemsprice_2020	totalprice_2018	totalprice_2019	totalprice_2020
1	2 (3.91)	8 (4.72)	10 (5.59)	7.82	37.76	55.90
2	1 (3.91)	6 (4.72)	1 (5.59)	3.91	28.32	5.59
3	4 (3.91)	5 (4.72)	5 (5.59)	15.64	23.60	27.95
4	10 (3.91)	1 (4.72)	3 (5.59)	39.10	4.72	16.77
5	3 (3.91)	9 (4.72)	8 (5.59)	11.73	42.48	44.72

look at tidy data

tidy_data <- read_csv("data/tidy_data.csv", show_col_types = FALSE)

knitr::kable(tidy_data)

customer_id	year	items	price_per_item	totalprice
1	2018	2	3.91	7.82
1	2019	8	4.72	37.76
1	2020	10	5.59	55.90
2	2018	1	3.91	3.91
2	2019	6	4.72	28.32
2	2020	1	5.59	5.59
3	2018	4	3.91	15.64
3	2019	5	4.72	23.60
3	2020	5	5.59	27.95
4	2018	10	3.91	39.10
4	2019	1	4.72	4.72
4	2020	3	5.59	16.77
5	2018	3	3.91	11.73
5	2019	9	4.72	42.48
5	2020	8	5.59	44.72

convert messy to tidy

tidy_data_2 <- read_csv(file = "data/untidy_data.csv",
                      show_col_types = FALSE) %>%
  pivot_longer(
    cols = itemsprice_2018:totalprice_2020,
    names_to = c("category", "year"),
    names_sep = "_", 
    values_to = "value", 
    names_transform = list(year = as.integer),
    values_transform = list(value = as.character) 
  ) %>%
  pivot_wider(
    id_cols = c(customer_id, year),
    names_from = category,
    values_from = value
  ) %>%
  separate(
    col = itemsprice,
    into = c("items", "price_per_item"),
    sep = " ", 
    remove = TRUE, 
    convert = TRUE
  ) %>%
  mutate(
    price_per_item = stringr::str_replace_all(
      string = price_per_item, 
      pattern = "[()]", 
      replacement = ""
    )
  ) %>%
  type_convert(
    trim_ws = TRUE
  )


── Column specification ────────────────────────────────────────────────────────
cols(
  price_per_item = col_double(),
  totalprice = col_double()
)