<- tibble(
customers id = 1:5,
city = c("Port Ellen", "Dufftown", NA, "Aberlour", "Tobermory"),
postcode = c("PA42 7DU", "AB55 4DH", NA, "AB38 7RY", "PA75 6NR")
)
<- tibble(
orders id = c(2, 3, 4, 4, 5, 5, 6, 6, 7),
items = c(10, 18, 21, 23, 9, 11, 11, 12, 3)
)
what doers dplyr mutate do?
<- tibble(
orders_B id = c(2, 3, 4, 4, 5, 5, 6, 6, 7),
items = c(10, 18, 21, 23, 9, 11, 11, 12, 3)
)
# add a new column
%>%
orders_B mutate(satisfaction = NA)
# A tibble: 9 × 3
id items satisfaction
<dbl> <dbl> <lgl>
1 2 10 NA
2 3 18 NA
3 4 21 NA
4 4 23 NA
5 5 9 NA
6 5 11 NA
7 6 11 NA
8 6 12 NA
9 7 3 NA
# add mutiplie new column
%>%
orders_B mutate(satisfaction = NA,
numbers = 1:9)
# A tibble: 9 × 4
id items satisfaction numbers
<dbl> <dbl> <lgl> <int>
1 2 10 NA 1
2 3 18 NA 2
3 4 21 NA 3
4 4 23 NA 4
5 5 9 NA 5
6 5 11 NA 6
7 6 11 NA 7
8 6 12 NA 8
9 7 3 NA 9
# assign back to tibble
<- orders_B %>%
orders_B mutate(satisfaction = NA,
numbers = 1:9)
%>%
orders_B mutate(numbers_as_strings = as.character(numbers))
# A tibble: 9 × 5
id items satisfaction numbers numbers_as_strings
<dbl> <dbl> <lgl> <int> <chr>
1 2 10 NA 1 1
2 3 18 NA 2 2
3 4 21 NA 3 3
4 4 23 NA 4 4
5 5 9 NA 5 5
6 5 11 NA 6 6
7 6 11 NA 7 7
8 6 12 NA 8 8
9 7 3 NA 9 9
%>%
orders_B mutate(items_minus_id = items - id)
# A tibble: 9 × 5
id items satisfaction numbers items_minus_id
<dbl> <dbl> <lgl> <int> <dbl>
1 2 10 NA 1 8
2 3 18 NA 2 15
3 4 21 NA 3 17
4 4 23 NA 4 19
5 5 9 NA 5 4
6 5 11 NA 6 6
7 6 11 NA 7 5
8 6 12 NA 8 6
9 7 3 NA 9 -4
Dplyr filter
<- full_join(customers, orders, by = "id") full_data
Warning in full_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
full_data
# A tibble: 10 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 1 Port Ellen PA42 7DU NA
2 2 Dufftown AB55 4DH 10
3 3 <NA> <NA> 18
4 4 Aberlour AB38 7RY 21
5 4 Aberlour AB38 7RY 23
6 5 Tobermory PA75 6NR 9
7 5 Tobermory PA75 6NR 11
8 6 <NA> <NA> 11
9 6 <NA> <NA> 12
10 7 <NA> <NA> 3
%>%
full_data filter(items > 10)
# A tibble: 6 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 3 <NA> <NA> 18
2 4 Aberlour AB38 7RY 21
3 4 Aberlour AB38 7RY 23
4 5 Tobermory PA75 6NR 11
5 6 <NA> <NA> 11
6 6 <NA> <NA> 12
%>%
full_data filter(postcode == "AB38 7RY")
# A tibble: 2 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 4 Aberlour AB38 7RY 21
2 4 Aberlour AB38 7RY 23
%>%
full_data filter(postcode == "AB38 7RY" & items > 21)
# A tibble: 1 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 4 Aberlour AB38 7RY 23
%>%
full_data filter(postcode == "AB38 7RY",
> 21) items
# A tibble: 1 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 4 Aberlour AB38 7RY 23
%>%
full_data filter(city != "Aberlour")
# A tibble: 4 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 1 Port Ellen PA42 7DU NA
2 2 Dufftown AB55 4DH 10
3 5 Tobermory PA75 6NR 9
4 5 Tobermory PA75 6NR 11
%>%
full_data filter( is.na(city) == FALSE,
is.na(items) == FALSE)
# A tibble: 5 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 2 Dufftown AB55 4DH 10
2 4 Aberlour AB38 7RY 21
3 4 Aberlour AB38 7RY 23
4 5 Tobermory PA75 6NR 9
5 5 Tobermory PA75 6NR 11
%>%
full_data filter( is.na(city) == TRUE | is.na(items) == TRUE)
# A tibble: 5 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 1 Port Ellen PA42 7DU NA
2 3 <NA> <NA> 18
3 6 <NA> <NA> 11
4 6 <NA> <NA> 12
5 7 <NA> <NA> 3