library(tidyverse) # includes readr & tibble
library(rio) # for almost any data import/export
library(haven) # for SPSS, Stata,and SAS files
library(readxl) # for Excel files
library(googlesheets4) # for Google Sheets
Load libraries
Options for viewing data
data(cars) # make data from R package available in global environment
#View(cars) # view in editor pane
#print(cars) # print to console
#cars # print to console
# show columns as rows
glimpse(cars)
Rows: 50
Columns: 2
$ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13…
$ dist <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26, 34…
#show first 10 lines of table
head(cars)
speed dist
1 4 2
2 4 10
3 7 4
4 7 22
5 8 16
6 9 10
# print nicely to web
::kable(head(cars)) knitr
speed | dist |
---|---|
4 | 2 |
4 | 10 |
7 | 4 |
7 | 22 |
8 | 16 |
9 | 10 |
Produce summaries
summary(cars)
speed dist
Min. : 4.0 Min. : 2.00
1st Qu.:12.0 1st Qu.: 26.00
Median :15.0 Median : 36.00
Mean :15.4 Mean : 42.98
3rd Qu.:19.0 3rd Qu.: 56.00
Max. :25.0 Max. :120.00
library(summarytools)
#view(dfSummary(starwars))
Importing data
Data was downloaded from link in textbook.
library(rio)
<- import(file = "data/demo.tsv")
demo_tsv
glimpse(demo_tsv)
Rows: 6
Columns: 6
$ character <chr> "A", "B", "C", "D", "E", "F"
$ factor <chr> "high", "low", "med", "high", "low", "med"
$ integer <int> 1, 2, 3, 4, 5, 6
$ double <dbl> 1.5, 2.5, 3.5, 4.5, 5.5, 6.5
$ logical <lgl> TRUE, TRUE, FALSE, FALSE, NA, TRUE
$ date <IDate> 2022-04-04, 2022-04-03, 2022-04-02, 2022-04-01, 2022-03-31, …
$factor <- as.factor(demo_tsv$factor)
demo_tsv
glimpse(demo_tsv)
Rows: 6
Columns: 6
$ character <chr> "A", "B", "C", "D", "E", "F"
$ factor <fct> high, low, med, high, low, med
$ integer <int> 1, 2, 3, 4, 5, 6
$ double <dbl> 1.5, 2.5, 3.5, 4.5, 5.5, 6.5
$ logical <lgl> TRUE, TRUE, FALSE, FALSE, NA, TRUE
$ date <IDate> 2022-04-04, 2022-04-03, 2022-04-02, 2022-04-01, 2022-03-31, …
$
demo_tsv$factor demo_tsv
NULL
<- import("data/demo.tsv") # tab-separated values
demo_tsv <- import("data/demo.csv") # comma-separated values
demo_csv <- import("data/demo.xlsx") # Excel format
demo_xls <- import("data/demo.sav") # SPSS format demo_sav
gs4_deauth() # skip authorisation for public data
<- googlesheets4::read_sheet(
demo_gs4 ss = "16dkq0YL0J7fyAwT1pdgj1bNNrheckAU_2-DKuuM6aGI"
)
gs4_get(ss = "16dkq0YL0J7fyAwT1pdgj1bNNrheckAU_2-DKuuM6aGI")
Spreadsheet name: demo
ID: 16dkq0YL0J7fyAwT1pdgj1bNNrheckAU_2-DKuuM6aGI
Locale: en_US
Time zone: Europe/London
# of sheets: 1
(Sheet name): (Nominal extent in rows x columns)
Sheet1: 1000 x 26
<- readr::read_csv("data/demo.csv")
demo
spec(demo)
cols(
character = col_character(),
factor = col_character(),
integer = col_double(),
double = col_double(),
logical = col_logical(),
date = col_date(format = "")
)
<- cols(
corrected_cols character = col_character(),
factor = col_factor(levels = c("low", "med", "high")),
integer = col_integer(),
double = col_double(),
logical = col_logical(),
date = col_date(format = "%Y-%m-%d")
)
<- readr::read_csv("data/demo.csv", col_types = corrected_cols)
demo
glimpse(demo)
Rows: 6
Columns: 6
$ character <chr> "A", "B", "C", "D", "E", "F"
$ factor <fct> high, low, med, high, low, med
$ integer <int> 1, 2, 3, 4, 5, 6
$ double <dbl> 1.5, 2.5, 3.5, 4.5, 5.5, 6.5
$ logical <lgl> TRUE, TRUE, FALSE, FALSE, NA, TRUE
$ date <date> 2022-04-04, 2022-04-03, 2022-04-02, 2022-04-01, 2022-03-31, …
Write your own data
<- tibble(
avatar name = c("Katara", "Toph", "Sokka"),
bends = c("water", "earth", NA),
friendly = TRUE,
numbers = c(1,2,3),
more_numbers = 1:3,
even_more = seq(1,3,1)
)
# print it
avatar
# A tibble: 3 × 6
name bends friendly numbers more_numbers even_more
<chr> <chr> <lgl> <dbl> <int> <dbl>
1 Katara water TRUE 1 1 1
2 Toph earth TRUE 2 2 2
3 Sokka <NA> TRUE 3 3 3
<- tribble(
avatar_by_row ~name, ~bends, ~friendly, ~numbers,
"Katara", "water", TRUE, 1L,
"Toph", "earth", TRUE, 2L,
"Sokka", NA, TRUE, 3L
)
avatar_by_row
# A tibble: 3 × 4
name bends friendly numbers
<chr> <chr> <lgl> <int>
1 Katara water TRUE 1
2 Toph earth TRUE 2
3 Sokka <NA> TRUE 3
Export data
export(avatar, "avatar.csv")