Notes: Setting up R environment by loading the PACKAGES and LIBRARIES
#install.packages("tidyverse")
#install.packages("skimr")
#install.packages("janitor")
#install.packages("lubridate")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(skimr)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readxl)
trip03_22 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202203-divvy-tripdata.xlsx")
trip02_22 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202202-divvy-tripdata.xlsx")
trip01_22 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202201-divvy-tripdata.xlsx")
trip12_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202112-divvy-tripdata.xlsx")
trip11_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202111-divvy-tripdata.xlsx")
trip10_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202110-divvy-tripdata.xlsx")
trip09_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202109-divvy-tripdata.xlsx")
trip08_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202108-divvy-tripdata.xlsx")
trip07_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202107-divvy-tripdata.xlsx")
trip06_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202106-divvy-tripdata.xlsx")
trip05_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202105-divvy-tripdata.xlsx")
trip04_21 <- read_excel("C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/202104-divvy-tripdata.xlsx")
colnames(trip03_22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip02_22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip01_22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip12_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip11_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip10_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip09_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip08_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip07_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip06_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip05_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
colnames(trip04_21)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
str(trip03_22)
## tibble [284,042 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:284042] "47EC0A7F82E65D52" "8494861979B0F477" "EFE527AF80B66109" "9F446FD9DEE3F389" ...
## $ rideable_type : chr [1:284042] "classic_bike" "electric_bike" "classic_bike" "classic_bike" ...
## $ started_at : POSIXct[1:284042], format: "2022-03-21 13:45:01" "2022-03-16 09:37:16" ...
## $ ended_at : POSIXct[1:284042], format: "2022-03-21 13:51:18" "2022-03-16 09:43:34" ...
## $ start_station_name: chr [1:284042] "Wabash Ave & Wacker Pl" "Michigan Ave & Oak St" "Broadway & Berwyn Ave" "Wabash Ave & Wacker Pl" ...
## $ start_station_id : chr [1:284042] "TA1307000131" "13042" "13109" "TA1307000131" ...
## $ end_station_name : chr [1:284042] "Kingsbury St & Kinzie St" "Orleans St & Chestnut St (NEXT Apts)" "Broadway & Ridge Ave" "Franklin St & Jackson Blvd" ...
## $ end_station_id : chr [1:284042] "KA1503000043" "620" "15578" "TA1305000025" ...
## $ start_lat : num [1:284042] 41.9 41.9 42 41.9 41.9 ...
## $ start_lng : num [1:284042] -87.6 -87.6 -87.7 -87.6 -87.6 ...
## $ end_lat : num [1:284042] 41.9 41.9 42 41.9 41.9 ...
## $ end_lng : num [1:284042] -87.6 -87.6 -87.7 -87.6 -87.7 ...
## $ member_casual : chr [1:284042] "member" "member" "member" "member" ...
str(trip02_22)
## tibble [115,609 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:115609] "E1E065E7ED285C02" "1602DCDC5B30FFE3" "BE7DD2AF4B55C4AF" "A1789BDF844412BE" ...
## $ rideable_type : chr [1:115609] "classic_bike" "classic_bike" "classic_bike" "classic_bike" ...
## $ started_at : POSIXct[1:115609], format: "2022-02-19 18:08:41" "2022-02-20 17:41:30" ...
## $ ended_at : POSIXct[1:115609], format: "2022-02-19 18:23:56" "2022-02-20 17:45:56" ...
## $ start_station_name: chr [1:115609] "State St & Randolph St" "Halsted St & Wrightwood Ave" "State St & Randolph St" "Southport Ave & Waveland Ave" ...
## $ start_station_id : chr [1:115609] "TA1305000029" "TA1309000061" "TA1305000029" "13235" ...
## $ end_station_name : chr [1:115609] "Clark St & Lincoln Ave" "Southport Ave & Wrightwood Ave" "Canal St & Adams St" "Broadway & Sheridan Rd" ...
## $ end_station_id : chr [1:115609] "13179" "TA1307000113" "13011" "13323" ...
## $ start_lat : num [1:115609] 41.9 41.9 41.9 41.9 41.9 ...
## $ start_lng : num [1:115609] -87.6 -87.6 -87.6 -87.7 -87.6 ...
## $ end_lat : num [1:115609] 41.9 41.9 41.9 42 41.9 ...
## $ end_lng : num [1:115609] -87.6 -87.7 -87.6 -87.6 -87.6 ...
## $ member_casual : chr [1:115609] "member" "member" "member" "member" ...
str(trip01_22)
## tibble [103,770 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:103770] "C2F7DD78E82EC875" "A6CF8980A652D272" "BD0F91DFF741C66D" "CBB80ED419105406" ...
## $ rideable_type : chr [1:103770] "electric_bike" "electric_bike" "classic_bike" "classic_bike" ...
## $ started_at : POSIXct[1:103770], format: "2022-01-13 11:59:47" "2022-01-10 08:41:56" ...
## $ ended_at : POSIXct[1:103770], format: "2022-01-13 12:02:44" "2022-01-10 08:46:17" ...
## $ start_station_name: chr [1:103770] "Glenwood Ave & Touhy Ave" "Glenwood Ave & Touhy Ave" "Sheffield Ave & Fullerton Ave" "Clark St & Bryn Mawr Ave" ...
## $ start_station_id : chr [1:103770] "525" "525" "TA1306000016" "KA1504000151" ...
## $ end_station_name : chr [1:103770] "Clark St & Touhy Ave" "Clark St & Touhy Ave" "Greenview Ave & Fullerton Ave" "Paulina St & Montrose Ave" ...
## $ end_station_id : chr [1:103770] "RP-007" "RP-007" "TA1307000001" "TA1309000021" ...
## $ start_lat : num [1:103770] 42 42 41.9 42 41.9 ...
## $ start_lng : num [1:103770] -87.7 -87.7 -87.7 -87.7 -87.6 ...
## $ end_lat : num [1:103770] 42 42 41.9 42 41.9 ...
## $ end_lng : num [1:103770] -87.7 -87.7 -87.7 -87.7 -87.6 ...
## $ member_casual : chr [1:103770] "casual" "casual" "member" "casual" ...
str(trip12_21)
## tibble [247,540 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:247540] "46F8167220E4431F" "73A77762838B32FD" "4CF42452054F59C5" "3278BA87BF698339" ...
## $ rideable_type : chr [1:247540] "electric_bike" "electric_bike" "electric_bike" "classic_bike" ...
## $ started_at : POSIXct[1:247540], format: "2021-12-07 15:06:07" "2021-12-11 03:43:29" ...
## $ ended_at : POSIXct[1:247540], format: "2021-12-07 15:13:42" "2021-12-11 04:10:23" ...
## $ start_station_name: chr [1:247540] "Laflin St & Cullerton St" "LaSalle Dr & Huron St" "Halsted St & North Branch St" "Halsted St & North Branch St" ...
## $ start_station_id : chr [1:247540] "13307" "KP1705001026" "KA1504000117" "KA1504000117" ...
## $ end_station_name : chr [1:247540] "Morgan St & Polk St" "Clarendon Ave & Leland Ave" "Broadway & Barry Ave" "LaSalle Dr & Huron St" ...
## $ end_station_id : chr [1:247540] "TA1307000130" "TA1307000119" "13137" "KP1705001026" ...
## $ start_lat : num [1:247540] 41.9 41.9 41.9 41.9 41.9 ...
## $ start_lng : num [1:247540] -87.7 -87.6 -87.6 -87.6 -87.7 ...
## $ end_lat : num [1:247540] 41.9 42 41.9 41.9 41.9 ...
## $ end_lng : num [1:247540] -87.7 -87.7 -87.6 -87.6 -87.6 ...
## $ member_casual : chr [1:247540] "member" "casual" "member" "member" ...
str(trip11_21)
## tibble [359,978 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:359978] "7C00A93E10556E47" "90854840DFD508BA" "0A7D10CDD144061C" "2F3BE33085BCFF02" ...
## $ rideable_type : chr [1:359978] "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
## $ started_at : POSIXct[1:359978], format: "2021-11-27 13:27:38" "2021-11-27 13:38:25" ...
## $ ended_at : POSIXct[1:359978], format: "2021-11-27 13:46:38" "2021-11-27 13:56:10" ...
## $ start_station_name: chr [1:359978] NA NA NA NA ...
## $ start_station_id : chr [1:359978] NA NA NA NA ...
## $ end_station_name : chr [1:359978] NA NA NA NA ...
## $ end_station_id : chr [1:359978] NA NA NA NA ...
## $ start_lat : num [1:359978] 41.9 42 42 41.9 41.9 ...
## $ start_lng : num [1:359978] -87.7 -87.7 -87.7 -87.8 -87.6 ...
## $ end_lat : num [1:359978] 42 41.9 42 41.9 41.9 ...
## $ end_lng : num [1:359978] -87.7 -87.7 -87.7 -87.8 -87.6 ...
## $ member_casual : chr [1:359978] "casual" "casual" "casual" "casual" ...
str(trip10_21)
## tibble [631,226 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:631226] "620BC6107255BF4C" "4471C70731AB2E45" "26CA69D43D15EE14" "362947F0437E1514" ...
## $ rideable_type : chr [1:631226] "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
## $ started_at : POSIXct[1:631226], format: "2021-10-22 12:46:42" "2021-10-21 09:12:37" ...
## $ ended_at : POSIXct[1:631226], format: "2021-10-22 12:49:50" "2021-10-21 09:14:14" ...
## $ start_station_name: chr [1:631226] "Kingsbury St & Kinzie St" NA NA NA ...
## $ start_station_id : chr [1:631226] "KA1503000043" NA NA NA ...
## $ end_station_name : chr [1:631226] NA NA NA NA ...
## $ end_station_id : chr [1:631226] NA NA NA NA ...
## $ start_lat : num [1:631226] 41.9 41.9 41.9 41.9 41.9 ...
## $ start_lng : num [1:631226] -87.6 -87.7 -87.7 -87.7 -87.7 ...
## $ end_lat : num [1:631226] 41.9 41.9 41.9 41.9 41.9 ...
## $ end_lng : num [1:631226] -87.6 -87.7 -87.7 -87.7 -87.7 ...
## $ member_casual : chr [1:631226] "member" "member" "member" "member" ...
str(trip09_21)
## tibble [756,147 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:756147] "9DC7B962304CBFD8" "F930E2C6872D6B32" "6EF72137900BB910" "78D1DE133B3DBF55" ...
## $ rideable_type : chr [1:756147] "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
## $ started_at : POSIXct[1:756147], format: "2021-09-28 16:07:10" "2021-09-28 14:24:51" ...
## $ ended_at : POSIXct[1:756147], format: "2021-09-28 16:09:54" "2021-09-28 14:40:05" ...
## $ start_station_name: chr [1:756147] NA NA NA NA ...
## $ start_station_id : chr [1:756147] NA NA NA NA ...
## $ end_station_name : chr [1:756147] NA NA NA NA ...
## $ end_station_id : chr [1:756147] NA NA NA NA ...
## $ start_lat : num [1:756147] 41.9 41.9 41.8 41.8 41.9 ...
## $ start_lng : num [1:756147] -87.7 -87.6 -87.7 -87.7 -87.7 ...
## $ end_lat : num [1:756147] 41.9 42 41.8 41.8 41.9 ...
## $ end_lng : num [1:756147] -87.7 -87.7 -87.7 -87.7 -87.7 ...
## $ member_casual : chr [1:756147] "casual" "casual" "casual" "casual" ...
str(trip08_21)
## tibble [804,352 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:804352] "99103BB87CC6C1BB" "EAFCCCFB0A3FC5A1" "9EF4F46C57AD234D" "5834D3208BFAF1DA" ...
## $ rideable_type : chr [1:804352] "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
## $ started_at : POSIXct[1:804352], format: "2021-08-10 17:15:49" "2021-08-10 17:23:14" ...
## $ ended_at : POSIXct[1:804352], format: "2021-08-10 17:22:44" "2021-08-10 17:39:24" ...
## $ start_station_name: chr [1:804352] NA NA NA NA ...
## $ start_station_id : chr [1:804352] NA NA NA NA ...
## $ end_station_name : chr [1:804352] NA NA NA NA ...
## $ end_station_id : chr [1:804352] NA NA NA NA ...
## $ start_lat : num [1:804352] 41.8 41.8 42 42 41.8 ...
## $ start_lng : num [1:804352] -87.7 -87.7 -87.7 -87.7 -87.6 ...
## $ end_lat : num [1:804352] 41.8 41.8 42 42 41.8 ...
## $ end_lng : num [1:804352] -87.7 -87.6 -87.7 -87.7 -87.6 ...
## $ member_casual : chr [1:804352] "member" "member" "member" "member" ...
str(trip07_21)
## tibble [822,410 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:822410] "0A1B623926EF4E16" "B2D5583A5A5E76EE" "6F264597DDBF427A" "379B58EAB20E8AA5" ...
## $ rideable_type : chr [1:822410] "docked_bike" "classic_bike" "classic_bike" "classic_bike" ...
## $ started_at : POSIXct[1:822410], format: "2021-07-02 14:44:36" "2021-07-07 16:57:42" ...
## $ ended_at : POSIXct[1:822410], format: "2021-07-02 15:19:58" "2021-07-07 17:16:09" ...
## $ start_station_name: chr [1:822410] "Michigan Ave & Washington St" "California Ave & Cortez St" "Wabash Ave & 16th St" "California Ave & Cortez St" ...
## $ start_station_id : chr [1:822410] "13001" "17660" "SL-012" "17660" ...
## $ end_station_name : chr [1:822410] "Halsted St & North Branch St" "Wood St & Hubbard St" "Rush St & Hubbard St" "Carpenter St & Huron St" ...
## $ end_station_id : chr [1:822410] "KA1504000117" "13432" "KA1503000044" "13196" ...
## $ start_lat : num [1:822410] 41.9 41.9 41.9 41.9 41.9 ...
## $ start_lng : num [1:822410] -87.6 -87.7 -87.6 -87.7 -87.7 ...
## $ end_lat : num [1:822410] 41.9 41.9 41.9 41.9 41.9 ...
## $ end_lng : num [1:822410] -87.6 -87.7 -87.6 -87.7 -87.7 ...
## $ member_casual : chr [1:822410] "casual" "casual" "member" "member" ...
str(trip06_21)
## tibble [729,595 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:729595] "99FEC93BA843FB20" "06048DCFC8520CAF" "9598066F68045DF2" "B03C0FE48C412214" ...
## $ rideable_type : chr [1:729595] "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
## $ started_at : POSIXct[1:729595], format: "2021-06-13 14:31:28" "2021-06-04 11:18:02" ...
## $ ended_at : POSIXct[1:729595], format: "2021-06-13 14:34:11" "2021-06-04 11:24:19" ...
## $ start_station_name: chr [1:729595] NA NA NA NA ...
## $ start_station_id : chr [1:729595] NA NA NA NA ...
## $ end_station_name : chr [1:729595] NA NA NA NA ...
## $ end_station_id : chr [1:729595] NA NA NA NA ...
## $ start_lat : num [1:729595] 41.8 41.8 41.8 41.8 41.8 ...
## $ start_lng : num [1:729595] -87.6 -87.6 -87.6 -87.6 -87.6 ...
## $ end_lat : num [1:729595] 41.8 41.8 41.8 41.8 41.8 ...
## $ end_lng : num [1:729595] -87.6 -87.6 -87.6 -87.6 -87.6 ...
## $ member_casual : chr [1:729595] "member" "member" "member" "member" ...
str(trip05_21)
## tibble [531,633 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:531633] "C809ED75D6160B2A" "DD59FDCE0ACACAF3" "0AB83CB88C43EFC2" "7881AC6D39110C60" ...
## $ rideable_type : chr [1:531633] "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
## $ started_at : POSIXct[1:531633], format: "2021-05-30 11:58:15" "2021-05-30 11:29:14" ...
## $ ended_at : POSIXct[1:531633], format: "2021-05-30 12:10:39" "2021-05-30 12:14:09" ...
## $ start_station_name: chr [1:531633] NA NA NA NA ...
## $ start_station_id : chr [1:531633] NA NA NA NA ...
## $ end_station_name : chr [1:531633] NA NA NA NA ...
## $ end_station_id : chr [1:531633] NA NA NA NA ...
## $ start_lat : num [1:531633] 41.9 41.9 41.9 41.9 41.9 ...
## $ start_lng : num [1:531633] -87.6 -87.6 -87.7 -87.7 -87.7 ...
## $ end_lat : num [1:531633] 41.9 41.8 41.9 41.9 41.9 ...
## $ end_lng : num [1:531633] -87.6 -87.6 -87.7 -87.7 -87.7 ...
## $ member_casual : chr [1:531633] "casual" "casual" "casual" "casual" ...
str(trip04_21)
## tibble [337,230 x 13] (S3: tbl_df/tbl/data.frame)
## $ ride_id : chr [1:337230] "6C992BD37A98A63F" "1E0145613A209000" "E498E15508A80BAD" "1887262AD101C604" ...
## $ rideable_type : chr [1:337230] "classic_bike" "docked_bike" "docked_bike" "classic_bike" ...
## $ started_at : POSIXct[1:337230], format: "2021-04-12 18:25:36" "2021-04-27 17:27:11" ...
## $ ended_at : POSIXct[1:337230], format: "2021-04-12 18:56:55" "2021-04-27 18:31:29" ...
## $ start_station_name: chr [1:337230] "State St & Pearson St" "Dorchester Ave & 49th St" "Loomis Blvd & 84th St" "Honore St & Division St" ...
## $ start_station_id : chr [1:337230] "TA1307000061" "KA1503000069" "20121" "TA1305000034" ...
## $ end_station_name : chr [1:337230] "Southport Ave & Waveland Ave" "Dorchester Ave & 49th St" "Loomis Blvd & 84th St" "Southport Ave & Waveland Ave" ...
## $ end_station_id : chr [1:337230] "13235" "KA1503000069" "20121" "13235" ...
## $ start_lat : num [1:337230] 41.9 41.8 41.7 41.9 41.7 ...
## $ start_lng : num [1:337230] -87.6 -87.6 -87.7 -87.7 -87.7 ...
## $ end_lat : num [1:337230] 41.9 41.8 41.7 41.9 41.7 ...
## $ end_lng : num [1:337230] -87.7 -87.6 -87.7 -87.7 -87.7 ...
## $ member_casual : chr [1:337230] "member" "casual" "casual" "member" ...
all_trips <- bind_rows(trip03_22, trip02_22, trip01_22, trip12_21, trip11_21, trip10_21, trip09_21, trip08_21, trip07_21, trip06_21, trip05_21, trip04_21)
colnames(all_trips)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual"
nrow(all_trips)
## [1] 5723532
dim(all_trips)
## [1] 5723532 13
head(all_trips)
## # A tibble: 6 x 13
## ride_id rideable_type started_at ended_at start_station_n~
## <chr> <chr> <dttm> <dttm> <chr>
## 1 47EC0A~ classic_bike 2022-03-21 13:45:01 2022-03-21 13:51:18 Wabash Ave & Wa~
## 2 849486~ electric_bike 2022-03-16 09:37:16 2022-03-16 09:43:34 Michigan Ave & ~
## 3 EFE527~ classic_bike 2022-03-23 19:52:02 2022-03-23 19:54:48 Broadway & Berw~
## 4 9F446F~ classic_bike 2022-03-01 19:12:26 2022-03-01 19:22:14 Wabash Ave & Wa~
## 5 431128~ classic_bike 2022-03-21 18:37:01 2022-03-21 19:19:11 DuSable Lake Sh~
## 6 9AA8A1~ classic_bike 2022-03-07 17:10:22 2022-03-07 17:15:04 Bissell St & Ar~
## # ... with 8 more variables: start_station_id <chr>, end_station_name <chr>,
## # end_station_id <chr>, start_lat <dbl>, start_lng <dbl>, end_lat <dbl>,
## # end_lng <dbl>, member_casual <chr>
summary(all_trips)
## ride_id rideable_type started_at
## Length:5723532 Length:5723532 Min. :2021-04-01 00:03:18
## Class :character Class :character 1st Qu.:2021-06-22 15:20:26
## Mode :character Mode :character Median :2021-08-17 18:25:49
## Mean :2021-08-26 22:25:18
## 3rd Qu.:2021-10-14 19:48:10
## Max. :2022-03-31 23:59:47
##
## ended_at start_station_name start_station_id
## Min. :2021-04-01 00:14:29 Length:5723532 Length:5723532
## 1st Qu.:2021-06-22 15:47:37 Class :character Class :character
## Median :2021-08-17 18:44:32 Mode :character Mode :character
## Mean :2021-08-26 22:46:50
## 3rd Qu.:2021-10-14 20:03:28
## Max. :2022-04-01 22:10:12
##
## end_station_name end_station_id start_lat start_lng
## Length:5723532 Length:5723532 Min. :41.64 Min. :-87.84
## Class :character Class :character 1st Qu.:41.88 1st Qu.:-87.66
## Mode :character Mode :character Median :41.90 Median :-87.64
## Mean :41.90 Mean :-87.65
## 3rd Qu.:41.93 3rd Qu.:-87.63
## Max. :45.64 Max. :-73.80
##
## end_lat end_lng member_casual
## Min. :41.39 Min. :-88.97 Length:5723532
## 1st Qu.:41.88 1st Qu.:-87.66 Class :character
## Median :41.90 Median :-87.64 Mode :character
## Mean :41.90 Mean :-87.65
## 3rd Qu.:41.93 3rd Qu.:-87.63
## Max. :42.17 Max. :-87.49
## NA's :4716 NA's :4716
all_trips$date <- as.Date(all_trips$started_at)
all_trips$year <- format(as.Date(all_trips$date), "%Y")
all_trips$month <- format(as.Date(all_trips$date), "%m")
all_trips$year_month <- paste(all_trips$year,"_",all_trips$month)
all_trips$day <- format(as.Date(all_trips$date), "%d")
all_trips$day_of_week <- format(as.Date(all_trips$date),"%A")
all_trips <- all_trips %>%
mutate(part_of_week = case_when(day_of_week == "Monday" ~ "Workday",
day_of_week == "Tuesday" ~ "Workday",
day_of_week == "Wednesday" ~ "Workday",
day_of_week == "Thursday" ~ "Workday",
day_of_week == "Friday" ~ "Workday",
day_of_week == "Saturday" ~ "Weekend",
day_of_week == "Sunday" ~ "Weekend"))
all_trips$day_hour <- format(as.POSIXct(all_trips$started_at), format = "%H")
all_trips$ride_length_s <- difftime(all_trips$ended_at, all_trips$started_at)
all_trips$ride_length_s <- as.numeric(as.character(all_trips$ride_length_s))
is.numeric(all_trips$ride_length_s)
## [1] TRUE
all_trips <- all_trips %>%
mutate(ride_length_m = ride_length_s/60)
all_trips$ride_length_m <- round(all_trips$ride_length_m, digits = 0)
all_trips$ride_number <- rep(c(1),5723532)
is.numeric(all_trips$ride_number)
## [1] TRUE
View(all_trips)
all_trips2 <- all_trips[!(all_trips$ride_length_s < 0),]
nrow(all_trips2)-nrow(all_trips)
## [1] -145
all_trips2$ride_id[duplicated(all_trips2$ride_id)]
## character(0)
summary(all_trips2$ride_length_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 7.00 12.00 21.54 21.00 55944.00
mean(all_trips2$ride_length_m)
## [1] 21.54192
median(all_trips2$ride_length_m)
## [1] 12
min(all_trips2$ride_length_m)
## [1] 0
max(all_trips2$ride_length_m)
## [1] 55944
aggregate(all_trips2$ride_length_m ~ all_trips2$member_casual, FUN=mean)
## all_trips2$member_casual all_trips2$ride_length_m
## 1 casual 31.73913
## 2 member 13.36824
aggregate(all_trips2$ride_length_m ~ all_trips2$member_casual, FUN=median)
## all_trips2$member_casual all_trips2$ride_length_m
## 1 casual 16
## 2 member 9
aggregate(all_trips2$ride_length_m ~ all_trips2$member_casual, FUN=min)
## all_trips2$member_casual all_trips2$ride_length_m
## 1 casual 0
## 2 member 0
aggregate(all_trips2$ride_length_m ~ all_trips2$member_casual, FUN=max)
## all_trips2$member_casual all_trips2$ride_length_m
## 1 casual 55944
## 2 member 1560
all_trips2$day_of_week <- ordered(all_trips2$day_of_week, levels=c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
aggregate(all_trips2$ride_length_m~all_trips2$member_casual+all_trips2$day_of_week, FUN = mean)
## all_trips2$member_casual all_trips2$day_of_week all_trips2$ride_length_m
## 1 casual Monday 31.48139
## 2 member Monday 12.96697
## 3 casual Tuesday 27.43349
## 4 member Tuesday 12.52101
## 5 casual Wednesday 27.76503
## 6 member Wednesday 12.58763
## 7 casual Thursday 27.87952
## 8 member Thursday 12.56968
## 9 casual Friday 30.10219
## 10 member Friday 13.13726
## 11 casual Saturday 34.27940
## 12 member Saturday 14.99226
## 13 casual Sunday 37.40623
## 14 member Sunday 15.34271
all_trips2 %>%
group_by(member_casual) %>%
summarise(number_of_ridetrips = n())
## # A tibble: 2 x 2
## member_casual number_of_ridetrips
## <chr> <int>
## 1 casual 2546482
## 2 member 3176905
all_trips2 %>%
group_by(member_casual, rideable_type) %>%
summarise(number_of_ridetrips = n())
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## # A tibble: 5 x 3
## # Groups: member_casual [2]
## member_casual rideable_type number_of_ridetrips
## <chr> <chr> <int>
## 1 casual classic_bike 1257615
## 2 casual docked_bike 303984
## 3 casual electric_bike 984883
## 4 member classic_bike 1992992
## 5 member electric_bike 1183913
all_trips2 %>%
group_by(member_casual, day_of_week) %>%
summarise(number_of_ridetrips = n())
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## # A tibble: 14 x 3
## # Groups: member_casual [2]
## member_casual day_of_week number_of_ridetrips
## <chr> <ord> <int>
## 1 casual Monday 292993
## 2 casual Tuesday 276371
## 3 casual Wednesday 286400
## 4 casual Thursday 293632
## 5 casual Friday 364277
## 6 casual Saturday 550008
## 7 casual Sunday 482801
## 8 member Monday 439428
## 9 member Tuesday 490095
## 10 member Wednesday 499901
## 11 member Thursday 475330
## 12 member Friday 453108
## 13 member Saturday 431326
## 14 member Sunday 387717
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'part_of_week', 'member_casual'. You can
## override using the `.groups` argument.
## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.
all_trips3 <- select(all_trips2, -c(started_at, ended_at, start_station_id, end_station_id, ride_length_s, end_lat, end_lng, date, ride_id))
write.csv(all_trips3,"C:/Users/kchud/NewOne/PORTFOLIO/Cyclistic_Bikeshare/data/excel/Cyclistic_new.csv", row.names = TRUE)
Note that the echo = FALSE
parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.