问题描述
在此处上传 Divvy 数据集(csv 文件)
Oct_2020_tripdata <- read_csv("Oct 2020.csv")
Nov_2020_tripdata <- read_csv("Nov 2020.csv")
Dec_2020_tripdata <- read_csv("Dec 2020.csv")
Jan_2021_tripdata <- read_csv("Jan 2021.csv")
Feb_2021_tripdata <- read_csv("Feb 2021.csv")
Mar_2021_tripdata <- read_csv("Mar 2021.csv")
将相关列转换为字符,以便它们可以正确堆叠
Oct_2020_tripdata <- mutate(Oct_2020_tripdata, start_station_id = as.character(start_station_id)
,end_station_id = as.character(end_station_id))
Nov_2020_tripdata <- mutate(Nov_2020_tripdata, start_station_id = as.character(start_station_id)
,end_station_id = as.character(end_station_id))
Dec_2020_tripdata <- mutate(Dec_2020_tripdata, start_station_id = as.character(start_station_id)
,end_station_id = as.character(end_station_id))
Jan_2021_tripdata <- mutate(Jan_2021_tripdata, start_station_id = as.character(start_station_id)
,end_station_id = as.character(end_station_id))
Feb_2021_tripdata <- mutate(Feb_2021_tripdata, start_station_id = as.character(start_station_id)
,end_station_id = as.character(end_station_id))
Mar_2021_tripdata <- mutate(Mar_2021_tripdata, start_station_id = as.character(start_station_id)
,end_station_id = as.character(end_station_id))
绑定数据框
all_trips <- bind_rows(Oct_2020_tripdata, Nov_2020_tripdata, Dec_2020_tripdata,
Jan_2021_tripdata, Feb_2021_tripdata, Mar_2021_tripdata)
删除缺少值的行
colSums(is.na(all_trips))
all_trips_cleaned <- all_trips[complete.cases(all_trips), ]
过滤大于ended_at 的started_at 数据
all_trips_cleaned <- all_trips_cleaned %>%
filter(all_trips_cleaned$started_at < all_trips_cleaned$ended_at)
创建新列以列出每次骑行的日期、月份、日期和年份
all_trips_cleaned$date <- as.Date(all_trips_cleaned$started_at, format= "%m/%d/%Y")
all_trips_cleaned$month <- format(as.Date(all_trips_cleaned$date), "%m")
all_trips_cleaned$day <- format(as.Date(all_trips_cleaned$date), "%d")
all_trips_cleaned$year <- format(as.Date(all_trips_cleaned$date), "%Y")
all_trips_cleaned$day_of_week <- format(as.Date(all_trips_cleaned$date), "%A")
尝试添加一个新列以使用 R 将每个骑行长度(以秒为单位)计算为数字
all_trips_cleaned$ride_length <- as.numeric(difftime(all_trips_cleaned$ended_at, all_trips_cleaned$started_at))
这是我收到的错误消息:
Error in as.POSIXlt.character(x, tz, ...) :
character string is not in a standard unambiguous format
标签: rdifftimeposixlt