-
Notifications
You must be signed in to change notification settings - Fork 1
/
00_schedule.R
143 lines (125 loc) · 4.87 KB
/
00_schedule.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
library(dplyr)
library(tidyr)
library(purrr)
library(rvest)
library(stringr)
rm(list = ls())
# get html
url <- "https://moovitapp.com/index/en/public_transit-lines-Jakarta-2044-851786"
h <- read_html(url)
# forming a list url table
link <- h %>%
html_elements("div.lines-container") %>%
html_elements("li.line-item") %>%
html_elements("a") %>%
html_attr("href")
route <- h %>%
html_elements("div.lines-container") %>%
html_elements("li.line-item") %>%
html_elements("strong.line-title") %>%
html_text()
contra <- gsub("\\d{1}$", "1", link)
route_name <- gsub("^.+line-(\\w+)-Jakarta.+", "\\1", link)
sch_url <- bind_cols(route_name, route, link, contra)
names(sch_url) <- c("name", "route", "url_direction", "url_contra")
# get alert
# get_alert <- function(x){
# x %>%
# read_html() %>%
# html_elements("label.alert-header") %>%
# html_text() %>%
# str_squish()
# }
#
# # caution: takes time!
# alert_direction <- map(sch_url$url_direction, get_alert)
# alert_direction <- map_chr(alert_direction, length)
# alert_direction <- ifelse(alert_direction > 0, "No Service", "Operation")
#
# # caution: takes time!
# alert_contra <- map(sch_url$url_contra, get_alert)
# alert_contra <- map_chr(alert_contra, length)
# alert_contra <- ifelse(alert_contra > 0, "No Service", "Operation")
#
# # add alert column
# sch_url <- bind_cols(sch_url, alert_direction, alert_contra) %>%
# rename("alert_direction" = "...5",
# "alert_contra" = "...6") %>%
# mutate(load_date = Sys.Date())
#
# # save url list
# saveRDS(sch_url, "data/schedule_url.rds")
# sch_url <- readRDS("data/schedule_url.rds")
# function for pull schedules
get_schedule <- function(url_char) {
read_html(url_char) %>% html_table()
}
# function for time transformation
trans_schedule <- function(t) {
t %>%
rename("day" = "Day", "oh" = "Operating Hours") %>%
mutate(oh = ifelse(oh == "Not Operational", NA, oh)) %>%
separate(col = "oh", into = c("start_time", "end_time"), sep = " - ") %>%
mutate(start_time = ifelse(str_detect(start_time, "AM"),
str_remove(start_time, "\\s?AM"),
paste(as.numeric(gsub(pattern = "^(\\d{1,2})\\:\\d{2}\\sPM$",
replacement = "\\1",
x = .$start_time)) + 12,
gsub(pattern = "^\\d{1,2}:(\\d{2})\\sPM$",
replacement = "\\1",
x = .$start_time),
sep = ":")),
start_time = ifelse(!is.na(start_time), paste(start_time, "00", sep = ":"), start_time),
end_time = ifelse(str_detect(end_time, "AM"),
str_remove(end_time, "\\s?AM"),
paste(as.numeric(gsub(pattern = "^(\\d{1,2})\\:\\d{2}\\sPM$",
replacement = "\\1",
x = .$end_time)) + 12,
gsub(pattern = "^\\d{1,2}:(\\d{2})\\sPM$",
replacement = "\\1",
x = .$end_time),
sep = ":")),
end_time = ifelse(!is.na(end_time), paste(end_time, "00", sep = ":"), end_time))
}
# gather schedule tables (crawling) will takes time
sch_list <- sch_url %>%
pivot_longer(cols = 3:4, names_to = "direction", values_to = "url") %>%
select(-matches("alert_")) %>%
mutate(schedule = map(url, get_schedule))
sc <- sch_list %>%
select(name, route, direction, schedule) %>%
mutate(direction = ifelse(direction == "url_contra", 1, 0),
load_date = Sys.Date()) %>%
unnest(schedule) %>%
distinct()
# time transformation
sc <- sc %>%
select(name, route, direction, schedule, load_date) %>%
unnest(schedule) %>%
trans_schedule()
# rename schedule for suitability with gtfs headers
names(sc) <- c("route_id", "trip", "direction_id", "day",
"start_time", "end_time", "load_date")
# detect non directional trip names
ndtrip <- sc %>%
select(route_id, trip) %>%
mutate(strip = str_detect(trip, "-")) %>%
filter(strip == FALSE) %>%
.$route_id %>%
unique()
# add headsign in schedule data
sc <- sc %>%
mutate(trip = ifelse(route_id %in% ndtrip,
paste(trip, trip, sep = "-"),
trip)) %>%
separate(col = "trip", into = c("trip_0", "trip_1"),
sep = "-") %>%
mutate(trip_0 = str_trim(trip_0),
trip_1 = str_trim(trip_1)) %>%
mutate(trip_headsign = ifelse(direction_id == 1,
paste(trip_1, trip_0, sep = " - "),
paste(trip_0, trip_1, sep = " - ")),
.after = route_id) %>%
select(-trip_0, -trip_1)
# save data
saveRDS(sc, "data/tj_schedule.rds")