已部署到shinyapps.io,詳見SHMetrocss
## encoding options(encoding = "UTF-8") ## for chines ## use getOption("encoding") to see if things were changed loc <- function(os, language = "english") { switch(language, english = ifelse(os == "Windows", "English_United States.1252", "en_US.UTF-8"), chinese = ifelse(os == "Windows", "Chinese", "zh_CN.utf-8")) } ## set locale Sys.setlocale(category = "LC_ALL", loc(Sys.info()[["sysname"]], "chinese")) ##加載工做環境和所需包 setwd("/Users/jeevanyue/Rproject/map/SHMetro") library(data.table) library(bit64) library(dplyr) library(tidyr) library(scales) library(lubridate) #日期處理包 library(shiny) library(leaflet) library(lattice) library(plotly) library(chorddiag) #繪製chord
#地鐵站進站數據和出站數據 shmetro_in <- fread("data/shmetro_in.csv",encoding="UTF-8") shmetro_out <- fread("data/shmetro_out.csv",encoding="UTF-8") ## 進出地鐵站數據 shmetro_line_in_out <- fread("data/shmetro_line_in_out.csv",encoding="UTF-8") ## 進出地鐵站關聯 in_out <- shmetro_line_in_out %>% spread(line_out,count) in_out[is.na(in_out)]<-0 ## 地鐵站經緯度 stations <- fread("data/stations.csv",encoding="UTF-8") stations <- stations %>% select(c(1:5)) %>% arrange(line,line_id) stations_no <- nrow(stations) for (i in 1:stations_no) { s <- stations$station[i] stations$lines[i] <- paste(stations[stations$station==s,]$line,sep="",collapse="/") }
stations <- fread("data/stations.csv",encoding="UTF-8") stations <- stations %>% select(c(1:5)) %>% arrange(line,line_id) stations_no <- nrow(stations) for (i in 1:stations_no) { s <- stations$station[i] stations$lines[i] <- paste(stations[stations$station==s,]$line,sep="",collapse="/") } invisible(gc())
交通卡的交易信息有7個字段,分別是:卡號、交易日期、交易時間、站點名稱、行業名稱、交易金額、交易性質。html
卡號:交通卡卡號react
交易日期:日期格式yyyy-mm-ddexpress
交易時間:時間個是hh:mm:sswindows
站點名稱:內容包括線路和站名,如:"1號線莘莊"session
行業名稱:都是"地鐵"app
交易金額:0和大於0的值,0表示進站,大於0的值表示出戰dom
交易性質:"優惠"和"非優惠"ide
#mac下用以下讀取 system.time(trade <- read.csv("/Users/jeevanyue/Desktop/SPTCC-20150401.csv",header = F,sep=",", fileEncoding = "GB2312")) #windows下用以下讀取 #system.time(trade <- fread("SPTCC-20150401/SPTCC-20150401.csv",integer64='character',stringsAsFactors=F)) #trade <- read.csv('data/SPTCC-20150401_Sample.txt',header=T,encoding='UTF-8',stringsAsFactors = F) #重命名 names(trade) <- c('card_id','date','time','station','vehicle','money','property') #篩選地鐵數據 trade_metro <- trade %>% filter(vehicle=='地鐵') rm(trade) invisible(gc()) #將"station"(原含義爲線路和站名),分爲"line"和"station" trade_metro <- trade_metro %>% separate(station, c('line', 'station'), sep = '號線') invisible(gc()) #按五分鐘統計時間,向上取整 trade_metro <- trade_metro %>% mutate(M5=ceiling(period_to_seconds(hms(time))/300)) invisible(gc()) #刪除不須要的字段 trade_metro <- trade_metro %>% select(-vehicle,-property,-date) invisible(gc()) #時間格式 #trade_metro$time <- strptime(paste("2015-04-01", trade_metro$time, sep=' '), "%Y-%m-%d %H:%M:%S", tz = "GMT")
## 對與stations地鐵站名不一致的trade數據進行處理 trade_metro[trade_metro$station=="淞浜路",]$station <- "淞濱路" trade_metro[trade_metro$station=="大木橋路 ",]$station <- "大木橋路" trade_metro[trade_metro$station=="上海大學站",]$station <- "上海大學"
## 進站數據 trade_metro_in <- trade_metro %>% filter(money==0) %>% select(card_id,"time_in"=time,"line_in"=line,"station_in"=station,"M5_in"=M5) ## 出站數據 trade_metro_out <- trade_metro %>% filter(money>0)%>% select(card_id,"time_out"=time,"line_out"=line,"station_out"=station,money,"M5_out"=M5)
上海火車站爲虛擬換乘,刪除半小時內3/4換1和1換3/4的數據函數
3/4換1的數據
## 3/4換1的數據,統計發如今上海火車站3/4號線出站以3號線名義出站 trade_metro_out_34 <- trade_metro_out %>% filter(station_out=='上海火車站') %>% filter(line_out==3 | line_out==4) trade_metro_in_1 <- trade_metro_in %>% filter(station_in=='上海火車站', line_in==1) ## merge出站和進站的數據 trade_metro_out34_in1 <- merge(trade_metro_out_34,trade_metro_in_1,all.x=T) %>% mutate(duration=period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))) %>% filter(duration>0,duration<=60*30) ### 數據大了什麼樣的數據都有,發現有幾我的在3/4號線出站後,半小時內在1號線進站屢次 #trade_metro_out34_in1 <- na.omit(trade_metro_out34_in1) ## 根據卡號和進站時間,查詢最近的進站時間,做爲本次進站時間 trade_metro_out34_in1 <- data.table(trade_metro_out34_in1) trade_metro_out34_in1[, duration_min := min(duration), by=list(card_id, M5_in)] trade_metro_out34_in1 <- trade_metro_out34_in1 %>% filter(duration==duration_min) %>% select(-duration_min) ## 統計發現絕大部分人在10分鐘內完成換乘 #histogram(ceiling(trade_metro_out34_in1$duration/60)) ## rbind出站數據 trade_metro_out <- rbind(trade_metro_out, trade_metro_out34_in1[,c(1:6)]) ## 刪除所有重複的出站數據 trade_metro_out <- trade_metro_out[!(duplicated(trade_metro_out) | duplicated(trade_metro_out, fromLast = TRUE)), ] ## rbind進站數據 trade_metro_in <- rbind(trade_metro_in, trade_metro_out34_in1[,c(1,7:10)]) ## 刪除所有重複的進站數據 trade_metro_in <- trade_metro_in[!(duplicated(trade_metro_in) | duplicated(trade_metro_in, fromLast = TRUE)), ]
1換3/4的數據
## 1換3/4的數據,統計發如今3/4號線上海火車站以3號線名義進站 trade_metro_out_1 <- trade_metro_out %>% filter(station_out=='上海火車站',line_out==1) trade_metro_in_34 <- trade_metro_in %>% filter(station_in=='上海火車站') %>% filter(line_in==3 | line_in==4) ## merge出站和進站的數據 trade_metro_out1_in34 <- merge(trade_metro_out_1,trade_metro_in_34,all.x=T, all.y=F) %>% mutate(duration=period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))) %>% filter(duration>0,duration<=60*30) #trade_metro_out1_in34 <- na.omit(trade_metro_out1_in34) ## 根據卡號和進站時間,查詢最近的進站時間,做爲本次進站時間 trade_metro_out1_in34 <- data.table(trade_metro_out1_in34) trade_metro_out1_in34[, duration_min := min(duration), by=list(card_id, M5_in)] trade_metro_out1_in34 <- trade_metro_out1_in34 %>% filter(duration==duration_min) %>% select(-duration_min) ## 統計發現絕大部分人在10分鐘內完成換乘 #histogram(ceiling(trade_metro_out1_in34$duration/60)) ## rbind出站數據 trade_metro_out <- rbind(trade_metro_out, trade_metro_out1_in34[,c(1:6)]) ## 刪除所有重複的出站數據 trade_metro_out <- trade_metro_out[!(duplicated(trade_metro_out) | duplicated(trade_metro_out, fromLast = TRUE)), ] ## rbind進站數據 trade_metro_in <- rbind(trade_metro_in, trade_metro_out1_in34[,c(1,7:10)]) ## 刪除所有重複的進站數據 trade_metro_in <- trade_metro_in[!(duplicated(trade_metro_in) | duplicated(trade_metro_in, fromLast = TRUE)), ]
根據消費金額爲0,每5分鐘統計每站地鐵的進站人數
trade_metro_in_station <- trade_metro_in %>% group_by(station_in, M5_in) %>% summarise(count=n()) %>% select(station=station_in, M5=M5_in, count) #trade_metro_in_station <- na.omit(trade_metro_in_station) invisible(gc()) ## 合併地鐵站座標 shmetro_in <- merge(trade_metro_in_station,stations,all.x=T, all.y=F) #rm(trade_metro_in_station) ## 查看未匹配到的地鐵站 #l <- shmetro_in[is.na(shmetro_in$gps_lat),] #unique(l$station) #trade_metro_in_station[trade_metro_in_station$station=="淞浜路",]$station <- "淞濱路" #trade_metro_in_station[trade_metro_in_station$station=="大木橋路 ",]$station <- "大木橋路" #trade_metro_in_station[trade_metro_in_station$station=="上海大學站",]$station <- "上海大學" #stations[grepl("淞濱路", stations$station),]$station #trade_metro_in_station[grepl("淞浜路", trade_metro_in_station$station),]$station <- "淞濱路" #shmetro_in <- na.omit(shmetro_in) invisible(gc()) #write.csv(shmetro_in,"shmetro_in.csv",row.names = F,fileEncoding="UTF-8")
根據消費金額大雨0,每5分鐘統計每站地鐵的出站人數
trade_metro_out_station <- trade_metro_out %>% group_by(station_out, M5_out) %>% summarise(count=n()) %>% select(station=station_out, M5=M5_out, count) #trade_metro_out_station <- na.omit(trade_metro_out_station) invisible(gc()) ## 合併地鐵站座標 shmetro_out <- merge(trade_metro_out_station,stations,all.x=T, all.y=F) #rm(trade_metro_out_station) #shmetro_out <- na.omit(shmetro_out) invisible(gc()) #write.csv(shmetro_out,"shmetro_out.csv",row.names = F,fileEncoding="UTF-8")
## merge進站和出站數據,並計算乘坐時間 trade_metro_in_out <- merge(trade_metro_in, trade_metro_out, all.x=T, all.y=F) %>% mutate(duration=period_to_seconds(hms(time_out)) - period_to_seconds(hms(time_in)), duration_M5=M5_out-M5_in) %>% filter(duration>0) #根據卡號和進站時間,查詢最近出站的時間,做爲本次出站時間 trade_metro_in_out <- data.table(trade_metro_in_out) trade_metro_in_out[, duration_min := min(duration), by=list(card_id, M5_in)] trade_metro_in_out <- trade_metro_in_out %>% filter(duration==duration_min) %>% select(-duration_min) #統計進站線路A->出站線路B的筆數 shmetro_line_in_out <- trade_metro_in_out %>% group_by(line_in,line_out) %>% summarise(count=n()) shmetro_line_in_out$line_in <- as.numeric(shmetro_line_in_out$line_in) shmetro_line_in_out$line_out <- as.numeric(shmetro_line_in_out$line_out) #排序 shmetro_line_in_out <- arrange(shmetro_line_in_out,line_in,line_out) #將出站線路數據轉換爲屬性字段 in_out <- shmetro_line_in_out %>% spread(line_out,count) in_out[is.na(in_out)]<-0 #write.csv(shmetro_line_in_out,"shmetro_line_in_out.csv",row.names = F,fileEncoding="UTF-8")
#地鐵顏色 lines_color <- data.frame("line"=c(1:13,16),"color"=c("#ED3229","#36B854","#FFD823","#320176","#823094","#CF047A","#F3560F","#008CC1","#91C5DB","#C7AFD3","#8C2222","#007a61","#ec91cc","#32D2CA")) pal <- colorFactor(as.character(lines_color$color), domain = stations$line) #輔助函數繪製線路 draw_line_add <- function(l_no,line_s_id=NULL){ line_color <- lines_color[lines_color$line==l_no,]$color line_data <- stations[stations$line==l_no,] if(is.null(line_s_id)){ draw_lines <- Shanghai %>% addPolylines(lat=line_data$gps_lat,lng=line_data$gps_lon,color=line_color,weight=2) }else{ draw_lines <- Shanghai %>% addPolylines(lat=line_data$gps_lat[line_s_id],lng=line_data$gps_lon[line_s_id],color=line_color,weight=2) } return(draw_lines) } ## 上海線路地圖 Shanghai <- leaflet() %>% setView(lng = 121.60, lat = 31.20, zoom = 10) %>% addProviderTiles("CartoDB.Positron") %>% addLegend(position = "bottomleft",pal=pal,values = stations$line) for(l in unique(stations$line)){ line_length <- nrow(stations[stations$line==l,]) if(l==4){ #因爲4號線爲環線,需將首尾相連 Shanghai <- draw_line_add(l_no=l) Shanghai <- draw_line_add(l_no=l,line_s_id=c(1,line_length)) }else if(l==10){ #因爲10號線在龍溪路站之後分爲兩條線路,需分兩端繪製 Shanghai <- draw_line_add(l_no=l,line_s_id=c(1:(line_length-3))) Shanghai <- draw_line_add(l_no=l,line_s_id=c(24,(line_length-2):line_length)) }else if(l==11){ #因爲11號線在嘉定新城站之後分爲兩條線路,需分兩端繪製 Shanghai <- draw_line_add(l_no=l,line_s_id=c(1:(line_length-7))) Shanghai <- draw_line_add(l_no=l,line_s_id=c(28,(line_length-6):line_length)) }else{ Shanghai <- draw_line_add(l_no=l) } }
##繪製chord圖 metro_chord <- data.matrix(as.data.frame(in_out)[,c(2:15)]) haircolors <- in_out$line_in dimnames(metro_chord) <- list(have = haircolors, prefer = colnames(metro_chord)) groupColors <- c("#ED3229","#36B854","#FFD823","#320176","#823094","#CF047A","#F3560F","#008CC1","#91C5DB","#C7AFD3","#8C2222","#007a61","#ec91cc","#32D2CA") #chorddiag(metro_chord, groupColors = groupColors, margin=50, showTicks=F, groupnamePadding = 5)
b <- list(x = 0, y = 1,bgcolor = "#00FFFFFF") yax <- list( title = "", zeroline = FALSE, showline = FALSE, showticklabels = FALSE, showgrid = FALSE ) xax <- list( title = "", titlefont = list(size = 8), tickangle = -20, color = "black" )
ui <- shinyUI(navbarPage("SHMetro", tabPanel("進站流量", div(class="outer", #tags$style(type = "text/css", "html, body {width:100%;height:100%}"), tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"), leafletOutput("map", width = "100%", height = "100%"), absolutePanel(top = 10, right = 10, h4(textOutput("output_slider_time")), sliderInput("slider_time", "Time:", #min=as.POSIXct(min(filter(shmetro_in, M5>30)$M5)*5*60, origin = "2015-04-01", tz = "GMT"), #max=as.POSIXct(max(shmetro_in$M5)*5*60, origin = "2015-04-01", tz = "GMT"), #value=as.POSIXct(min(shmetro_in$M5)*5*60, origin = "2015-04-01", tz = "GMT"), min = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"), max = as.POSIXct(24*60*60, origin = "2015-04-01", tz = "GMT"), value = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"), step = 60*5, timeFormat = "%T", timezone = "GMT"), selectInput("select_line", "Line", c("All",lines_color$line)), h4("TOP 5"), plotlyOutput("in_top5",height = 200), checkboxInput("legend", "Show legend", TRUE) ) ) ), tabPanel("出站流量", div(class="outer", #tags$style(type = "text/css", "html, body {width:100%;height:100%}"), tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"), leafletOutput("map_out", width = "100%", height = "100%"), absolutePanel(top = 10, right = 10, h4(textOutput("output_slider_time_out")), sliderInput("slider_time_out", "Time:", #min=as.POSIXct(min(filter(shmetro_in, M5>30)$M5)*5*60, origin = "1960-01-01", tz = "GMT"), #max=as.POSIXct(max(shmetro_in$M5)*5*60, origin = "1960-01-01", tz = "GMT"), #value=as.POSIXct(min(shmetro_in$M5)*5*60, origin = "1960-01-01", tz = "GMT"), min = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"), max = as.POSIXct(24*60*60, origin = "2015-04-01", tz = "GMT"), value = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"), step = 60*5, timeFormat = "%T", timezone = "GMT"), selectInput("select_line_out", "Line", c("All",lines_color$line)), h4("TOP 5"), plotlyOutput("out_top5",height = 200), checkboxInput("legend_out", "Show legend", TRUE) ) ) ), tabPanel("線路關聯", div(class="outer", #tags$style(type = "text/css", "html, body {width:100%;height:100%}"), tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"), chorddiagOutput("line_chord", width = "100%",height="100%") ) ) ) ) server <- shinyServer(function(input, output, session) { ## 進站流量統計 # Reactive expression for the data subsetted to what the user selected filteredData <- reactive({ if(input$select_line=="All"){ shmetro_in %>% filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time,"%H:%M:%S")))/300)) }else{ shmetro_in %>% filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time,"%H:%M:%S")))/300),line==as.numeric(input$select_line)) } }) stations_in_top5 <- reactive({ filteredData() %>% group_by(station) %>% summarise(count=sum(count),line=min(line)) %>% arrange(desc(count)) %>% head(5) %>% as.data.frame() }) ## time output$output_slider_time <- renderText({ paste0("Time: ", format(input$slider_time,"%H:%M:%S")) }) output$map <- renderLeaflet({ Shanghai %>% addCircles(stations$gps_lon, stations$gps_lat,color = pal(stations$line), radius=1,popup = paste(stations$station,stations$lines),fillOpacity = 1,stroke = FALSE) %>% clearMarkerClusters() %>% clearMarkers() }) observe({ data_in_circle <- data.table(filteredData())[, count := sum(count), by=list(station, M5)] %>% arrange(count) leafletProxy("map", data = data_in_circle) %>% clearMarkerClusters() %>% clearMarkers() %>% addCircleMarkers(data_in_circle$gps_lon,data_in_circle$gps_lat, color = pal(data_in_circle$line), fillOpacity = 0.5,stroke = FALSE, popup=paste(data_in_circle$station,data_in_circle$line,data_in_circle$count,sep=","), radius=(data_in_circle$count)^(1/2.5)) }) # top5 output$in_top5 <- renderPlotly({ # If no stations_in_top5 are in view, don't plot if (nrow(stations_in_top5()) == 0) return(NULL) plot_ly(stations_in_top5(), x = stations_in_top5()$station, y = stations_in_top5()$count, type = "bar", marker = list(color = pal(stations_in_top5()$line)), bgcolor = "#00FFFFFF") %>% layout(showlegend=FALSE, yaxis=yax,xaxis=xax,plot_bgcolor='#00FFFFFF', paper_bgcolor='#00FFFFFF') }) # Use a separate observer to recreate the legend as needed. observe({ proxy <- leafletProxy("map") # Remove any existing legend, and only if the legend is # enabled, create a new one. proxy %>% clearControls() if (input$legend) { proxy %>% addLegend(position = "bottomleft",pal=pal,values = stations$line) } }) ## 出站流量統計 # Reactive expression for the data subsetted to what the user selected filteredData_out <- reactive({ if(input$select_line_out=="All"){ shmetro_out %>% filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time_out,"%H:%M:%S")))/300)) }else{ shmetro_in %>% filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time_out,"%H:%M:%S")))/300),line==as.numeric(input$select_line_out)) } }) stations_out_top5 <- reactive({ filteredData_out() %>% group_by(station) %>% summarise(count=sum(count),line=min(line)) %>% arrange(desc(count)) %>% head(5) %>% as.data.frame() }) ## time output$output_slider_time_out <- renderText({ paste0("Time: ", format(input$slider_time_out,"%H:%M:%S")) }) output$map_out <- renderLeaflet({ Shanghai %>% addCircles(stations$gps_lon, stations$gps_lat,color = pal(stations$line), radius=1,popup = paste(stations$station,stations$lines),fillOpacity = 1,stroke = FALSE) %>% clearMarkerClusters() %>% clearMarkers() }) observe({ data_out_circle <- data.table(filteredData_out())[, count := sum(count), by=list(station, M5)] %>% arrange(count) leafletProxy("map_out", data = filteredData_out()) %>% clearMarkerClusters() %>% clearMarkers() %>% addCircleMarkers(data_out_circle$gps_lon, data_out_circle$gps_lat, color = pal(data_out_circle$line),fillOpacity = 0.5,stroke = FALSE, popup=paste(data_out_circle$station,data_out_circle$line,data_out_circle$count,sep=","), radius=(data_out_circle$count)^(1/2.5)) }) # top5 output$out_top5 <- renderPlotly({ # If no stations_in_top5 are in view, don't plot if (nrow(stations_out_top5()) == 0) return(NULL) plot_ly(stations_out_top5(), x = stations_out_top5()$station, y = stations_out_top5()$count, type = "bar", marker = list(color = pal(stations_out_top5()$line)), bgcolor = "#00FFFFFF") %>% layout(showlegend=FALSE, yaxis=yax,xaxis=xax,plot_bgcolor='#00FFFFFF', paper_bgcolor='#00FFFFFF') }) # Use a separate observer to recreate the legend as needed. observe({ proxy <- leafletProxy("map_out") # Remove any existing legend, and only if the legend is # enabled, create a new one. proxy %>% clearControls() if (input$legend_out) { proxy %>% addLegend(position = "bottomleft",pal=pal,values = stations$line) } }) ## 線路關聯 output$line_chord <- renderChorddiag({ chorddiag(metro_chord, groupColors = groupColors, showTicks=F, groupnamePadding = 5) }) })
shinyApp(ui = ui,server = server)
進站流量
出站流量
進出地鐵