In-class_Ex5

Author

Low Ji Xiong

Overview

Hands-on experience on the following tasks: - performing geocoding using data downloaded from data.gov.sg - calibrating Geographically Weighted Poisson Regression

Getting Started

devtools::install_github("LukeCe/spflow")
Skipping install of 'spflow' from a github remote, the SHA1 (df913677) has not changed since last install.
  Use `force = TRUE` to force installation

The code chunks below installs and launches these R packages into R environment.

pacman::p_load(sf, spdep, sp, Matrix, spflow, reshape2, knitr, tmap, tidyverse)

Data Preparation

Before we can calibrate Spatial Econometric Interaction Models by using spflow package, 3 data sets are required.

  • a spatial weights,
  • a tiblle data.frame consists of the origins, destination, flows and distances between the origins and destination, and
  • a tibble data.frame consists of explanatory variables

Building the geographical area

For the purpose of this study, MPSZ-2019 from URA will be used, which will be imported as follows.

mpsz <- st_read(dsn = "data/geospatial", layer = "MPSZ-2019")%>%
  st_transform(crs = 3414)
Reading layer `MPSZ-2019' from data source 
  `C:\jayexx\ISSS624\In-class_Exercises\In-class_Ex5\data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84

Import BusStop layer downloaded from LTA DataMall into RStudio and save it as a sf data frame called busstop.

busstop <- st_read(dsn = "data/geospatial",
                   layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `C:\jayexx\ISSS624\In-class_Exercises\In-class_Ex5\data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21

Populate the planning subzone code (i.e. SUBZONE_C) of mpsz sf data frame into busstop sf data frame as follows.

busstop_mpsz <- st_intersection(busstop, mpsz) %>%
  select(BUS_STOP_N, SUBZONE_C) %>%
  st_drop_geometry()
Warning: attribute variables are assumed to be spatially constant throughout
all geometries

Analysis will be focused on planning SZ with BS. In view of this the Point-in-polygon count analysis is used.

mpsz$'BUSSTOP_COUNT' <- lengths(
  st_intersects(mpsz, busstop))

Preparing the flow data

odbus6_9 <- read_rds("data/rds/odbus6_9.rds")
od_data <- left_join(odbus6_9, busstop_mpsz, by = c("ORIGIN_PT_CODE" = "BUS_STOP_N"))%>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE, 
         ORIGIN_SZ = SUBZONE_C,
         DESTIN_BS = DESTINATION_PT_CODE)
Warning in left_join(odbus6_9, busstop_mpsz, by = c(ORIGIN_PT_CODE = "BUS_STOP_N")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 23333 of `x` matches multiple rows in `y`.
ℹ Row 163 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.

retrieve the rds data

mpsz_nb <- read_rds("data/rds/mpsz_nb.rds")
mpsz_flow <- read_rds("data/rds/mpsz_flow.rds")
mpsz_var <- read_rds("data/rds/mpsz_var.rds")

Creationg ‘spflow_network-class’ objects

‘spflow_network-class’ is an S4 class that contains all info on a spatial network which is composed by a set of nodes that are linked by some neighbourhood relation. It can be created by using [’spflow_network()]’(https://lukece.github.io)

For our model, we choose the contiguity based neighbourhood struture.

mpsz_net <- spflow_network(
  id_net = "sg",
  node_neighborhood = 
    nb2mat(mpsz_nb$by_contiguity), #to use adaptive weight
  node_data = mpsz_var,   
  node_key_column = "SZ_CODE")

mpsz_net
Spatial network nodes with id: sg
--------------------------------------------------
Number of nodes: 313
Average number of links per node: 6.077
Density of the neighborhood matrix: 1.94% (non-zero connections)

Data on nodes:
                SZ_NAME SZ_CODE BUSSTOP_COUNT AGE7_12 AGE13_24 AGE25_64
1      INSTITUTION HILL  RVSZ05             2     330      360     2260
2        ROBERTSON QUAY  SRSZ01            10     320      350     2200
3          FORT CANNING  MUSZ02             6       0       10       30
4      MARINA EAST (MP)  MPSZ05             2       0        0        0
5               SENTOSA  SISZ01             1     200      260     1440
6        CITY TERMINALS  BMSZ17            10       0        0        0
---                 ---     ---           ---     ---      ---      ---
308            NEE SOON  YSSZ07            12      90      140      590
309       UPPER THOMSON  BSSZ01            47    1590     3660    15980
310          SHANGRI-LA  AMSZ05            12     810     1920     9650
311          TOWNSVILLE  AMSZ04             9     980     2000    11320
312           MARYMOUNT  BSSZ02            25    1610     4060    16860
313 TUAS VIEW EXTENSION  TSSZ06            11       0        0        0
    SCHOOL_COUNT BUSINESS_COUNT RETAILS_COUNT FINSERV_COUNT ENTERTN_COUNT
1              1              6            26             3             0
2              0              4           207            18             6
3              0              7            17             0             3
4              0              0             0             0             0
5              0              1            84            29             2
6              0             11            14             4             0
---          ---            ---           ---           ---           ---
308            0              0             7             0             0
309            3             21           305            30             0
310            3              0            53             9             0
311            1              0            83            11             0
312            3             19           135             8             0
313            0             53             3             1             0
    FB_COUNT LR_COUNT COORD_X COORD_Y
1          4        3  103.84    1.29
2         38       11  103.84    1.29
3          4        7  103.85    1.29
4          0        0  103.88    1.29
5         38       20  103.83    1.25
6         15        0  103.85    1.26
---      ---      ---     ---     ---
308        0        0  103.81     1.4
309        5       11  103.83    1.36
310        0        0  103.84    1.37
311        1        1  103.85    1.36
312        3       11  103.84    1.35
313        0        0  103.61    1.26
mpsz_net_pairs <- spflow_network_pair(
  id_orig_net = "sg",
  id_dest_net = "sg",
  pair_data = mpsz_flow,
  orig_key_column = "ORIGIN_SZ",
  dest_key_column = "DESTIN_SZ")

mpsz_net_pairs
Spatial network pair with id: sg_sg
--------------------------------------------------
Origin network id: sg (with 313 nodes)
Destination network id: sg (with 313 nodes)
Number of pairs: 97969
Completeness of pairs: 100.00% (97969/97969)

Data on node-pairs:
      DESTIN_SZ ORIGIN_SZ DISTANCE TRIPS
1        RVSZ05    RVSZ05        0    67
314      SRSZ01    RVSZ05   305.74   251
627      MUSZ02    RVSZ05   951.83     0
940      MPSZ05    RVSZ05  5254.07     0
1253     SISZ01    RVSZ05     4975     0
1566     BMSZ17    RVSZ05  3176.16     0
---         ---       ---      ---   ---
96404    YSSZ07    TSSZ06 26972.97     0
96717    BSSZ01    TSSZ06 25582.48     0
97030    AMSZ05    TSSZ06 26714.79     0
97343    AMSZ04    TSSZ06 27572.74     0
97656    BSSZ02    TSSZ06  26681.7     0
97969    TSSZ06    TSSZ06        0   270
mpsz_multi_net <- spflow_network_multi(mpsz_net,
                                       mpsz_net_pairs)
mpsz_multi_net
Collection of spatial network nodes and pairs
--------------------------------------------------
Contains 1 spatial network nodes  
    With id :  sg
Contains 1 spatial network pairs  
    With id :  sg_sg

Availability of origin-destination pair information:

 ID_ORIG_NET ID_DEST_NET ID_NET_PAIR COMPLETENESS     C_PAIRS  C_ORIG  C_DEST
          sg          sg       sg_sg      100.00% 97969/97969 313/313 313/313

Correlation Analysis

Multicollinearity refers

pair

cor_formula <- log(1 + TRIPS) ~
  BUSSTOP_COUNT +
  AGE7_12 +
  AGE7_12 +
  AGE7_12 + 
  SCHOOL_COUNT +
  BUSINESS_COUNT +
  RETAILS_COUNT +
  FINSERV_COUNT +
  P_(log(DISTANCE + 1))

cor_mat <- pair_cor(
  mpsz_multi_net,
  spflow_formula = cor_formula,
  add_lags_x = FALSE)

colnames(cor_mat) <- paste0(substr(colnames(cor_mat),1,3),"...")

cor_image(cor_mat)
base_model <- spflow(
  spflow_formula = log(1 + TRIPS) ~
    O_(BUSSTOP_COUNT +
         AGE25_64) +
    D_(SCHOOL_COUNT +
         BUSINESS_COUNT +
         RETAILS_COUNT +
         FINSERV_COUNT) +
    P_(log(DISTANCE + 1)),
  spflow_networks = mpsz_multi_net)

base_model
--------------------------------------------------
Spatial interaction model estimated by: MLE  
Spatial correlation structure: SDM (model_9)
Dependent variable: log(1 + TRIPS)

--------------------------------------------------
Coefficients:
                          est     sd   t.stat  p.val
rho_d                   0.680  0.004  192.555  0.000
rho_o                   0.678  0.004  187.732  0.000
rho_w                  -0.396  0.006  -65.592  0.000
(Intercept)             0.410  0.065    6.267  0.000
(Intra)                 1.312  0.081   16.262  0.000
D_SCHOOL_COUNT          0.017  0.002    7.885  0.000
D_SCHOOL_COUNT.lag1     0.002  0.004    0.552  0.581
D_BUSINESS_COUNT        0.000  0.000    3.015  0.003
D_BUSINESS_COUNT.lag1   0.000  0.000   -0.249  0.804
D_RETAILS_COUNT         0.000  0.000   -0.306  0.759
D_RETAILS_COUNT.lag1    0.000  0.000    0.152  0.880
D_FINSERV_COUNT         0.002  0.000    6.787  0.000
D_FINSERV_COUNT.lag1   -0.002  0.001   -3.767  0.000
O_BUSSTOP_COUNT         0.002  0.000    6.806  0.000
O_BUSSTOP_COUNT.lag1   -0.001  0.000   -2.364  0.018
O_AGE25_64              0.000  0.000    7.336  0.000
O_AGE25_64.lag1         0.000  0.000   -2.797  0.005
P_log(DISTANCE + 1)    -0.050  0.007   -6.794  0.000

--------------------------------------------------
R2_corr: 0.6942947  
Observations: 97969  
Model coherence: Validated
#par(old_par)
corr_residual <- pair_cor(base_model)
colnames(corr_residual) <- substr(colnames(corr_residual),1,3)
cor_image(corr_residual)

spflow_formula <- log(1 + TRIPS) ~
    O_(BUSSTOP_COUNT +
         AGE25_64) +
    D_(SCHOOL_COUNT +
         BUSINESS_COUNT +
         RETAILS_COUNT +
         FINSERV_COUNT) +
    P_(log(DISTANCE + 1))

model_control <- spflow_control(
  estimation_method = "mle",
  model = "model_1")

mle_model <- spflow(
  spflow_formula,
  spflow_networks = mpsz_multi_net,
  estimation_control = model_control)

mle_model
spflow_formula <- log(1 + TRIPS) ~
    O_(BUSSTOP_COUNT +
         AGE25_64) +
    D_(SCHOOL_COUNT +
         BUSINESS_COUNT +
         RETAILS_COUNT +
         FINSERV_COUNT) +
    P_(log(DISTANCE + 1))

model_control <- spflow_control(
  estimation_method = "mle",
  model = "model_2")

mle_model2 <- spflow(
  spflow_formula,
  spflow_networks = mpsz_multi_net,
  estimation_control = model_control)

mle_model2
spflow_formula <- log(1 + TRIPS) ~
    O_(BUSSTOP_COUNT +
         AGE25_64) +
    D_(SCHOOL_COUNT +
         BUSINESS_COUNT +
         RETAILS_COUNT +
         FINSERV_COUNT) +
    P_(log(DISTANCE + 1))

model_control <- spflow_control(
  estimation_method = "mle",
  model = "model_8")

mle_model8 <- spflow(
  spflow_formula,
  spflow_networks = mpsz_multi_net,
  estimation_control = model_control)

mle_model8