library(spOccupancy)
library(tidyverse)
library(INLA)
library(inlabru)
library(scico)
library(sf)
library(stars)
library(patchwork)
library(kableExtra)
library(scales)
library(tidyterra)
rm(list = ls())
= theme(axis.title.x=element_blank(),
theme_maps axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),)
Occupacy model with space-varying time trends
This code reproduces the case study “Occupancy model with spatially-varying coefficients for the Gray Catbird” in “Spatio-temporal Occupancy Models with INLA”. The dataset and part of the code for preparing the data, is taken from the supplementary material of (Doser et al. 2022)
# load data ---------------------------------------------
load(here::here("data_SCV_model/spOcc-bbs-data.rda"))
= data.list
data.list_orig # Loads the data.list object, which is formatted for spOccupancy.
# Species info
<- dimnames(data.list$y)[[1]]
sp.names
# Create an index for the observers
# this is created as
= apply(data.list$y,c(1,2,3), sum, na.rm = T)
N_species_obs = apply(data.list$y, c(1,2,3), function(x)
N_species_counts all(!is.na(x)))
!N_species_counts] = NA
N_species_obs[= apply(N_species_obs,c(2,3), function(x)
N_species_obs2 if((sum(x)==0 | all(is.na(x)))) return(NA)
{else return(sum(x[x>0]))})
= apply(data.list$y, c(1,2,3),function(x) ifelse(!all(x==0),1,0))
species_observed = apply(species_observed,c(2,3), sum)
species_observed = data.frame(n_species = as.vector(N_species_obs2)/5,
obs_index tot_species = as.vector(species_observed),
obs = as.vector(data.list$det.covs$obs),
year = as.vector(data.list$det.covs$year.det),
site = rep(1:1846,20))%>%
::filter(!is.na(n_species)) %>%
dplyrmutate(score = n_species/tot_species) %>%
group_by(obs) %>% summarise(score = mean(score))
# choose the species
# choose the species
<- 'GRCA'
curr.sp print(curr.sp)
[1] "GRCA"
<- which(sp.names == curr.sp)
sp.indx $y <- data.list$y[sp.indx, , , ]
data.list# Only use locations within the breeding range of the species.
# Load in species range information based on bird life data.
load(here::here("data_SCV_model/bird-life-processed.rda"))
#load("C:/Users/jb538u/NTNU/Occupancy_model_Git/Occupancy-Models-in-INLA-/docs/paper/data_SCV_model/bird-life-processed.rda")
$y <- data.list$y[range.ind[sp.indx, ] == 1, , ]
data.list$occ.covs$years <- data.list$occ.covs$years[range.ind[sp.indx, ] == 1, ]
data.list$occ.covs$BCR <- data.list$occ.covs$BCR[range.ind[sp.indx, ] == 1]
data.list$det.covs$day <- data.list$det.covs$day[range.ind[sp.indx, ] == 1, ]
data.list$det.covs$obs <- data.list$det.covs$obs[range.ind[sp.indx, ] == 1, ]
data.list$det.covs$obs.first.year <- data.list$det.covs$obs.first.year[range.ind[sp.indx, ] == 1, ]
data.list$det.covs$year.det <- data.list$det.covs$year.det[range.ind[sp.indx, ] == 1, ]
data.list$coords <- data.list$coords[range.ind[sp.indx, ] == 1, ] data.list
<- st_as_sf(as.data.frame(data.list$coords),
coords.sf coords = c("X", "Y"),
crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=km +no_defs")
<- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE))
usa <- st_as_sf(maps::map("usa", fill = TRUE, plot = FALSE))
usa.no.states # Restrict to east of the 100th meridian
<- st_bbox(usa)
usa.bbox 1] <- -100
usa.bbox[<- as.vector(usa.bbox)
usa.bbox sf_use_s2(FALSE)
# Full data
<- st_crop(st_make_valid(usa), xmin = usa.bbox[1], ymin = usa.bbox[2],
east.us xmax = usa.bbox[3], ymax = usa.bbox[4])
<- east.us %>%
east.us st_transform(st_crs(coords.sf))
<- st_crop(st_make_valid(usa.no.states), xmin = usa.bbox[1], ymin = usa.bbox[2],
east.us.no.states xmax = usa.bbox[3], ymax = usa.bbox[4])
<- east.us.no.states %>%
east.us.no.states st_transform(st_crs(coords.sf))
# BCRS, in case you want to visualize the maps with these
<- st_read(dsn = here::here("data_SCV_model/BCR_Terrestrial/"), layer = "BCR_Terrestrial_master") bcrs
Reading layer `BCR_Terrestrial_master' from data source
`C:\Users\jaf_i\OneDrive\Documentos\GitHub\Occupancy-Models-in-INLA-\docs\website\data_SCV_model\BCR_Terrestrial'
using driver `ESRI Shapefile'
Simple feature collection with 373 features and 11 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1413 ymin: 14.53287 xmax: 179.7785 ymax: 83.11063
Geodetic CRS: NAD83
#bcrs <- st_read(dsn = here::here("C:/Users/jb538u/NTNU/Occupancy_model_Git/Occupancy-Models-in-INLA-/docs/paper/data_SCV_model/BCR_Terrestrial/"), layer = "BCR_Terrestrial_master")
<- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=km +no_defs"
my.proj <- st_as_sf(as.data.frame(data.list$coords),
coords.sf coords = c("X", "Y"),
crs = my.proj)
<- bcrs %>%
bcrs.albers st_transform(crs = my.proj)
# Clip bcrs to region of ne.states
<- st_intersection(st_make_valid(bcrs.albers), st_make_valid(east.us))
bcrs.ne
<- bcrs.ne %>%
bcrs.ne.grouped group_by(BCR) %>%
summarize(geometry = st_union(geometry))
= c(data.list$occ.covs$BCR, bcrs.ne.grouped$BCR )
all_bcr = as.numeric(as.factor(all_bcr)) all_bcr_idx
Data for INLA
= dim(data.list$y)[1]
nsites = c()
Y = c()
year for(i in 1:20)
{= rbind(Y, data.list$y[,i,])
Y = c(year, rep(1999+i, nsites))
year #cat(length(counts), length(year), length(n_samples), "\n")
}
= data.frame(
Xocc year = year,
year_idx = year-min(year)+1,
obs = as.vector(data.list$det.covs$obs),
bcr = rep(data.list$occ.covs$BCR, 20),
bcr_idx = rep(all_bcr_idx[1:length(data.list$occ.covs$BCR)], 20),
obs = as.vector(data.list$det.covs$obs),
obs_first_year = as.vector(data.list$det.covs$obs.first.year),
Int_occ = 1,
scale_year = scale(year),
x = rep(data.list$coords[,1],20),
y = rep(data.list$coords[,2],20),
spatialfield = NA)
= left_join(Xocc, obs_index, by = "obs")
Xocc
# we have 3 covariates in the detection part
= cbind(1,
Xdet scale(as.vector(data.list$det.covs$day)),
scale(as.vector(data.list$det.covs$day))^2,
1,
scale(as.vector(data.list$det.covs$day)),
scale(as.vector(data.list$det.covs$day))^2,
1,
scale(as.vector(data.list$det.covs$day)),
scale(as.vector(data.list$det.covs$day))^2,
1,
scale(as.vector(data.list$det.covs$day)),
scale(as.vector(data.list$det.covs$day))^2,
1,
scale(as.vector(data.list$det.covs$day)),
scale(as.vector(data.list$det.covs$day))^2
)
= !apply(Y,1,function(x)all(is.na(x)))
keep = Y[keep,]
Y = Xocc[keep,]
Xocc = Xdet[keep,]
Xdet
= as.list(Xocc)
data_list $Y = Y
data_list$X = Xdet
data_list
%>%
Xocc ggplot() +
geom_point(aes(x,y), size = 0.5) +
scale_color_viridis_c(na.value = "transparent") +
geom_sf(data = bcrs.ne.grouped, alpha = 0, col = 'grey') +
facet_wrap(.~year) +
xlab("") + ylab("") + theme_maps +
ggtitle("Yearly detections per site")
Set up mesh and SPDE
# set up mesh -------------------------------------------------------------
= inla.mesh.2d(loc= data.list$coords,
mesh max.edge = 300,
cutoff = 30)
= inla.mesh.2d(loc= data.list$coords,
mesh max.edge = c(300,600),
offset = c(100,500),
cutoff = 40)
<- inla.spde2.pcmatern(
spde mesh = mesh,
prior.range = c(800, 0.5),
prior.sigma = c(1, 0.5))
ggplot() + inlabru::gg(mesh) + theme_maps + coord_equal()
Model 1: fixed time trend
Model for occurrences: \[ \text{logit}(\pi_{st}) = \beta_0 + \beta_t\ t + \omega(s) \] Model for detection \[ \text{logit}(p_s) = \alpha_0 + \alpha X(s) \]
# model with fixed covariate ----------------------------------------------
<- inla.spde.make.A(mesh = mesh,
A_sp loc = cbind(Xocc$x, Xocc$y))
= inla.mdata(Y,X) ~
formula -1 + Int_occ + scale_year +
f(spatialfield, model=spde,A.local = A_sp)
= system.time(model <- inla(formula, #the formula
time1 data=data_list,
family= 'occupancy',
control.fixed = list(prec = 1, prec.intercept = 1),
control.compute = list(dic = TRUE, waic = TRUE,
config = TRUE),
control.inla = list(int.strategy = "eb"),
control.family = list(control.link = list(model = "logit"),
link.simple = "logit",
hyper = list(beta1 = list(param = c(0,1),
initial = 0),
beta2 = list(param = c(0,1),
initial = 0),
beta3 = list(param = c(0,1),
initial = 0)))))
Model 2: Continuously varying time trend
Model for occurrences: \[ \text{logit}(\pi_{st}) = \beta_0 + \beta_t(s)\ t + \omega(s) \] Model for detection \[ \text{logit}(p_s) = \alpha_0 + \alpha X(s) \]
<- inla.spde.make.A(mesh = mesh,
A_sp1 loc = cbind(Xocc$x, Xocc$y))
<- inla.spde.make.A(mesh = mesh,
A_sp2 loc = cbind(Xocc$x, Xocc$y),
weights = Xocc$scale_year)
$i1 <- rep(NA,length(data_list$spatialfield))
data_list$i2 <- rep(NA,length(data_list$spatialfield))
data_list
= inla.mdata(Y,X) ~
formula2 -1 + Int_occ +
+
scale_year f(i1, model=spde,A.local = A_sp1) +
f(i2, model=spde, A.local = A_sp2)
# f(time, model = "iid") +
= system.time(model2 <- inla(formula2, #the formula
time2 data=data_list,
family= 'occupancy',
control.fixed = list(prec = 1, prec.intercept = 1),
control.compute = list(dic = TRUE, waic = TRUE,
config = TRUE),
# verbose = TRUE,
control.inla = list(int.strategy = "eb"),
control.family = list(control.link = list(model = "logit"),
link.simple = "logit",
hyper = list(beta1 = list(param = c(0,1/3),
initial = 0),
beta2 = list(param = c(0,1/3),
initial = 0),
beta3 = list(param = c(0,1/3),
initial= 0)))))
Model 3: Regional varying time trend
Model for occurrences:
\[ \text{logit}(\pi_{st}) = \beta_0 + \beta_t(\text{bcr}_s)\ t + \omega(s) + \epsilon_t \]
Model for detection
\[ \text{logit}(p_s) = \alpha_0 + \alpha X(s) \]
<- inla.spde.make.A(mesh = mesh,
A_sp
loc = cbind(Xocc$x, Xocc$y))
= inla.mdata(Y,X) ~
formula3 -1 + Int_occ +
#f(time, model = "iid") +
+ scale_year +
f(bcr_idx,scale_year, model="iid", values = sort(unique(all_bcr_idx))) +
f(spatialfield, model = spde, A.local = A_sp)
<- inla(formula3, #the formula
model3
data=data_list, #the data stack
family= 'occupancy', #which family the data comes from
control.fixed = list(prec = 1, prec.intercept = 1),
control.compute = list(dic = TRUE, waic = TRUE,
config = TRUE), #model diagnostics and config = TRUE gives you the GMRF
# verbose = TRUE,
control.inla = list(int.strategy = "eb"),#, cmin =0),
control.family = list(control.link = list(model = "logit"),
link.simple = "logit",
hyper = list(beta1 = list(param = c(0,1),
initial = 0.5),
beta2 = list(param = c(0,1),
initial = 0),
beta3 = list(param = c(0,1),
initial= 0),
beta4 = list(param = c(0,1),
initial= 0))))
Detection parameters
= rbind(model$summary.hyperpar[1:3,c(1,3,5)],
tab1 $summary.hyperpar[1:3,c(1,3,5)],
model2$summary.hyperpar[1:3,c(1,3,5)])
model3rownames(tab1) = c (c("Int detection1", "day1", "day^2_1"),
c("Int detection2", "day2", "day^2_2"),
c("Int detection3", "day3", "day^2_3"))
kable(tab1, booktabs = TRUE, digits = 2) %>% pack_rows(
index = c("Model 1" = 3, "Model 2" = 3, "Model 3" = 3)
)
mean | 0.025quant | 0.975quant | |
---|---|---|---|
Model 1 | |||
Int detection1 | 0.57 | 0.55 | 0.59 |
day1 | 0.14 | 0.13 | 0.16 |
day^2_1 | -0.11 | -0.12 | -0.10 |
Model 2 | |||
Int detection2 | 0.58 | 0.47 | 0.69 |
day2 | 0.14 | 0.12 | 0.16 |
day^2_2 | -0.11 | -0.12 | -0.10 |
Model 3 | |||
Int detection3 | 0.57 | 0.55 | 0.59 |
day3 | 0.14 | 0.13 | 0.16 |
day^2_3 | -0.11 | -0.12 | -0.10 |
Model Comparison
# compute LGOCV scores
<- Xocc %>% mutate(id = 1:nrow(Xocc)) %>% st_as_sf(coords =c("x","y"))
df_sf <- st_buffer(df_sf, dist = 150)
buffer_25
<- inla.group.cv(result = model,num.level.sets = 3)
loocv_m1 = mean(log(loocv_m1$cv),na.rm=T)
ULOOCV
<- inla.group.cv(result = model2,num.level.sets = 3)
loocv_m2 = mean(log(loocv_m2$cv),na.rm=T)
ULOOCV2
<- inla.group.cv(result = model3,num.level.sets = 3)
loocv_m3 = mean(log(loocv_m3$cv),na.rm=T)
ULOOCV3
= c(ULOOCV,
score
ULOOCV2, ULOOCV3)
WAIC | MLIK | LGOCV_score | |
---|---|---|---|
Model 1 | 135149.7 | -67999.81 | -2.57 |
Model 2 | 134970.7 | -67950.04 | -2.56 |
Model 3 | 135105.3 | -67990.05 | -2.57 |
Predictions
# prediction dataset
= inla.posterior.sample(1000, model)
samples = inla.posterior.sample(1000, model2)
samples2 = inla.posterior.sample(1000, model3)
samples3
load(here::here('data_SCV_model/pred-coordinates.rda'))
0.sf <- st_as_sf(as.data.frame(coords.0),
coords.coords = c("X", "Y"),
crs = st_crs(coords.sf))
= (st_intersects(coords.0.sf, bcrs.ne.grouped, sparse = FALSE))
aa = apply(aa,1,function(x)
idx ifelse((!all(is.na(x))), which((x)),NA))
0.sf$bcr = bcrs.ne.grouped$BCR[idx]
coords.0.sf$bcr_idx = idx
coords.
0.sf = coords.0.sf %>% filter(!is.na(bcr_idx))
coords.= inla.spde.make.A(mesh, st_coordinates(coords.0.sf)) A_pred
Temporal trends
Here are plots of the spatial temporal trends according to the three models.
= function(..., year)
func
{= scale_year
fix return(c(fix))
}
= function(..., year)
func2
{= scale_year
fix = (A_pred %*% i2)[,1]
spat2 return((fix + spat2))
}
= function(..., year)
func3
{= scale_year
fix = bcr_idx[coords.0.sf$bcr_idx]
time
return((fix +time))
}= 1
year = inla.posterior.sample.eval(func, samples, year = year)
aa = inla.posterior.sample.eval(func2, samples2, year = year)
aa2 = inla.posterior.sample.eval(func3, samples3, year = year)
aa3
= data.frame(x = st_coordinates(coords.0.sf)[,1],
dd y = st_coordinates(coords.0.sf)[,2],
z = apply(aa,1,mean),
z1_1 = apply(aa,1,quantile, 0.025),
z1_2 = apply(aa,1,quantile, 0.975),
z2 = apply(aa2,1,mean),
z2_1 = apply(aa2,1,quantile, 0.025),
z2_2 = apply(aa2,1,quantile, 0.975),
z3 = apply(aa3,1,mean),
z3_1 = apply(aa3,1,quantile, 0.025),
z3_2 = apply(aa3,1,quantile, 0.975) )
= dd %>%
dd mutate(check1 = factor(case_when(z1_1>0 ~ 1,
<0 ~ -1,
z1_2<0 & z1_2 >0 ~ 0), levels = c("-1","0","1")),
z1_1check2 = factor(case_when(z2_1>0 ~ 1,
<0 ~ -1,
z2_2<0 & z2_2 >0 ~ 0), levels = c("-1","0","1")),
z2_1check3 = factor(case_when(z3_1>0 ~ 1,
<0 ~ -1,
z3_2<0 & z3_2 >0 ~ 0), levels = c("-1","0","1")))
z3_1
<- st_as_stars(data.frame(dd), dims = c('x', 'y'))
pred.stars = range(c(dd$z,dd$z2, dd$z3), na.rm = T)
lims
<- dd |>
trend1 ::select(c(x,y,z,z2,z3)) |>
dplyr::rename(`SVCM 1` = z,
dplyr`SVCM 2` = z2,
`SVCM 3` = z3) %>%
pivot_longer(-c(x,y))
<- dd |>
trend2 ::select(c(x,y,check1,check2,check3)) |>
dplyr::rename(`SVCM 1` = check1,
dplyr`SVCM 2` = check2,
`SVCM 3` = check3) %>%
pivot_longer(-c(x,y))
%>% ggplot() + geom_tile(aes(x,y,fill = value)) +
trend1 facet_wrap(.~name) + coord_equal() +
scale_fill_scico(name= "Mean \ntemporal trend" , limits = lims) +
theme_maps
%>% ggplot() + geom_tile(aes(x,y,fill = value)) +
trend2 facet_wrap(.~name) + coord_equal() +
scale_fill_manual(name="Change in trend",
values = c("-1" = "maroon",
"0" = "grey",
"1" = "cyan4"),
labels = c("Negative", "No change", "Positive"), na.translate = F)+
theme_maps