Fitting Occupancy models with R-INLA

1 Introduction

In this document we show how to fit a space time occupancy model for the dataset hbefTrends in the spOccupacy library (Doser et al. 2022).

Code
library(INLA) 
library(tidyverse)
library(spOccupancy)
library(scico)
library(patchwork)
library(kableExtra)
library(sf)
library(terra)
library(tidyterra)

rm(list = ls())

plot_inla_effects = function(effect)
{
  p1 = ggplot(data.frame(effect)) +
    geom_line(aes(ID, -mean)) + 
    geom_ribbon(aes(ID, ymin = -X0.025quant, ymax = -X0.975quant),
                alpha = 0.5)
  print(p1)
  
}


theme_maps = theme(axis.line=element_blank(),
                   axis.text.x=element_blank(),
                   axis.text.y=element_blank(),
                   axis.ticks=element_blank(),
                   axis.title.x=element_blank(),
                   axis.title.y=element_blank()
                   #legend.position="none",
                   #panel.background=element_blank(),
                   #panel.border=element_blank(),
                   #panel.grid.major=element_blank(),
                   #panel.grid.minor=element_blank(),
                   #plot.background=element_blank()
)

2 Load and prepare the data

We first load the data and select the species of interest. Here we choose BTBW which is not one of the rare species present in the dataset.

Afterwards we prepare the data in the format that is required by the R-INLA library

Code
data(hbefTrends)

revi.data <- hbefTrends
sp.names <- dimnames(hbefTrends$y)[[1]]
revi.data$y <- revi.data$y[sp.names == 'REVI', , , ]
revi.data$coords = revi.data$coords/1000 #get this in km!

## data preparation for inla -----------------------------------------------
Y = data.frame(revi.data$y[,1,])
Xdet.day = revi.data$det.covs$day[,1,]
Xdet.tod = revi.data$det.covs$tod[,1,]

for(i in 2:9)
{
  Y = rbind(Y, data.frame(revi.data$y[,i,]))
  Xdet.day = rbind(Xdet.day, revi.data$det.covs$day[,i,])
  Xdet.tod = rbind(Xdet.tod, revi.data$det.covs$tod[,i,])
}
Xdet.day = (Xdet.day - mean(Xdet.day, na.rm = T))/sd(Xdet.day,na.rm = T) 
Xdet.tod = (Xdet.tod - mean(Xdet.tod, na.rm = T))/sd(Xdet.tod,na.rm = T) 

Xdet = cbind(1, Xdet.day[,1], Xdet.tod[,1],
             1, Xdet.day[,2], Xdet.tod[,2],
             1, Xdet.day[,3], Xdet.tod[,3])


Xocc = data.frame(x = rep(revi.data$coords[,1],9),
                  y = rep(revi.data$coords[,2],9),
                  elev = rep(revi.data$occ.covs$elev,9),
                  scale_elev = scale(rep(revi.data$occ.covs$elev,9)),
                 scale_elev2 = scale(rep(revi.data$occ.covs$elev,9))^2,
                  site = rep(1:373,9),
                  time = rep(1:9,each = 373),
                  Int_occ = 1,
                 spatialfield = NA) %>%
  mutate(scale_time = scale(time))
Code
elev_raster= rast(data.frame(x =  hbefElev$Easting/1000,
                                    y = hbefElev$Northing/1000,
                                    elev =  hbefElev$val))

elev_raster$scale_elev = (elev_raster$elev - mean(revi.data$occ.covs$elev)) / sd(revi.data$occ.covs$elev)
elev_raster$scale_elev2 = elev_raster$scale_elev^2 

all_elev = inla.group(c(Xocc$elev, values(elev_raster$elev)))
val_elev = sort(na.omit(unique(all_elev)))
Xocc$group_elev = all_elev[1:dim(Xocc)[1]]
elev_group_raster = elev_raster$elev
values(elev_group_raster) <- all_elev[-c(1:dim(Xocc)[1])]



pred_df = data.frame(x = rep(crds(elev_raster, na.rm = F)[,1],9),
                     y = rep(crds(elev_raster, na.rm = F)[,2],9),
                     scale_elev = rep(values(elev_raster$scale_elev),9),
                     scale_elev2 = rep(values(elev_raster$scale_elev2),9),
                     group_elev = rep(values(elev_group_raster),9),
                     time = rep(c(1:9), each= length(values(elev_raster$elev)))) %>%
  dplyr::filter(!is.na(scale_elev))

3 Model fit

3.1 Model 1

This is a separable space time model with linear predictors \[ \eta_{st} = \beta_0 + f_1(\text{elev}) + f_2(t) + f_3(s) \] where \(f_1(\text{elev})\) is a smooth (RW2) effect of the elevation \(f_2(t)\) is a AR1 effect of time \(f_3(s)\) is an IID effect of location

Code
formula1 = inla.mdata(Y,X) ~ -1 + Int_occ +  
  scale_elev + 
  scale_elev2 + 
  f(site, model =  "iid") + 
  f(time, model = "iid")


data_list = as.list(Xocc)
data_list$Y = Y
data_list$X = Xdet


time0 = system.time(model1 <- inla(formula1, 
                                   data=data_list,   
                                   family= 'occupancy',   
                                   verbose = FALSE,
                                   control.compute = list( config = TRUE,
                                                           dic  = T,
                                                           waic = T),
                                   control.fixed = list(prec.intercept = 1/2.72,
                                                        prec = 1/2.72),
                                   control.predictor = list(link  =1),
                                   control.family = list(control.link = list(model = "logit"), 
                                                         link.simple = "logit",
                                                         hyper = list(beta1 = list(param = c(0,1/2.72), 
                                                                                   initial = 0),
                                                                      beta2 = list(param = c(0,1/2.72)),
                                                                      beta3 = list(param = c(0,1/2.72))
                                                                      ))))
Warning in normalizePath(path.expand(path), winslash, mustWork):
path[1]="C:/Users/jaf_i/AppData/Local/Temp/RtmpecWRYM/file12d881fcb1e49": The
system cannot find the file specified

3.2 Model 2

Code
# set up spatial model for INLA
boundary = inla.nonconvex.hull(points = revi.data$coords, convex = .3)
mesh = inla.mesh.2d(boundary = boundary,
                    #   loc = cbind(data$X, data$Y),
                    max.edge = c(0.1,0.7),
                    min.angle = 20,
                    offset = c(.01, 1),
                    cutoff = 0.12,
)
ggplot() + inlabru::gg(mesh) +
  coord_equal() + theme_maps

Code
spde <- inla.spde2.pcmatern(
  mesh = mesh, 
  prior.range = c(5, 0.01), # prior for range
  prior.sigma = c(1, 0.5))  # prior for sd parameter

This is a space-time model with linear predictor as \[ \eta_{st} = \beta_0 + f_1(\text{elev}) + f_2(t) + \omega(s) \] with \(f_1()\) and \(f_2()\) as before while \(\omega(s)\) is a Gaussian spatial field

Code
A_sp <- inla.spde.make.A(mesh = mesh, 
                         loc = cbind(Xocc$x, Xocc$y))

formula3_2 <- inla.mdata(Y,X) ~ 
  -1 + Int_occ +  
  scale_elev + scale_elev2 + 
  f(time, model = "ar1") + 
   f(spatialfield, model=spde,A.local = A_sp) 


time2 = system.time(model3_2 <- inla(formula3_2, 
                                     data=data_list,  
                                     family= 'occupancy',  
                                     control.fixed =  list(prec = 1, prec.intercept = 1),
                                     control.compute = list(waic = TRUE, config = TRUE), 
                                     verbose = F,
                                     control.family = list(control.link = list(model = "logit"), 
                                                           link.simple = "logit",
                                                           hyper = list(beta1 = list(param = c(0,1/3), 
                                                                                   initial = 0),
                                                                      beta2 = list(param = c(0,1/3)),
                                                                      beta3 = list(param = c(0,1/3))))))
Warning in normalizePath(path.expand(path), winslash, mustWork):
path[1]="C:/Users/jaf_i/AppData/Local/Temp/RtmpecWRYM/file12d883d1b74f7": The
system cannot find the file specified

3.3 Model 3

The last model is defined as: \[ \eta_{st} = \beta_0 + f_1(\text{elev}) + \omega(s,t) \] with \(f_1()\) is as before while \(\omega(s,t)\) is a space-time gaussian spatial field with AR1 time component

Code
h.spec <- list(rho = list(prior = 'pc.cor0', param = c(0.5, 0.1)))
spde <- inla.spde2.pcmatern(
  mesh = mesh, 
  prior.range = c(5, 0.7),
  prior.sigma = c(1, 0.5),
  constr = T) 
A_sp2 <- inla.spde.make.A(mesh = mesh, group = Xocc$time,
                         loc = cbind(Xocc$x, Xocc$y))

formula3_3 <- inla.mdata(Y,X) ~ 
  -1   + Int_occ + 
  scale_elev +
  scale_elev2 + 
  f(time, model = "ar1") + 
  f(spatialfield,
    model=spde,
    A.local = A_sp2,
    group = time,
    control.group = list(model = 'ar1',hyper = h.spec))

time3 = system.time(model3_3 <- inla(formula3_3, #the formula
                                     data=data_list,  #the data stack
                                     family= 'occupancy',   #which family the data comes from
                                     control.fixed =  list(prec = 1, prec.intercept = 1),

                                     control.compute = list(waic = TRUE,config = TRUE), #model diagnostics and config = TRUE gives you the GMRF
                                     verbose = F,
                                     control.family = list(control.link = list(model = "logit"), 
                                                                                    link.simple = "logit",
                                                           hyper = list(beta1 = list(param = c(0,10), 
                                                                                   initial = 0),
                                                                      beta2 = list(param = c(0,10)),
                                                                      beta3 = list(param = c(0,10))))))

4 Results and Predictions

Code
sample1 = inla.posterior.sample(1000, model1)
sample2 = inla.posterior.sample(1000, model3_2)
sample3 = inla.posterior.sample(1000, model3_3)

4.1 Results

4.1.1 Model comparison

LGOC

Code
df_sf <- Xocc %>% mutate(id = 1:nrow(Xocc)) %>% st_as_sf(coords =c("x","y"))

# create buffer of size 300 (based on estimated range) centred at each site

buffer_25 <- st_buffer(df_sf, dist = 0.75) 

# empty lists to include the indexes of the leave-out-group for each observation i
I_i <- list()

# loop though each observation and store the leave-out-group based on the buffer
for( i in 1:nrow(df_sf)){
  
  # Temporal filtering of data within a 2 years of span of  observation i
  df_sf_subset <- df_sf %>% 
    filter( between(time,left = df_sf$time[i]-2, right = df_sf$time[i]+2)) 
  # Spatial filtering of the observations that are within the buffer of the ith observation
  Buffer_i <-df_sf_subset %>% st_intersects(buffer_25[i,],sparse = FALSE) %>% # identify 
    unlist()
  
  # obtain the indexes of the leave out group
  I_i[[i]] <-  df_sf_subset[Buffer_i,] %>%  pull(id)
  
}

Code
loocv_m1_auto <- inla.group.cv(result = model1,num.level.sets = 3)
ULOOCV_auto  = mean(log(loocv_m1_auto$cv),na.rm=T)

loocv_m2_auto <- inla.group.cv(result = model3_2,group.cv = loocv_m1_auto)
ULOOCV2_auto  = mean(log(loocv_m2_auto$cv),na.rm=T)

loocv_m3_auto <- inla.group.cv(result = model3_3,group.cv = loocv_m1_auto)
ULOOCV3_auto = mean(log(loocv_m3_auto$cv),na.rm=T)


loocv_m1_manual <- inla.group.cv(result = model1,groups = I_i )
ULOOCV_manual = mean(log(loocv_m1_manual$cv),na.rm=T)

loocv_m2_manual <- inla.group.cv(result = model3_2,group.cv = loocv_m1_manual)
ULOOCV2_manual = mean(log(loocv_m2_manual$cv),na.rm=T)

loocv_m3_manual <- inla.group.cv(result = model3_3,group.cv = loocv_m1_manual)
ULOOCV3_manual = mean(log(loocv_m3_manual$cv),na.rm=T)


 data.frame(auto= c(ULOOCV_auto,ULOOCV2_auto,ULOOCV3_auto),
            manual=c(ULOOCV_manual,ULOOCV2_manual,ULOOCV3_manual))
       auto    manual
1 -1.620063 -1.628624
2 -1.610109 -1.630825
3 -1.600649 -1.620714
Code
table = data.frame(elapsed_Time = c(time0["elapsed"], time2["elapsed"], time3["elapsed"]),
                    DIC = c(model1$dic$dic, model3_2$dic$dic, model3_3$dic$dic),
                    WAIC = c(model1$waic$waic, model3_2$waic$waic, model3_3$waic$waic),
                    mlik = c(model1$mlik[1,1],model3_2$mlik[1,1],model3_3$mlik[1,1]),
                    LGOCV_auto = c(ULOOCV_auto,ULOOCV2_auto,ULOOCV3_auto),
                    LGOCV_manual = c(ULOOCV_manual,ULOOCV2_manual,ULOOCV3_manual))
 rownames(table) = paste("Model", c(1:3))
 kable(table)
elapsed_Time DIC WAIC mlik LGOCV_auto LGOCV_manual
Model 1 8.31 9841.242 9819.030 -5053.715 -1.620063 -1.628624
Model 2 64.45 9841.242 9796.865 -5023.269 -1.610109 -1.630825
Model 3 6544.94 9841.242 9748.467 -5004.667 -1.600649 -1.620714
Code
#tt = round(table,2)
#cbind(row.names(table),paste(tt[,1],"(",tt[,2],",",tt[,3],")", sep = ""))

tt = round(rbind(model1$summary.fixed[,c(1,3,5)],
      model3_2$summary.fixed[,c(1,3,5)],
      model3_3$summary.fixed[,c(1,3,5)]),2)
cbind(row.names(tt),paste(tt[,1],"(",tt[,2],",",tt[,3],")", sep = "")) 
      [,1]           [,2]                
 [1,] "Int_occ"      "1.67(0.89,2.42)"   
 [2,] "scale_elev"   "-1.44(-1.64,-1.25)"
 [3,] "scale_elev2"  "-0.63(-0.78,-0.48)"
 [4,] "Int_occ1"     "1.03(-0.79,2.49)"  
 [5,] "scale_elev1"  "-0.84(-1.56,0.05)" 
 [6,] "scale_elev21" "-0.98(-1.46,-0.56)"
 [7,] "Int_occ2"     "1.35(-0.17,2.38)"  
 [8,] "scale_elev3"  "-1.37(-1.74,-0.99)"
 [9,] "scale_elev22" "-0.71(-0.99,-0.44)"

4.2 Predictions over space

Code
yy = c(1,9)
pred1 = pred_df %>% dplyr::filter(time%in%yy)
A3_2 = inla.spde.make.A(mesh= mesh, loc = cbind(pred1$x, pred1$y))
A3_3 = inla.spde.make.A(mesh= mesh, loc = cbind(pred1$x, pred1$y),
                        group = pred1$time)

func1 = function(...)
{
   aa = (Int_occ + 
           scale_elev * pred1$scale_elev +
          scale_elev2 * pred1$scale_elev2 +
          time[pred1$time]
  )
  rand = rnorm(length(pred1$scale_elev), 0, 1/sqrt(theta[4]))
  aa + 
    rand
}
func3_2 = function(...)
{
  aa = (Int_occ + 
           scale_elev * pred1$scale_elev +
          scale_elev2 * pred1$scale_elev2 +
           time[pred1$time] +
           (A3_2 %*% spatialfield)[,1] )
  aa
}
func3_3 = function(...)
{
  aa = (Int_occ + 
           scale_elev * pred1$scale_elev +
          scale_elev2 * pred1$scale_elev2 +
           time[pred1$time] +
           (A3_3 %*% spatialfield)[,1] 
  )
  aa
}
fix1 = inla.posterior.sample.eval(func1, sample1)
fix3_2 = inla.posterior.sample.eval(func3_2, sample2)
fix3_3 = inla.posterior.sample.eval(func3_3, sample3)


pred2 = pred1 %>%
  mutate(sd1 = apply(fix1,1,sd),
         mean1 = apply(fix1,1,mean),
         sd2 = apply(fix3_2,1,sd),
         mean2 = apply(fix3_2,1,mean),
         sd3 = apply(fix3_3,1,sd),
         mean3 = apply(fix3_3,1,mean)) 


# New facet label names for time variable
time.labs <-  c("2010", "2018") 
names(time.labs) <- c("1", "9")

# New facet label names for name variable
model.labs <- c("Model 1", "Model 2","Model 3")
names(model.labs) <- c("mean1", "mean2","mean3")

pred2 %>% dplyr::select(x,y,time, mean1, mean2, mean3) %>%
  pivot_longer(-c(x,y,time)) %>%
  ggplot() + geom_tile(aes(x,y,fill = value)) +
  coord_equal() + 
  facet_grid(time~name,labeller = labeller(time = time.labs, name = model.labs)) + scale_fill_scico(name="Occupancy \nprobability \n(logit-scaled)") + theme_maps +
   theme(text=element_text(family="serif", size=20),
                                               legend.text = element_text(size=16))

Code
ggsave(filename = "space_time_Occ_lprobs.pdf",dpi = 300,width = 4000,units = "px")
Saving 4000 x 1500 px image
Code
# New facet label names for name variable
model.labs <- c("Model 1", "Model 2","Model 3")
names(model.labs) <- c("sd1", "sd2","sd3")

pred2 %>% dplyr::select(x,y,time, sd1, sd2, sd3) %>%
  pivot_longer(-c(x,y,time)) %>%
  ggplot() + geom_tile(aes(x,y,fill = value)) +
  coord_equal() + 
  facet_grid(time~name,labeller = labeller(time = time.labs, name = model.labs)) + scale_fill_scico(name="Occupancy \nprobability  sd\n(logit-scaled)") + theme_maps +
   theme(text=element_text(family="serif", size=20),
                                               legend.text = element_text(size=16))

Code
ggsave(filename = "space_time_Occ_lprobsSD.pdf",dpi = 300,width = 4000,units = "px")
Saving 4000 x 1500 px image
Code
#' Probability of occurrence

probs1 = inla.link.logit(fix1, inverse = T)
quant1 = apply(probs1,1,quantile, c(0.025, 0.975))
probs2 = inla.link.logit(fix3_2, inverse = T)
quant2 = apply(probs2,1,quantile, c(0.025, 0.975))
probs3 = inla.link.logit(fix3_3, inverse = T)
quant3 = apply(probs3,1,quantile, c(0.025, 0.975))

pred2 = pred1 %>%
  mutate(mean1 = apply(probs1,1,mean),
         quant_range1 = quant1[2,]-quant1[1,],
         mean2 = apply(probs2,1,mean),
         quant_range2 = quant2[2,]-quant2[1,],
         mean3 = apply(probs3,1,mean),
         quant_range3 = quant3[2,]-quant3[1,]) 

model.labs <- c("Model 1", "Model 2","Model 3")
names(model.labs) <- c("mean1", "mean2","mean3")

pred2 %>% dplyr::select(x,y,time, mean1, mean2, mean3) %>%
  pivot_longer(-c(x,y,time)) %>%
  ggplot() + geom_tile(aes(x,y,fill = value)) +
  coord_equal() + 
   facet_grid(time~name,labeller = labeller(time = time.labs, name = model.labs)) + scale_fill_scico(name="Occupancy \nprobability") + theme_maps +
   theme(text=element_text(family="serif", size=20),
                                               legend.text = element_text(size=16))

Code
#ggsave(filename = "space_time_Occ_probs.pdf",dpi = 300,width = 4000,units = "px")

model.labs <- c("Model 1", "Model 2","Model 3")
names(model.labs) <- c("quant_range1", "quant_range2","quant_range3")

pred2 %>% dplyr::select(x,y,time, quant_range1, quant_range2, quant_range3) %>%
  pivot_longer(-c(x,y,time)) %>%
  ggplot() + geom_tile(aes(x,y,fill = value)) +
  coord_equal() + 
  facet_grid(time~name,labeller = labeller(time = time.labs, name = model.labs)) + scale_fill_scico(name="Difference in \nquantile") + theme_maps +
   theme(text=element_text(family="serif", size=20),
                                               legend.text = element_text(size=16))

Code
ggsave(filename = "space_time_Occ_qunatiles.pdf",dpi = 300,width = 4000,units = "px")
Saving 4000 x 1500 px image

4.3 Parameters for the detection part of the model

Code
# detection paramters ------------------------------------------------------

npar = 3
tab1 = rbind(model1$summary.hyperpar[1:npar,c(1,3,5)],
             model3_2$summary.hyperpar[1:npar,c(1,3,5)],
             model3_3$summary.hyperpar[1:npar,c(1,3,5)])
rownames(tab1) = c (c("Int detection1", "day1", "tod1"),
                    c("Int detection2", "day2", "tod2"),
                    c("Int detection3", "day3", "tod3"))

kable(tab1, booktabs = TRUE, digits = 2) %>% pack_rows(
  index = c("Model 1" = npar, "Model 2" = npar, "Model 3" = npar))
mean 0.025quant 0.975quant
Model 1
Int detection1 0.23 0.18 0.29
day1 -0.01 -0.06 0.04
tod1 -0.03 -0.08 0.03
Model 2
Int detection2 0.22 0.16 0.28
day2 -0.01 -0.06 0.05
tod2 -0.03 -0.08 0.03
Model 3
Int detection3 0.23 0.17 0.29
day3 -0.01 -0.06 0.05
tod3 -0.03 -0.08 0.03
Code
tt = round(tab1,2)
cbind(row.names(tab1),paste(tt[,1],"(",tt[,2],",",tt[,3],")", sep = ""))
      [,1]             [,2]               
 [1,] "Int detection1" "0.23(0.18,0.29)"  
 [2,] "day1"           "-0.01(-0.06,0.04)"
 [3,] "tod1"           "-0.03(-0.08,0.03)"
 [4,] "Int detection2" "0.22(0.16,0.28)"  
 [5,] "day2"           "-0.01(-0.06,0.05)"
 [6,] "tod2"           "-0.03(-0.08,0.03)"
 [7,] "Int detection3" "0.23(0.17,0.29)"  
 [8,] "day3"           "-0.01(-0.06,0.05)"
 [9,] "tod3"           "-0.03(-0.08,0.03)"

References

Doser, Jeffrey W., Andrew O. Finley, Marc Kéry, and Elise F. Zipkin. 2022. “spOccupancy: An R Package for Single-Species, Multi-Species, and Integrated Spatial Occupancy Models.” Methods in Ecology and Evolution 13 (8): 1670–78. https://doi.org/10.1111/2041-210x.13897.