# Scripts used in the paper 
# Rinde et al. 2014. The influence of physical factors on kelp and sea urchin distribution
# in previously and still grazed areas in the NE Atlantic. Plos One.DOI: 10.1371/journal.pone.0100222


# BRT analysis of Strongylocentrotus droebachiensis data

setwd("........") 
#set the path to the working directory where you place the data file

library(gdata) # library which support import of excelfiles

data_rinde = read.xls("modelldata.xlsx")       # imports the model data as a dataframe
page(data_rinde, method="print") # to have a look at the data
names(data_rinde)
# [1] "DATO"       "RESTORTARE" "PRESSD"     "longres"    "LAT"        "DEM"        "logswm"     "RADOPT"    
# [9] "SLOPE"      "FORS525"    "spmax5r"    "spmin5r"    "saltmax5r"  "tempmean5 

# RESTORTARE contains presence (1) and absence (0) values for recovered L. hyperborea kelp
# PESSD contains presence (1) and absence (0) values for the sea urchin Strongylocentrotus droebachiensis

dim(data_rinde) # 1220 14 

# open libraries applied to perform the following analysis
library(dismo)
library(raster)
library(stats)
library(utils)
library(sp)
library(rgdal)


# Performs cross-validation optimisation of a boosted regression tree (BRT) model 
# for PRESSD and using a family of bernoulli 
# Using 1220 observations and 11 predictors 

sd.11pred.gbm <- gbm.step(data = data_rinde,        
     gbm.x = c(4:14),                  
     gbm.y = 3,   # response = presence of SD; called PRESSD                                         
     learning.rate = 0.007,         
     tree.complexity = 5,
     family = "bernoulli",
     bag.fraction = 0.5)

# ProvideS the following summary of the analysis (will vary between different runs):

# fitting final gbm model with a fixed number of  2050  trees for  PRESSD 
# mean total deviance = 0.898 
# mean residual deviance = 0.22 
# estimated cv deviance = 0.496 ; se = 0.027 
# training data correlation = 0.91 
# cv correlation =  0.684 ; se = 0.019 
# training data AUC score = 0.994 
# cv AUC score = 0.926 ; se = 0.01 
# elapsed time -  0.66 minutes

summary(sd.11pred.gbm) 

# provides the relative influence of each predictor:
# var   rel.inf
# LAT               LAT 28.052600
# DEM               DEM 12.222945
# tempmean5r tempmean5r 11.000694
# spmin5r       spmin5r 10.163918
# longres       longres  9.847238
# logswm         logswm  5.914126
# saltmax5r   saltmax5r  5.610500
# FORS525       FORS525  4.418280
# SLOPE           SLOPE  4.372515
# spmax5r       spmax5r  4.274390
# RADOPT         RADOPT  4.122794
     

gbm.plot(sd.11pred.gbm, plot.layout = c(3,4))   # plots the response curves for each predictor variable


sd.11pred.gbm.fitvalues <- sd.11pred.gbm$fitted # extracts the fitted values
sd.11pred.gbm.residuals <- sd.11pred.gbm$residuals # extracts the residuals

write.table(sd.11pred.gbm.fitvalues, file = "sd_BRT.11pred_fit.txt") # writes the fitted values to a text file that can be opened in excel
write.table(sd.manutv.11gampred.residuals, file = "sd_BRT.11pred_resid.txt") # writes the residuals to a text file that can be opened in excel



# identify interactioons within the model:
int.modelsd <-gbm.interactions(sd.11pred.gbm)
#gbm.interactions - version 2.9 
#Cross tabulating interactions for gbm model with 11 predictors
#1  2  3  4  5  6  7  8  9  10 

# View list of interaction effects
int.modelsd$rank.list
# var1.index var1.names var2.index var2.names int.size
# 1          3        DEM          2        LAT   190.81
# 2          9    spmin5r          2        LAT    94.36
# 3         11 tempmean5r          2        LAT    57.57
# 4         11 tempmean5r          9    spmin5r    39.03



# 3D plots of the two most pronounced interactions: 
par(mfrow=c(1,1))
gbm.perspec(sd.11pred.gbm,3,2,smooth="average")

par(mfrow=c(1,1))
gbm.perspec(sd.11pred.gbm,9,2,smooth="average")



# GAM models


library(mgcv)  # library for gam analysis

library(MuMIn)  # library for creating model selection tables

options(na.action="na.fail") # needs to be set before performing the call to dredge below



# GAM models without interaction


sd.k3uint <- gam(PRESSD ~ s(DEM, bs="cr", k=3) + s(SLOPE, bs="cr", k=3) + s(RADOPT, bs="cr", k=3) +  s(FORS525, bs="cr", k=3) + s(logswm, bs="cr", k=3) + s(LAT, bs="cr", k=3) + s(longres, bs="cr", k=3) + s(tempmean5r, bs="cr", k=3) + s(saltmax5r, bs="cr", k=3) + s(spmax5r, bs="cr", k=3) + s(spmin5r, bs="cr", k=3), family = binomial, method = "ML", data=data_rinde)


dredge.sd.k3uint <- dredge(sd.k3uint)    # creates MuMIn selection table 
write.table(dredge.sd.k3uint, file = "dredge.sd.k3uint.txt",sep = ";")

top.sd.k3uint.dlt4 <- get.models(dredge.sd.k3uint, delta <4)

n <- length(top.sd.k3uint.dlt4)  # number of models with delta AICc < 4; 13
imptop.top.sd.k3uint.dlt4 <- importance(top.sd.k3uint.dlt4) # calculates the relative importance of each predictor given this subset of models

write.table(imptop.top.sd.k3uint.dlt4, file = "imptop.top.sd.k3uint.dlt4.txt",sep = ";")

# create an average model from this subset of models
avg.top.sd.k3uint.dlt4 <- model.avg(dredge.sd.k3uint, delta <4, se.fit =T)   

# The partial effects of the best model without interaction:
plot(top.sd.k3uint.dlt4$`1822`,all.terms=T, pages=1,scale=0) 

summary(top.sd.k3uint.dlt4$`1822`)  # summary of the best model without interaction


# GAM models with interaction between depth and latitude

sd.k3intdemlat  <- gam(PRESSD ~ s(DEM, bs="cr", k=3) + s(SLOPE, bs="cr", k=3) + s(RADOPT, bs="cr", k=3) +  s(FORS525, bs="cr", k=3) + s(logswm, bs="cr", k=3) + + s(DEM,LAT, k=6) + s(LAT, bs="cr", k=3) + s(longres, bs="cr", k=3) + s(tempmean5r, bs="cr", k=3) + s(saltmax5r, bs="cr", k=3) + s(spmax5r, bs="cr", k=3) + s(spmin5r, bs="cr", k=3), family = binomial, method = "ML", data=data_rinde)

# creates a subset of models to be exclued in the model selection procedure
msubset <-  expression(!`s(DEM, LAT, k = 6)` | xor(`s(DEM, bs = "cr", k = 3)`| `s(LAT, bs = "cr", k = 3)`, `s(DEM, LAT, k = 6)`))

dredge.sd.k3intdemlat <- dredge(sd.k3intdemlat, subset = msubset)   # creates MuMIn selection table 
write.table(dredge.sd.k3intdemlat, file = "dredge.sd.k3intdemlat.txt",sep = ";")   

top.dredge.sd.k3intdemlat <- get.models(dredge.sd.k3intdemlat, delta <4)  # create a subset of models with delta AICc <4
n <- length(top.dredge.sd.k3intdemlat)   # number of models with delta AICc < 4; 13

# create an average model from this subset of models with interaction between latitude and depth
avg.top.dredge.sd.k3intdemlat <- model.avg(dredge.sd.k3intdemlat, delta <4, se.fit =T)   

imp.top.dredge.sd.k3intdemlat <- importance(top.dredge.sd.k3intdemlat) # calculates the relative importance of each predictor given this subset of models

write.table(imp.top.dredge.sd.k3intdemlat, file = "imp.top.dredge.sd.k3intdemlat.txt",sep = ";")

# The partial effects of the best S.d. model with interaction:
plot(top.dredge.sd.k3intdemlat$`3642`,all.terms=T, pages=1,scale=0)  

summary(top.dredge.sd.k3intdemlat$`3642`)  # summary of the best model with interaction


# EVALUATION WITH TESTDATA


# Read test data

testdta = read.xls("testdata.xlsx")

page(testdta,method="print")  

names(testdta)
# [1] "DATO"       "RESTORTARE" "PRESSD"     "longres"    "LAT"        "DEM"        "logswm"     "RADOPT"    
# [9] "SLOPE"      "FORS525"    "spmax5r"    "spmin5r"    "saltmax5r"  "tempmean5r"

dim(testdta)      
# [1] 403  14       

head(testdta)

detach("package:gdata")

# predicts the response of the BRT s.d. model based on the predictorvariables in the test data:

predsd.11pred <-  predict(sd.11pred.gbm, n.trees=sd.11pred.gbm$gbm.call$best.trees, testdta, ext=NULL, progress="windows",  type="response")

# predicts the response of the four GAM models based on the predictorvariables in the test data:

bestintsd <- predict(top.dredge.sd.k3intdemlat$`3642`, newdata = testdta, se.fit = TRUE, type="response")
bestXintsd <- predict(top.sd.k3uint.dlt4$`1822`, newdata = testdta, se.fit = TRUE, type="response")
avgintSd <- predict(avg.top.dredge.sd.k3intdemlat, newdata = testdta, se.fit = TRUE, type="response")
avgXintSd <- predict(avg.top.sd.k3uint.dlt4, newdata = testdta, se.fit = TRUE, type="response")

# creates a table with the predicted probability from each model to the observations in the testdata
predtestdataSD <- data.frame(cbind(bestintsd$fit, bestXintsd$fit, avgintSd$fit, avgXintSd$fit, predsd.11pred)) 

write.table(predtestdataSD, file = "predtestdataSD.txt",sep = ";")

# Evaluation of the five established models for sea urchin occurrence:
# by use of Freeman og Moison (2008)s R package PresenceAbsence

library(PresenceAbsence)


SPDATA=read.xls("final sdmodelstested.xlsx")      

# imports a dataset with modelled probability values for the five SD models
page(SPDATA,method="print")                                  
head(SPDATA)

#  SPECIES OBSERVED    bestint   bestxint     avgint    avgxint         BRT
#1 S.droeb        0 0.27824455 0.16590855 0.27491545 0.16484934 0.231496931
#2 S.droeb        0 0.09372834 0.07999701 0.08369968 0.06910313 0.027429409
#3 S.droeb        0 0.55957966 0.61905611 0.55884454 0.60460086 0.075820536
#4 S.droeb        0 0.09657263 0.08713065 0.07148042 0.07153020 0.007227278
#5 S.droeb        0 0.12694097 0.17546410 0.13345791 0.18307099 0.034235497
#6 S.droeb        0 0.13115687 0.08873195 0.11496483 0.09158609 0.020037404

#defines some variables:

species <- as.character(unique(SPDATA$SPECIES))      
model.names <- as.character(names(SPDATA)[-c(1, 2)])
N.models <- ncol(SPDATA) - 2
N.sp <- length(species)
N.obs <- length(SPDATA$SPECIES[SPDATA$SPECIES == species[1]])
Obs.prev <- table(SPDATA$SPECIES, SPDATA$OBSERVED)[, 2]/N.obs
Obs.prev <- round(Obs.prev, digits = 2)


#Accuracy tables
#The function presence.absence.accuracy calculates basic accuracy measures (PCC, sensitivity,
#specifcity, Kappa, and AUC) for presence-absence data, and (optionally) the associated
#standard deviations. The PresenceAbsence package uses the method from DeLong et al.
#(1988) to calculate AUC and its associated standard deviation. By default the function will
#calculate all basic accuracy measures at the threshold of 0.5:

sp <- 1
DATA <- SPDATA[SPDATA$SPECIES == species[sp], ]
accu <- presence.absence.accuracy(DATA)
accu[, -c(1, 2)] <- signif(accu[, -c(1, 2)], digits = 2)
print(paste("Species:", species[sp]))
accu

# relativt lav auc
# accu
#     model threshold  PCC sensitivity specificity Kappa  AUC PCC.sd sensitivity.sd specificity.sd Kappa.sd AUC.sd
#1  bestint       0.5 0.89        0.49        0.96  0.51 0.79  0.015          0.067          0.011    0.065  0.037
#2 bestxint       0.5 0.90        0.51        0.96  0.52 0.79  0.015          0.067          0.011    0.064  0.037
#3   avgint       0.5 0.89        0.47        0.96  0.48 0.79  0.016          0.067          0.011    0.066  0.037
#4  avgxint       0.5 0.90        0.51        0.96  0.52 0.79  0.015          0.067          0.011    0.064  0.037
#5      BRT       0.5 0.90        0.63        0.95  0.59 0.88  0.015          0.064          0.012    0.059  0.026


# optimal cut-off thresholds given different options. The youden index is the one called MaxSens+Spec
sp <- "S.droeb"
DATA <- SPDATA[SPDATA$SPECIES == sp, ]
print(paste("Species:", sp))

optimal.thresholds(DATA, opt.methods = 1:12, req.sens = 0.9, req.spec = 0.9, FPC = 2, FNC = 1)
##
#         Method   bestint  bestxint    avgint   avgxint       BRT
#1       Default 0.5000000 0.5000000 0.5000000 0.5000000 0.5000000
#2     Sens=Spec 0.1200000 0.1200000 0.1200000 0.1200000 0.1000000
#3  MaxSens+Spec 0.2300000 0.2200000 0.2300000 0.2300000 0.3100000
#4      MaxKappa 0.5000000 0.5250000 0.5250000 0.5100000 0.5600000
#5        MaxPCC 0.5350000 0.5450000 0.5375000 0.5100000 0.5780000
#6  PredPrev=Obs 0.3950000 0.3600000 0.3900000 0.3650000 0.4300000
#7       ObsPrev 0.1414392 0.1414392 0.1414392 0.1414392 0.1414392
#8      MeanProb 0.1659486 0.1644320 0.1658245 0.1650108 0.1546393
#9    MinROCdist 0.2300000 0.2200000 0.2300000 0.2300000 0.0900000
#10      ReqSens 0.0400000 0.0300000 0.0400000 0.0300000 0.0300000
#11      ReqSpec 0.3600000 0.3000000 0.3500000 0.3100000 0.2300000
#12         Cost 0.6050000 0.6800000 0.5600000 0.6900000 0.7900000



# Calibration plots for the models

par(oma = c(0, 5, 0, 0), mar = c(3, 3, 3, 1), mfrow = c(2,3), cex = 0.7, cex.lab = 1.4, mgp = c(2, 0.5, 0))
sp <- c("S.droeb")

for (mod in 1:5) {
  calibration.plot(DATA, which.model = mod, color = mod +
                     1, main = model.names[mod], xlab = "Pred Prob Occ", ylab = "Obs Occ as Prop of Sites Surveyed")
  if (mod == 1) {
    mtext(sp[1], side = 3, line = 6.3, cex = 1.6)
    mtext(paste("(", Obs.prev[names(Obs.prev) == sp[1]],Obs.prev[1],
                "Prevalence )"), side = 3, line = 4.3, cex = 1.3)
  }
}