# Scripts used in the paper # Rinde et al. 2014. The influence of physical factors on kelp and sea urchin distribution # in previously and still grazed areas in the NE Atlantic. Plos One.DOI: 10.1371/journal.pone.0100222 # BRT analysis of Strongylocentrotus droebachiensis data setwd("........") #set the path to the working directory where you place the data file library(gdata) # library which support import of excelfiles data_rinde = read.xls("modelldata.xlsx") # imports the model data as a dataframe page(data_rinde, method="print") # to have a look at the data names(data_rinde) # [1] "DATO" "RESTORTARE" "PRESSD" "longres" "LAT" "DEM" "logswm" "RADOPT" # [9] "SLOPE" "FORS525" "spmax5r" "spmin5r" "saltmax5r" "tempmean5 # RESTORTARE contains presence (1) and absence (0) values for recovered L. hyperborea kelp # PESSD contains presence (1) and absence (0) values for the sea urchin Strongylocentrotus droebachiensis dim(data_rinde) # 1220 14 # open libraries applied to perform the following analysis library(dismo) library(raster) library(stats) library(utils) library(sp) library(rgdal) # Performs cross-validation optimisation of a boosted regression tree (BRT) model # for PRESSD and using a family of bernoulli # Using 1220 observations and 11 predictors sd.11pred.gbm <- gbm.step(data = data_rinde, gbm.x = c(4:14), gbm.y = 3, # response = presence of SD; called PRESSD learning.rate = 0.007, tree.complexity = 5, family = "bernoulli", bag.fraction = 0.5) # ProvideS the following summary of the analysis (will vary between different runs): # fitting final gbm model with a fixed number of 2050 trees for PRESSD # mean total deviance = 0.898 # mean residual deviance = 0.22 # estimated cv deviance = 0.496 ; se = 0.027 # training data correlation = 0.91 # cv correlation = 0.684 ; se = 0.019 # training data AUC score = 0.994 # cv AUC score = 0.926 ; se = 0.01 # elapsed time - 0.66 minutes summary(sd.11pred.gbm) # provides the relative influence of each predictor: # var rel.inf # LAT LAT 28.052600 # DEM DEM 12.222945 # tempmean5r tempmean5r 11.000694 # spmin5r spmin5r 10.163918 # longres longres 9.847238 # logswm logswm 5.914126 # saltmax5r saltmax5r 5.610500 # FORS525 FORS525 4.418280 # SLOPE SLOPE 4.372515 # spmax5r spmax5r 4.274390 # RADOPT RADOPT 4.122794 gbm.plot(sd.11pred.gbm, plot.layout = c(3,4)) # plots the response curves for each predictor variable sd.11pred.gbm.fitvalues <- sd.11pred.gbm$fitted # extracts the fitted values sd.11pred.gbm.residuals <- sd.11pred.gbm$residuals # extracts the residuals write.table(sd.11pred.gbm.fitvalues, file = "sd_BRT.11pred_fit.txt") # writes the fitted values to a text file that can be opened in excel write.table(sd.manutv.11gampred.residuals, file = "sd_BRT.11pred_resid.txt") # writes the residuals to a text file that can be opened in excel # identify interactioons within the model: int.modelsd <-gbm.interactions(sd.11pred.gbm) #gbm.interactions - version 2.9 #Cross tabulating interactions for gbm model with 11 predictors #1 2 3 4 5 6 7 8 9 10 # View list of interaction effects int.modelsd$rank.list # var1.index var1.names var2.index var2.names int.size # 1 3 DEM 2 LAT 190.81 # 2 9 spmin5r 2 LAT 94.36 # 3 11 tempmean5r 2 LAT 57.57 # 4 11 tempmean5r 9 spmin5r 39.03 # 3D plots of the two most pronounced interactions: par(mfrow=c(1,1)) gbm.perspec(sd.11pred.gbm,3,2,smooth="average") par(mfrow=c(1,1)) gbm.perspec(sd.11pred.gbm,9,2,smooth="average") # GAM models library(mgcv) # library for gam analysis library(MuMIn) # library for creating model selection tables options(na.action="na.fail") # needs to be set before performing the call to dredge below # GAM models without interaction sd.k3uint <- gam(PRESSD ~ s(DEM, bs="cr", k=3) + s(SLOPE, bs="cr", k=3) + s(RADOPT, bs="cr", k=3) + s(FORS525, bs="cr", k=3) + s(logswm, bs="cr", k=3) + s(LAT, bs="cr", k=3) + s(longres, bs="cr", k=3) + s(tempmean5r, bs="cr", k=3) + s(saltmax5r, bs="cr", k=3) + s(spmax5r, bs="cr", k=3) + s(spmin5r, bs="cr", k=3), family = binomial, method = "ML", data=data_rinde) dredge.sd.k3uint <- dredge(sd.k3uint) # creates MuMIn selection table write.table(dredge.sd.k3uint, file = "dredge.sd.k3uint.txt",sep = ";") top.sd.k3uint.dlt4 <- get.models(dredge.sd.k3uint, delta <4) n <- length(top.sd.k3uint.dlt4) # number of models with delta AICc < 4; 13 imptop.top.sd.k3uint.dlt4 <- importance(top.sd.k3uint.dlt4) # calculates the relative importance of each predictor given this subset of models write.table(imptop.top.sd.k3uint.dlt4, file = "imptop.top.sd.k3uint.dlt4.txt",sep = ";") # create an average model from this subset of models avg.top.sd.k3uint.dlt4 <- model.avg(dredge.sd.k3uint, delta <4, se.fit =T) # The partial effects of the best model without interaction: plot(top.sd.k3uint.dlt4$`1822`,all.terms=T, pages=1,scale=0) summary(top.sd.k3uint.dlt4$`1822`) # summary of the best model without interaction # GAM models with interaction between depth and latitude sd.k3intdemlat <- gam(PRESSD ~ s(DEM, bs="cr", k=3) + s(SLOPE, bs="cr", k=3) + s(RADOPT, bs="cr", k=3) + s(FORS525, bs="cr", k=3) + s(logswm, bs="cr", k=3) + + s(DEM,LAT, k=6) + s(LAT, bs="cr", k=3) + s(longres, bs="cr", k=3) + s(tempmean5r, bs="cr", k=3) + s(saltmax5r, bs="cr", k=3) + s(spmax5r, bs="cr", k=3) + s(spmin5r, bs="cr", k=3), family = binomial, method = "ML", data=data_rinde) # creates a subset of models to be exclued in the model selection procedure msubset <- expression(!`s(DEM, LAT, k = 6)` | xor(`s(DEM, bs = "cr", k = 3)`| `s(LAT, bs = "cr", k = 3)`, `s(DEM, LAT, k = 6)`)) dredge.sd.k3intdemlat <- dredge(sd.k3intdemlat, subset = msubset) # creates MuMIn selection table write.table(dredge.sd.k3intdemlat, file = "dredge.sd.k3intdemlat.txt",sep = ";") top.dredge.sd.k3intdemlat <- get.models(dredge.sd.k3intdemlat, delta <4) # create a subset of models with delta AICc <4 n <- length(top.dredge.sd.k3intdemlat) # number of models with delta AICc < 4; 13 # create an average model from this subset of models with interaction between latitude and depth avg.top.dredge.sd.k3intdemlat <- model.avg(dredge.sd.k3intdemlat, delta <4, se.fit =T) imp.top.dredge.sd.k3intdemlat <- importance(top.dredge.sd.k3intdemlat) # calculates the relative importance of each predictor given this subset of models write.table(imp.top.dredge.sd.k3intdemlat, file = "imp.top.dredge.sd.k3intdemlat.txt",sep = ";") # The partial effects of the best S.d. model with interaction: plot(top.dredge.sd.k3intdemlat$`3642`,all.terms=T, pages=1,scale=0) summary(top.dredge.sd.k3intdemlat$`3642`) # summary of the best model with interaction # EVALUATION WITH TESTDATA # Read test data testdta = read.xls("testdata.xlsx") page(testdta,method="print") names(testdta) # [1] "DATO" "RESTORTARE" "PRESSD" "longres" "LAT" "DEM" "logswm" "RADOPT" # [9] "SLOPE" "FORS525" "spmax5r" "spmin5r" "saltmax5r" "tempmean5r" dim(testdta) # [1] 403 14 head(testdta) detach("package:gdata") # predicts the response of the BRT s.d. model based on the predictorvariables in the test data: predsd.11pred <- predict(sd.11pred.gbm, n.trees=sd.11pred.gbm$gbm.call$best.trees, testdta, ext=NULL, progress="windows", type="response") # predicts the response of the four GAM models based on the predictorvariables in the test data: bestintsd <- predict(top.dredge.sd.k3intdemlat$`3642`, newdata = testdta, se.fit = TRUE, type="response") bestXintsd <- predict(top.sd.k3uint.dlt4$`1822`, newdata = testdta, se.fit = TRUE, type="response") avgintSd <- predict(avg.top.dredge.sd.k3intdemlat, newdata = testdta, se.fit = TRUE, type="response") avgXintSd <- predict(avg.top.sd.k3uint.dlt4, newdata = testdta, se.fit = TRUE, type="response") # creates a table with the predicted probability from each model to the observations in the testdata predtestdataSD <- data.frame(cbind(bestintsd$fit, bestXintsd$fit, avgintSd$fit, avgXintSd$fit, predsd.11pred)) write.table(predtestdataSD, file = "predtestdataSD.txt",sep = ";") # Evaluation of the five established models for sea urchin occurrence: # by use of Freeman og Moison (2008)s R package PresenceAbsence library(PresenceAbsence) SPDATA=read.xls("final sdmodelstested.xlsx") # imports a dataset with modelled probability values for the five SD models page(SPDATA,method="print") head(SPDATA) # SPECIES OBSERVED bestint bestxint avgint avgxint BRT #1 S.droeb 0 0.27824455 0.16590855 0.27491545 0.16484934 0.231496931 #2 S.droeb 0 0.09372834 0.07999701 0.08369968 0.06910313 0.027429409 #3 S.droeb 0 0.55957966 0.61905611 0.55884454 0.60460086 0.075820536 #4 S.droeb 0 0.09657263 0.08713065 0.07148042 0.07153020 0.007227278 #5 S.droeb 0 0.12694097 0.17546410 0.13345791 0.18307099 0.034235497 #6 S.droeb 0 0.13115687 0.08873195 0.11496483 0.09158609 0.020037404 #defines some variables: species <- as.character(unique(SPDATA$SPECIES)) model.names <- as.character(names(SPDATA)[-c(1, 2)]) N.models <- ncol(SPDATA) - 2 N.sp <- length(species) N.obs <- length(SPDATA$SPECIES[SPDATA$SPECIES == species[1]]) Obs.prev <- table(SPDATA$SPECIES, SPDATA$OBSERVED)[, 2]/N.obs Obs.prev <- round(Obs.prev, digits = 2) #Accuracy tables #The function presence.absence.accuracy calculates basic accuracy measures (PCC, sensitivity, #specifcity, Kappa, and AUC) for presence-absence data, and (optionally) the associated #standard deviations. The PresenceAbsence package uses the method from DeLong et al. #(1988) to calculate AUC and its associated standard deviation. By default the function will #calculate all basic accuracy measures at the threshold of 0.5: sp <- 1 DATA <- SPDATA[SPDATA$SPECIES == species[sp], ] accu <- presence.absence.accuracy(DATA) accu[, -c(1, 2)] <- signif(accu[, -c(1, 2)], digits = 2) print(paste("Species:", species[sp])) accu # relativt lav auc # accu # model threshold PCC sensitivity specificity Kappa AUC PCC.sd sensitivity.sd specificity.sd Kappa.sd AUC.sd #1 bestint 0.5 0.89 0.49 0.96 0.51 0.79 0.015 0.067 0.011 0.065 0.037 #2 bestxint 0.5 0.90 0.51 0.96 0.52 0.79 0.015 0.067 0.011 0.064 0.037 #3 avgint 0.5 0.89 0.47 0.96 0.48 0.79 0.016 0.067 0.011 0.066 0.037 #4 avgxint 0.5 0.90 0.51 0.96 0.52 0.79 0.015 0.067 0.011 0.064 0.037 #5 BRT 0.5 0.90 0.63 0.95 0.59 0.88 0.015 0.064 0.012 0.059 0.026 # optimal cut-off thresholds given different options. The youden index is the one called MaxSens+Spec sp <- "S.droeb" DATA <- SPDATA[SPDATA$SPECIES == sp, ] print(paste("Species:", sp)) optimal.thresholds(DATA, opt.methods = 1:12, req.sens = 0.9, req.spec = 0.9, FPC = 2, FNC = 1) ## # Method bestint bestxint avgint avgxint BRT #1 Default 0.5000000 0.5000000 0.5000000 0.5000000 0.5000000 #2 Sens=Spec 0.1200000 0.1200000 0.1200000 0.1200000 0.1000000 #3 MaxSens+Spec 0.2300000 0.2200000 0.2300000 0.2300000 0.3100000 #4 MaxKappa 0.5000000 0.5250000 0.5250000 0.5100000 0.5600000 #5 MaxPCC 0.5350000 0.5450000 0.5375000 0.5100000 0.5780000 #6 PredPrev=Obs 0.3950000 0.3600000 0.3900000 0.3650000 0.4300000 #7 ObsPrev 0.1414392 0.1414392 0.1414392 0.1414392 0.1414392 #8 MeanProb 0.1659486 0.1644320 0.1658245 0.1650108 0.1546393 #9 MinROCdist 0.2300000 0.2200000 0.2300000 0.2300000 0.0900000 #10 ReqSens 0.0400000 0.0300000 0.0400000 0.0300000 0.0300000 #11 ReqSpec 0.3600000 0.3000000 0.3500000 0.3100000 0.2300000 #12 Cost 0.6050000 0.6800000 0.5600000 0.6900000 0.7900000 # Calibration plots for the models par(oma = c(0, 5, 0, 0), mar = c(3, 3, 3, 1), mfrow = c(2,3), cex = 0.7, cex.lab = 1.4, mgp = c(2, 0.5, 0)) sp <- c("S.droeb") for (mod in 1:5) { calibration.plot(DATA, which.model = mod, color = mod + 1, main = model.names[mod], xlab = "Pred Prob Occ", ylab = "Obs Occ as Prop of Sites Surveyed") if (mod == 1) { mtext(sp[1], side = 3, line = 6.3, cex = 1.6) mtext(paste("(", Obs.prev[names(Obs.prev) == sp[1]],Obs.prev[1], "Prevalence )"), side = 3, line = 4.3, cex = 1.3) } }