# ==============================================================================
# Work with gps data downloaded from movebank
# ==============================================================================
# Download method:
# ---------------
# Go to: https://www.movebank.org/panel_embedded_movebank_webapp
# Click on Studies on the menu bar on the left side of the viewer
# Choose my study
# Click on Download data
# Choose: csv, include undeployed locations,
# add UTM coordinates, add study local time
# (see screenshot)
# Download above for both gps and acceleration data
#
# NOTE: Don't open downloaded files in excel, because it's somehow corrupting
# the date column, making it difficult for R to read it correctly
# ==============================================================================
# * Environment
# ==============================================================================
source("R/00_fxns.R")
library(maptools)
library(move)
library(dplyr)
library(data.table)
ACC_file = file.path("data/raw/e-obs", "movebank_acc.csv") # acc file
GPS_file = file.path("data/raw/e-obs", "movebank_gps.csv") # gps file
crop_file = file.path("data/raw/bats", "eobs_info.csv") # time-crop file
out_dir = "data/acc_behavL/"
if( !dir.exists(out_dir)) dir.create(out_dir)
out_filename = "acc_gps_behavL.RDS" # the axis info is appended to this filename
axises = c("xy", "xz", "yz", "xyz") # choose one or as many options as you want
# ==============================================================================
# * Read in acceleration and GPS data
# ==============================================================================
# read in the acceleration data
accALL = read.csv(ACC_file, stringsAsFactors = FALSE)
# read in the gps data and assign local timestamp as main one
gpsALL = read.csv(GPS_file, stringsAsFactors = FALSE)
# read in csv with timestamp-crop data
time_crop = read.csv(crop_file, stringsAsFactors = FALSE)
# ==============================================================================
# * Get acceleration and GPS data ready for processing
# ==============================================================================
# ** crop ACC anf GPS data to retain bat-specific data
# ------------------------------------------------------------------------------
# This also removes data for tags 5314 & 5326, which had very sparse data
tags = unique(time_crop$tagID)
accALL = lapply(tags, function(ID, time_crop, accALL) {
df = accALL[ accALL$tag.local.identifier %in% ID, ]
start_ts = time_crop$start_ts_UTC[ time_crop$tagID %in% ID ]
stop_ts = time_crop$stop_ts_UTC[ time_crop$tagID %in% ID ]
df[ df$timestamp >= start_ts &
df$timestamp < stop_ts, ]
}, time_crop, accALL) %>%
rbindlist
gpsALL = lapply(tags, function(ID, time_crop, gpsALL) {
df = gpsALL[ gpsALL$tag.local.identifier %in% ID, ]
start_ts = time_crop$start_ts_UTC[ time_crop$tagID %in% ID ]
stop_ts = time_crop$stop_ts_UTC[ time_crop$tagID %in% ID ]
df[ df$timestamp >= start_ts &
df$timestamp < stop_ts, ]
}, time_crop, gpsALL) %>%
rbindlist
# ** switch timestamp to local time and backup original to eobs.timestamp
# ------------------------------------------------------------------------------
accALL$eobs.timestamp = accALL$timestamp
accALL$timestamp = accALL$study.local.timestamp
gpsALL$eobs.timestamp = gpsALL$timestamp
gpsALL$timestamp = gpsALL$study.local.timestamp
# tell R what the local timestamp format is
accALL$timestamp = as.POSIXct(accALL$timestamp, format="%Y-%m-%d %H:%M:%OS",
tz="Africa/Dar_es_Salaam")
gpsALL$timestamp = as.POSIXct(gpsALL$timestamp, format="%Y-%m-%d %H:%M:%OS",
tz="Africa/Dar_es_Salaam")
# ** populate the individual local identifier column with tag ID info
# ------------------------------------------------------------------------------
# (needed by the move package)
accALL$tagID = paste0("K", accALL$tag.local.identifier)
gpsALL$tagID = paste0("K", gpsALL$tag.local.identifier)
# ** for GPS data, keep those with status "A", which is the good quality data
# ------------------------------------------------------------------------------
gpsALL = gpsALL[ gpsALL$eobs.status %in% "A", ]
# ** make a list of acceleration & gps data
# ------------------------------------------------------------------------------
accL = split(accALL, accALL$tagID)
gpsL = split(gpsALL, gpsALL$tagID)
# check if individuals names are the same in acc file as in the move object
identical(sort(names(gpsL)), sort(names(accL)))
Acceleration burst data is one long string of acc recordings along 3-axes: X, Y, and Z. With an ACC BYTE COUNT of 1135 bytes, the each ACC burst corresponds to 792 sequential recordings along the X, Y, and Z axes.
E.g. a reading of an acc burst:
accL[[1]]$eobs.accelerations.raw[1]
## [1] "1936 2537 2070 1934 2543 2068 1934 2539 2073 1932 2540 2070 1929 2542 2073 1931 2540 2078 1933 2541 2070 1936 2538 2074 1936 2538 2074 1935 2536 2070 1934 2542 2074 1935 2539 2076 1932 2541 2077 1933 2539 2080 1931 2538 2076 1932 2539 2075 1932 2537 2072 1933 2542 2072 1934 2540 2072 1934 2539 2073 1929 2541 2071 1932 2538 2070 1933 2537 2074 1932 2539 2073 1932 2538 2069 1934 2544 2072 1934 2538 2075 1933 2538 2076 1936 2533 2078 1934 2541 2077 1933 2538 2074 1935 2541 2079 1930 2543 2075 1932 2540 2076 1932 2540 2077 1932 2536 2071 1934 2538 2075 1935 2535 2076 1933 2540 2074 1932 2536 2073 1933 2542 2076 1930 2537 2074 1930 2540 2077 1932 2542 2079 1933 2537 2073 1936 2538 2073 1939 2542 2076 1934 2542 2070 1934 2538 2074 1931 2540 2075 1925 2542 2072 1928 2539 2073 1930 2540 2080 1932 2537 2078 1933 2533 2074 1935 2542 2076 1936 2538 2075 1933 2540 2072 1931 2545 2078 1930 2540 2076 1931 2536 2076 1932 2541 2083 1928 2540 2075 1933 2537 2076 1935 2540 2076 1937 2541 2067 1937 2539 2073 1937 2537 2076 1935 2540 2075 1934 2532 2079 1932 2541 2076 1930 2539 2075 1930 2539 2071 1932 2543 2078 1935 2537 2073 1937 2541 2073 1937 2543 2078 1936 2541 2069 1934 2539 2074 1930 2537 2075 1929 2537 2071 1928 2537 2075 1933 2537 2076 1934 2541 2076 1941 2535 2079 1936 2543 2074 1938 2540 2073 1941 2542 2074 1935 2547 2071 1935 2539 2073 1934 2540 2073 1931 2539 2070 1933 2540 2071 1932 2536 2077 1929 2541 2078 1929 2534 2073 1933 2539 2077 1933 2538 2076 1933 2538 2078 1932 2542 2080 1931 2537 2077 1931 2533 2079 1935 2542 2084 1931 2541 2074 1935 2539 2074 1935 2542 2072 1936 2543 2064 1934 2539 2069 1933 2537 2071 1931 2540 2070 1933 2533 2074 1932 2540 2075 1933 2537 2072 1933 2541 2072 1932 2545 2080 1935 2537 2076 1936 2537 2077 1933 2544 2082 1932 2542 2074 1936 2537 2075 1934 2540 2073 1934 2537 2068 1932 2539 2072 1929 2539 2074 1927 2538 2076 1930 2536 2079 1930 2538 2075 1931 2539 2074 1931 2538 2074 1933 2544 2078 1936 2539 2076 1938 2539 2076 1932 2540 2075 1934 2541 2071 1934 2540 2076 1932 2539 2071 1929 2539 2068 1933 2539 2071 1933 2538 2071 1934 2541 2075 1939 2539 2078 1937 2543 2071 1937 2539 2074 1941 2545 2082 1935 2540 2071 1934 2539 2075 1930 2538 2076 1930 2537 2068 1929 2540 2071 1932 2536 2076 1932 2538 2075 1936 2531 2077 1935 2541 2076 1934 2540 2073 1931 2537 2073 1929 2543 2078 1928 2538 2076 1929 2536 2076 1933 2542 2083 1930 2541 2075 1933 2537 2079 1932 2539 2079 1935 2538 2069 1933 2540 2072 1934 2539 2073 1930 2537 2075 1929 2537 2076 1928 2542 2071 1928 2539 2073 1931 2540 2072 1930 2541 2073 1930 2537 2074 1929 2538 2077 1926 2536 2077 1929 2538 2074 1931 2536 2080 1931 2536 2078 1929 2537 2075 1931 2540 2077 1930 2538 2076 1931 2540 2077 1935 2537 2079 1934 2539 2071 1934 2537 2069 1932 2545 2075 1928 2543 2069 1931 2539 2071 1928 2541 2075 1932 2540 2066 1933 2539 2074 1934 2536 2076 1932 2541 2076 1934 2531 2080 1933 2540 2080 1930 2539 2076 1929 2540 2075 1930 2542 2081 1931 2540 2073 1935 2538 2075 1940 2543 2082 1937 2540 2070 1938 2536 2073 1932 2538 2072 1931 2536 2066 1926 2539 2071 1925 2540 2077 1929 2537 2075 1931 2531 2079 1932 2539 2078 1933 2534 2075 1932 2539 2072 1934 2544 2075 1935 2539 2070 1937 2539 2072 1938 2546 2075 1936 2542 2065 1934 2539 2068 1929 2537 2070 1930 2537 2066 1932 2541 2070 1934 2538 2071 1933 2536 2077 1933 2538 2080 1931 2540 2078 1933 2537 2079 1937 2543 2083 1934 2542 2074 1934 2540 2078 1934 2540 2077 1936 2538 2067 1935 2543 2070 1938 2540 2071 1936 2540 2068 1936 2532 2072 1932 2540 2074 1929 2536 2074 1926 2543 2074 1931 2544 2079 1934 2539 2076 1935 2542 2077 1932 2539 2080 1935 2536 2075 1934 2535 2079 1929 2539 2078 1930 2536 2072 1929 2540 2074 1931 2540 2073 1934 2538 2071 1936 2535 2075 1936 2544 2071 1936 2541 2069 1936 2538 2070 1931 2547 2075 1933 2540 2074 1931 2539 2076 1935 2543 2083 1932 2538 2076 1935 2538 2078 1936 2537 2076 1933 2538 2070 1928 2540 2075 1928 2537 2078 1928 2539 2077 1931 2538 2078"
Going forward, with each acc burst we extract individual readings using strsplit
by space
, and create a matrix of the transposed acc readings such that each row corresponds to an individual acc burst, and each column represents the sample number within the burst, for a total of 792 columns. We then extract every first, second, and third column to get subsets of data corresponding to only X, Y, or Z readings.
acc_gen = function() paste0(sample(1:200, 9), c("x", "y", "z"), collapse = " ")
acc_df = data.frame(acc = c(acc_gen(), acc_gen(), acc_gen()),
stringsAsFactors = FALSE)
acc_df
## acc
## 1 7x 175y 167z 40x 97y 75z 158x 122y 186z
## 2 180x 132y 4z 81x 184y 104z 8x 101y 98z
## 3 138x 164y 193z 196x 161y 192z 162x 80y 36z
acc_df2 = strsplit(acc_df$acc, split = " ", fixed = TRUE)
acc_matrix = matrix(unlist(acc_df2), ncol = 9, byrow = TRUE)
colnames(acc_matrix) = paste0("V", 1:9)
acc_matrix
## V1 V2 V3 V4 V5 V6 V7 V8 V9
## [1,] "7x" "175y" "167z" "40x" "97y" "75z" "158x" "122y" "186z"
## [2,] "180x" "132y" "4z" "81x" "184y" "104z" "8x" "101y" "98z"
## [3,] "138x" "164y" "193z" "196x" "161y" "192z" "162x" "80y" "36z"
# X axis readings
acc_matrix[, seq(1, 9, 3)]
## V1 V4 V7
## [1,] "7x" "40x" "158x"
## [2,] "180x" "81x" "8x"
## [3,] "138x" "196x" "162x"
# Y axis readings
acc_matrix[, seq(2, 9, 3)]
## V2 V5 V8
## [1,] "175y" "97y" "122y"
## [2,] "132y" "184y" "101y"
## [3,] "164y" "161y" "80y"
# Z axis readings
acc_matrix[, seq(3, 9, 3)]
## V3 V6 V9
## [1,] "167z" "75z" "186z"
## [2,] "4z" "104z" "98z"
## [3,] "193z" "192z" "36z"
Once we subset the acc readings by each axis, we can calculate the variance of each burst’s readings and do a k-cluster analyses to classify acc bursts by activity (flying vs. not flying).
Based on the tag settings, our ACC BYTE COUNT was 1135 bytes, i.e. 792 ACC samples would be collected at each burst.
# Determining lengths of samples per acc burst
accaxesL = sapply(accL, function(acc) {
accaxes = strsplit(as.character(acc$eobs.accelerations.raw),
split=" ", fixed=T)
paste0(sort(unique(sapply(accaxes, length))), collapse = ", ")
})
data.frame(tagID = names(accaxesL), numberSamples = accaxesL) %>%
mutate(row.names = NULL)
## tagID numberSamples
## 1 K5309 792
## 2 K5310 792
## 3 K5311 792
## 4 K5312 792
## 5 K5313 792
## 6 K5315 792
## 7 K5317 792
## 8 K5319 792
The functions used below have been built upon code by Abedi-Lartey et al.:
Abedi-Lartey M, Dechmann DKN, Wikelski M, Scharf AK, Fahr J. Long-distance seed dispersal by straw-coloured fruit bats varies by season and landscape. Global Ecology and Conservation. 2016;7:12–24. Available from: https://doi.org/10.1016/j.gecco.2016.03.005
numberSamples = 792
for( axis in axises) {
acc_activityL = lapply(accL, get_acc_activity, numberSamples = 792, axis = axis)
tagIDs = names(acc_activityL)
acc_gps_behavL = lapply(tagIDs, get_acc_gps_behav, acc_activityL, gpsL)
names(acc_gps_behavL) = tagIDs
# save data
saveRDS(acc_gps_behavL, file.path(out_dir, paste0(axis, "_", out_filename)))
}
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] maptools_0.9-5 cowplot_1.0.0 readxl_1.3.1 tidyr_1.0.2
## [5] viridis_0.5.1 viridisLite_0.3.0 DT_0.11.5 hrbrthemes_0.6.0
## [9] ggsn_0.5.0 patchwork_1.0.0 ggplot2_3.3.0.9000 sf_0.8-0
## [13] htmlwidgets_1.5.1 leaflet_2.0.2 data.table_1.12.8 dplyr_0.8.4
## [17] lubridate_1.7.4 move_3.2.0 rgdal_1.4-4 raster_2.9-5
## [21] sp_1.3-1 geosphere_1.5-10
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.1 shiny_1.4.0 assertthat_0.2.1 cellranger_1.1.0
## [5] yaml_2.2.1 gdtools_0.2.1 Rttf2pt1_1.3.7 pillar_1.4.3
## [9] lattice_0.20-38 glue_1.3.1 extrafontdb_1.0 digest_0.6.23
## [13] promises_1.1.0 colorspace_1.4-1 htmltools_0.4.0 httpuv_1.5.2
## [17] plyr_1.8.5 pkgconfig_2.0.3 purrr_0.3.3 xtable_1.8-4
## [21] scales_1.1.0 jpeg_0.1-8.1 later_1.0.0 ggmap_3.0.0
## [25] tibble_2.1.3 withr_2.1.2 magrittr_1.5 crayon_1.3.4
## [29] mime_0.9 memoise_1.1.0 evaluate_0.14 xml2_1.2.2
## [33] foreign_0.8-71 class_7.3-15 tools_3.6.0 RgoogleMaps_1.4.3
## [37] matrixStats_0.54.0 lifecycle_0.1.0 stringr_1.4.0 munsell_0.5.0
## [41] compiler_3.6.0 e1071_1.7-2 systemfonts_0.1.1 rlang_0.4.4
## [45] classInt_0.4-2 units_0.6-3 rjson_0.2.20 crosstalk_1.0.0
## [49] bitops_1.0-6 rmarkdown_1.17.1 gtable_0.3.0 codetools_0.2-16
## [53] DBI_1.0.0 R6_2.4.1 gridExtra_2.3 knitr_1.26
## [57] fastmap_1.0.1 extrafont_0.17 KernSmooth_2.23-15 stringi_1.4.5
## [61] parallel_3.6.0 Rcpp_1.0.3 vctrs_0.2.2 png_0.1-7
## [65] tidyselect_1.0.0 xfun_0.12