Time on Old GT3X format
muschellij2 opened this issue · 2 comments
This is related to #3, but it's about the time of the old format. It seems as though the times for activity.bin files are not parsed correctly:
library(read.gt3x)
url = "https://github.com/THLfi/read.gt3x/files/3522749/GT3X%2B.01.day.gt3x.zip"
destfile = tempfile(fileext = ".zip")
dl = download.file(url, destfile = destfile)
gt3x_file = unzip(destfile, exdir = tempdir())
gt3x_file = gt3x_file[!grepl("__MACOSX", gt3x_file)]
gt3x_file
#> [1] "/var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+ (01 day).gt3x"
res = read.gt3x(gt3x_file, verbose = TRUE, asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+ (01 day).gt3x
#> === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+(01day)
#> GT3X information
#> $ Serial Number :"NEO1DXXXXXXXX"
#> $ Device Type :"GT3XPlus"
#> $ Firmware :"2.5.0"
#> $ Battery Voltage :"4.22"
#> $ Sample Rate :30
#> $ Start Date : POSIXct, format: "2012-06-27 10:54:00"
#> $ Stop Date : POSIXct, format: "2012-06-28 11:54:00"
#> $ Download Date : POSIXct, format: "2012-06-28 16:25:52"
#> $ Board Revision :"4"
#> $ Unexpected Resets :"0"
#> $ Sex :"Male"
#> $ Height :"172.72"
#> $ Mass :"69.8532249799612"
#> $ Age :"43"
#> $ Race :"White / Caucasian"
#> $ Limb :"Ankle"
#> $ Side :"Left"
#> $ Dominance :"Non-Dominant"
#> $ DateOfBirth :"621132192000000000"
#> $ Subject Name :"GT3XPlus"
#> $ Serial Prefix :"NEO"
#> $ Last Sample Time : 'POSIXct' num(0)
#> - attr(*, "tzone")= chr "GMT"
#> $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.97747802734375 seconds)
hdr = attributes(res)$header
sample_rate = hdr$"Sample Rate"
unique(diff(res$time))
#> [1] 0.0999999 0.1333334 0.1000001 0.1333332
Whereas if we just do the simple thing - take the first time and add each sample's index divided by the sample rate - we get 1 day of data, which I think is supposed to be correct.
range(res$time[1] + 0:(nrow(res)-1)/sample_rate)
#> [1] "2012-06-27 10:54:00 GMT" "2012-06-28 11:53:59 GMT"
Created on 2020-08-26 by the reprex package (v0.3.0)
Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os macOS Mojave 10.14.6
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/New_York
#> date 2020-08-26
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
#> backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.0)
#> callr 3.4.3 2020-03-28 [1] CRAN (R 4.0.0)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
#> desc 1.2.0 2020-06-01 [1] Github (muschellij2/desc@b0c374f)
#> devtools 2.3.1.9000 2020-08-25 [1] Github (r-lib/devtools@df619ce)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> glue 1.4.1 2020-05-13 [1] CRAN (R 4.0.0)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.2)
#> lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
#> processx 3.4.3 2020-07-05 [1] CRAN (R 4.0.0)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.0)
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
#> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.0)
#> read.gt3x * 0.2.0 2020-08-24 [1] local
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
#> rlang 0.4.7.9000 2020-08-25 [1] Github (r-lib/rlang@de0c176)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.4.6 2020-02-17 [1] CRAN (R 4.0.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
#> testthat 2.99.0.9000 2020-08-25 [1] Github (r-lib/testthat@6a24275)
#> usethis 1.6.1.9001 2020-08-25 [1] Github (r-lib/usethis@860c1ea)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
#> xfun 0.16 2020-07-24 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library
We have the CSV output from ActiGraph, which has some small differences from what we get from read.gt3x:
library(read.gt3x)
url = paste0("https://github.com/THLfi/read.gt3x/files/",
"3522749/GT3X%2B.01.day.gt3x.zip")
destfile = tempfile(fileext = ".zip")
dl = download.file(url, destfile = destfile)
gt3x_file = unzip(destfile, exdir = tempdir())
gt3x_file = gt3x_file[!grepl("__MACOSX", gt3x_file)]
gt3x_file
#> [1] "/var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x"
res = read.gt3x(gt3x_file, verbose = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x
#> === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+(01day)
#> GT3X information
#> $ Serial Number :"NEO1DXXXXXXXX"
#> $ Device Type :"GT3XPlus"
#> $ Firmware :"2.5.0"
#> $ Battery Voltage :"4.22"
#> $ Sample Rate :30
#> $ Start Date : POSIXct, format: "2012-06-27 10:54:00"
#> $ Stop Date : POSIXct, format: "2012-06-28 11:54:00"
#> $ Download Date : POSIXct, format: "2012-06-28 16:25:52"
#> $ Board Revision :"4"
#> $ Unexpected Resets :"0"
#> $ Sex :"Male"
#> $ Height :"172.72"
#> $ Mass :"69.8532249799612"
#> $ Age :"43"
#> $ Race :"White / Caucasian"
#> $ Limb :"Ankle"
#> $ Side :"Left"
#> $ Dominance :"Non-Dominant"
#> $ DateOfBirth :"621132192000000000"
#> $ Subject Name :"GT3XPlus"
#> $ Serial Prefix :"NEO"
#> $ Last Sample Time : 'POSIXct' num(0)
#> - attr(*, "tzone")= chr "GMT"
#> $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.755279064178467 seconds)
at = attributes(res)
at$header$`Download Date`
#> [1] "2012-06-28 16:25:52 GMT"
act_df = read.gt3x(gt3x_file, verbose = TRUE,
asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x
#> === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+(01day)
#> GT3X information
#> $ Serial Number :"NEO1DXXXXXXXX"
#> $ Device Type :"GT3XPlus"
#> $ Firmware :"2.5.0"
#> $ Battery Voltage :"4.22"
#> $ Sample Rate :30
#> $ Start Date : POSIXct, format: "2012-06-27 10:54:00"
#> $ Stop Date : POSIXct, format: "2012-06-28 11:54:00"
#> $ Download Date : POSIXct, format: "2012-06-28 16:25:52"
#> $ Board Revision :"4"
#> $ Unexpected Resets :"0"
#> $ Sex :"Male"
#> $ Height :"172.72"
#> $ Mass :"69.8532249799612"
#> $ Age :"43"
#> $ Race :"White / Caucasian"
#> $ Limb :"Ankle"
#> $ Side :"Left"
#> $ Dominance :"Non-Dominant"
#> $ DateOfBirth :"621132192000000000"
#> $ Subject Name :"GT3XPlus"
#> $ Serial Prefix :"NEO"
#> $ Last Sample Time : 'POSIXct' num(0)
#> - attr(*, "tzone")= chr "GMT"
#> $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.606827974319458 seconds)
tail(act_df)
#> Sampling Rate: 30Hz
#> Firmware Version: 2.5.0
#> Serial Number Prefix: NEO
#> X Y Z time
#> 2699995 0 0 0 2012-06-30 22:13:59
#> 2699996 0 0 0 2012-06-30 22:13:59
#> 2699997 0 0 0 2012-06-30 22:13:59
#> 2699998 0 0 0 2012-06-30 22:13:59
#> 2699999 0 0 0 2012-06-30 22:13:59
#> 2700000 0 0 0 2012-06-30 22:13:59
We see above that the last date/time is on 6/30, but the download date is 6/28!
We see that here as well:
last_time = act_df$time[ nrow(act_df)]
last_time > at$header$`Download Date`
#> [1] TRUE
last_time > at$header$`Stop Date`
#> [1] TRUE
If we just take the start time and add each sample's offset in seconds (sample index divided by the sample rate) until the end, we get times before the download date/time:
act_df$time2 = at$start_time +
lubridate::as.period(
1:nrow(act_df)/at$sample_rate,
unit = "secs")
last_time2 = act_df$time2[ nrow(act_df)]
last_time2 == at$header$`Download Date`
#> [1] FALSE
last_time2 == at$header$`Stop Date`
#> [1] TRUE
The CSV
Here is the CSV from ActiLife:
destfile = tempfile(fileext = ".csv.gz")
download.file(
"https://ndownloader.figshare.com/files/24459683",
destfile = destfile
)
df = readr::read_csv(destfile, skip = 10)
#> Parsed with column specification:
#> cols(
#> `Accelerometer X` = col_double(),
#> `Accelerometer Y` = col_double(),
#> `Accelerometer Z` = col_double()
#> )
colnames(df) = sub("Accelerometer ", "", colnames(df))
Again, the header is included in the CSV, so we'll parse it. This again shows that the last time in the read.gt3x output is past the download date/time given in the header:
hdr = readLines(destfile, 10)
dl = hdr[ grepl("Download", hdr)]
print(dl)
#> [1] "Download Time 16:25:52" "Download Date 6/28/2012"
dl_time = trimws(gsub("[[:alpha:]]", "", dl[grepl("Time", dl)]))
dl_date = trimws(gsub("[[:alpha:]]", "", dl[grepl("Date", dl)]))
dl_date = lubridate::mdy_hms(paste(dl_date, dl_time))
last_time > dl_date
#> [1] TRUE
last_time - dl_date
#> Time difference of 2.241758 days
We also see an oddity: the ActiLife CSV has 42 fewer rows than the read.gt3x output (though I think the missing rows are all zeros):
dim(act_df)
#> [1] 2700000 5
dim(df)
#> [1] 2699958 3
tail(df)
#> # A tibble: 6 x 3
#> X Y Z
#> <dbl> <dbl> <dbl>
#> 1 0.469 0.707 0.522
#> 2 0.472 0.707 0.519
#> 3 0.466 0.71 0.522
#> 4 0.466 0.707 0.522
#> 5 0.469 0.707 0.522
#> 6 0.469 0.707 0.522
tail(act_df)
#> Sampling Rate: 30Hz
#> Firmware Version: 2.5.0
#> Serial Number Prefix: NEO
#> X Y Z time time2
#> 2699995 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699996 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699997 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699998 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699999 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2700000 0 0 0 2012-06-30 22:13:59 2012-06-28 11:54:00
Here we'll truncate those values and see if things match:
act_df = act_df[1:nrow(df), ]
all(df[, c("X", "Y", "Z")] == act_df[, c("X", "Y", "Z")])
#> [1] FALSE
They do not match!
Oddly, if you look at this, if we switch up X and Y, we get the same results. I'm looking at this now, and it's because of https://github.com/actigraph/NHANES-GT3X-File-Format/blob/master/fileformats/activity.bin.md - it's Y, X, Z!
# switching up X and Y
all(df[, c("Y", "X", "Z")] == act_df[, c("X", "Y", "Z")])
#> [1] TRUE
Created on 2020-08-28 by the reprex package (v0.3.0)
Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os macOS Mojave 10.14.6
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/New_York
#> date 2020-08-28
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
#> backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.0)
#> callr 3.4.3 2020-03-28 [1] CRAN (R 4.0.0)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
#> desc 1.2.0 2020-06-01 [1] Github (muschellij2/desc@b0c374f)
#> devtools 2.3.1.9000 2020-08-25 [1] Github (r-lib/devtools@df619ce)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> generics 0.0.2 2018-11-29 [1] CRAN (R 4.0.0)
#> glue 1.4.1 2020-05-13 [1] CRAN (R 4.0.0)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> hms 0.5.3 2020-01-08 [1] CRAN (R 4.0.0)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.2)
#> lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.0)
#> lubridate 1.7.9 2020-06-08 [1] CRAN (R 4.0.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
#> pillar 1.4.6 2020-07-10 [1] CRAN (R 4.0.2)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
#> processx 3.4.3 2020-07-05 [1] CRAN (R 4.0.0)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.0)
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
#> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.0)
#> read.gt3x * 0.2.0 2020-08-28 [1] local
#> readr 1.3.1 2018-12-21 [1] CRAN (R 4.0.0)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
#> rlang 0.4.7.9000 2020-08-25 [1] Github (r-lib/rlang@de0c176)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.4.6 2020-02-17 [1] CRAN (R 4.0.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
#> testthat 2.99.0.9000 2020-08-25 [1] Github (r-lib/testthat@6a24275)
#> tibble 3.0.3 2020-07-10 [1] CRAN (R 4.0.2)
#> usethis 1.6.1.9001 2020-08-25 [1] Github (r-lib/usethis@860c1ea)
#> utf8 1.1.4 2018-05-24 [1] CRAN (R 4.0.0)
#> vctrs 0.3.2 2020-07-15 [1] CRAN (R 4.0.2)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
#> xfun 0.16 2020-07-24 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library
This is still an issue with this file - the time still does not read correctly.