THLfi/read.gt3x

Time on Old GT3X format

muschellij2 opened this issue · 2 comments

This is related to #3, but it's about the time of the old format. It seems as though the times for activity.bin files are not parsed correctly:

library(read.gt3x)
url = "https://github.com/THLfi/read.gt3x/files/3522749/GT3X%2B.01.day.gt3x.zip"
destfile = tempfile(fileext = ".zip")
dl = download.file(url, destfile = destfile)
gt3x_file = unzip(destfile, exdir = tempdir())
gt3x_file = gt3x_file[!grepl("__MACOSX", gt3x_file)]
gt3x_file
#> [1] "/var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+ (01 day).gt3x"

res = read.gt3x(gt3x_file, verbose = TRUE, asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+ (01 day).gt3x
#>  === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+(01day)
#> GT3X information
#>  $ Serial Number     :"NEO1DXXXXXXXX"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"2.5.0"
#>  $ Battery Voltage   :"4.22"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2012-06-27 10:54:00"
#>  $ Stop Date         : POSIXct, format: "2012-06-28 11:54:00"
#>  $ Download Date     : POSIXct, format: "2012-06-28 16:25:52"
#>  $ Board Revision    :"4"
#>  $ Unexpected Resets :"0"
#>  $ Sex               :"Male"
#>  $ Height            :"172.72"
#>  $ Mass              :"69.8532249799612"
#>  $ Age               :"43"
#>  $ Race              :"White / Caucasian"
#>  $ Limb              :"Ankle"
#>  $ Side              :"Left"
#>  $ Dominance         :"Non-Dominant"
#>  $ DateOfBirth       :"621132192000000000"
#>  $ Subject Name      :"GT3XPlus"
#>  $ Serial Prefix     :"NEO"
#>  $ Last Sample Time  : 'POSIXct' num(0) 
#>  - attr(*, "tzone")= chr "GMT"
#>  $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.97747802734375 seconds)
hdr = attributes(res)$header
sample_rate = hdr$"Sample Rate"
unique(diff(res$time))
#> [1] 0.0999999 0.1333334 0.1000001 0.1333332

Whereas if we do the simple thing - take the start time and add each sample's index divided by the sample rate - we get 1 day of data, which I think is supposed to be correct.

range(res$time[1] + 0:(nrow(res)-1)/sample_rate)
#> [1] "2012-06-27 10:54:00 GMT" "2012-06-28 11:53:59 GMT"

Created on 2020-08-26 by the reprex package (v0.3.0)

Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 4.0.2 (2020-06-22)
#>  os       macOS Mojave 10.14.6        
#>  system   x86_64, darwin17.0          
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       America/New_York            
#>  date     2020-08-26                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version     date       lib source                           
#>  assertthat    0.2.1       2019-03-21 [1] CRAN (R 4.0.0)                   
#>  backports     1.1.9       2020-08-24 [1] CRAN (R 4.0.0)                   
#>  callr         3.4.3       2020-03-28 [1] CRAN (R 4.0.0)                   
#>  cli           2.0.2       2020-02-28 [1] CRAN (R 4.0.0)                   
#>  crayon        1.3.4       2017-09-16 [1] CRAN (R 4.0.0)                   
#>  desc          1.2.0       2020-06-01 [1] Github (muschellij2/desc@b0c374f)
#>  devtools      2.3.1.9000  2020-08-25 [1] Github (r-lib/devtools@df619ce)  
#>  digest        0.6.25      2020-02-23 [1] CRAN (R 4.0.0)                   
#>  ellipsis      0.3.1       2020-05-15 [1] CRAN (R 4.0.0)                   
#>  evaluate      0.14        2019-05-28 [1] CRAN (R 4.0.0)                   
#>  fansi         0.4.1       2020-01-08 [1] CRAN (R 4.0.0)                   
#>  fs            1.5.0       2020-07-31 [1] CRAN (R 4.0.2)                   
#>  glue          1.4.1       2020-05-13 [1] CRAN (R 4.0.0)                   
#>  highr         0.8         2019-03-20 [1] CRAN (R 4.0.0)                   
#>  htmltools     0.5.0       2020-06-16 [1] CRAN (R 4.0.0)                   
#>  knitr         1.29        2020-06-23 [1] CRAN (R 4.0.2)                   
#>  lifecycle     0.2.0       2020-03-06 [1] CRAN (R 4.0.0)                   
#>  magrittr      1.5         2014-11-22 [1] CRAN (R 4.0.0)                   
#>  memoise       1.1.0       2017-04-21 [1] CRAN (R 4.0.0)                   
#>  pkgbuild      1.1.0       2020-07-13 [1] CRAN (R 4.0.2)                   
#>  pkgload       1.1.0       2020-05-29 [1] CRAN (R 4.0.0)                   
#>  prettyunits   1.1.1       2020-01-24 [1] CRAN (R 4.0.0)                   
#>  processx      3.4.3       2020-07-05 [1] CRAN (R 4.0.0)                   
#>  ps            1.3.4       2020-08-11 [1] CRAN (R 4.0.2)                   
#>  purrr         0.3.4       2020-04-17 [1] CRAN (R 4.0.0)                   
#>  R6            2.4.1       2019-11-12 [1] CRAN (R 4.0.0)                   
#>  Rcpp          1.0.5       2020-07-06 [1] CRAN (R 4.0.0)                   
#>  read.gt3x   * 0.2.0       2020-08-24 [1] local                            
#>  remotes       2.2.0       2020-07-21 [1] CRAN (R 4.0.2)                   
#>  rlang         0.4.7.9000  2020-08-25 [1] Github (r-lib/rlang@de0c176)     
#>  rmarkdown     2.3         2020-06-18 [1] CRAN (R 4.0.0)                   
#>  rprojroot     1.3-2       2018-01-03 [1] CRAN (R 4.0.0)                   
#>  sessioninfo   1.1.1       2018-11-05 [1] CRAN (R 4.0.0)                   
#>  stringi       1.4.6       2020-02-17 [1] CRAN (R 4.0.0)                   
#>  stringr       1.4.0       2019-02-10 [1] CRAN (R 4.0.0)                   
#>  testthat      2.99.0.9000 2020-08-25 [1] Github (r-lib/testthat@6a24275)  
#>  usethis       1.6.1.9001  2020-08-25 [1] Github (r-lib/usethis@860c1ea)   
#>  withr         2.2.0       2020-04-20 [1] CRAN (R 4.0.0)                   
#>  xfun          0.16        2020-07-24 [1] CRAN (R 4.0.2)                   
#>  yaml          2.2.1       2020-02-01 [1] CRAN (R 4.0.0)                   
#> 
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

We have the CSV output from ActiGraph, which has some small differences from what we get from read.gt3x:

library(read.gt3x)
url = paste0("https://github.com/THLfi/read.gt3x/files/", 
             "3522749/GT3X%2B.01.day.gt3x.zip")
destfile = tempfile(fileext = ".zip")
dl = download.file(url, destfile = destfile)
gt3x_file = unzip(destfile, exdir = tempdir())
gt3x_file = gt3x_file[!grepl("__MACOSX", gt3x_file)]
gt3x_file
#> [1] "/var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x"

res = read.gt3x(gt3x_file, verbose = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x
#>  === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+(01day)
#> GT3X information
#>  $ Serial Number     :"NEO1DXXXXXXXX"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"2.5.0"
#>  $ Battery Voltage   :"4.22"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2012-06-27 10:54:00"
#>  $ Stop Date         : POSIXct, format: "2012-06-28 11:54:00"
#>  $ Download Date     : POSIXct, format: "2012-06-28 16:25:52"
#>  $ Board Revision    :"4"
#>  $ Unexpected Resets :"0"
#>  $ Sex               :"Male"
#>  $ Height            :"172.72"
#>  $ Mass              :"69.8532249799612"
#>  $ Age               :"43"
#>  $ Race              :"White / Caucasian"
#>  $ Limb              :"Ankle"
#>  $ Side              :"Left"
#>  $ Dominance         :"Non-Dominant"
#>  $ DateOfBirth       :"621132192000000000"
#>  $ Subject Name      :"GT3XPlus"
#>  $ Serial Prefix     :"NEO"
#>  $ Last Sample Time  : 'POSIXct' num(0) 
#>  - attr(*, "tzone")= chr "GMT"
#>  $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.755279064178467 seconds)
at = attributes(res)
at$header$`Download Date`
#> [1] "2012-06-28 16:25:52 GMT"
act_df = read.gt3x(gt3x_file, verbose = TRUE,
                   asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x
#>  === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+(01day)
#> GT3X information
#>  $ Serial Number     :"NEO1DXXXXXXXX"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"2.5.0"
#>  $ Battery Voltage   :"4.22"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2012-06-27 10:54:00"
#>  $ Stop Date         : POSIXct, format: "2012-06-28 11:54:00"
#>  $ Download Date     : POSIXct, format: "2012-06-28 16:25:52"
#>  $ Board Revision    :"4"
#>  $ Unexpected Resets :"0"
#>  $ Sex               :"Male"
#>  $ Height            :"172.72"
#>  $ Mass              :"69.8532249799612"
#>  $ Age               :"43"
#>  $ Race              :"White / Caucasian"
#>  $ Limb              :"Ankle"
#>  $ Side              :"Left"
#>  $ Dominance         :"Non-Dominant"
#>  $ DateOfBirth       :"621132192000000000"
#>  $ Subject Name      :"GT3XPlus"
#>  $ Serial Prefix     :"NEO"
#>  $ Last Sample Time  : 'POSIXct' num(0) 
#>  - attr(*, "tzone")= chr "GMT"
#>  $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.606827974319458 seconds)
tail(act_df)
#> Sampling Rate: 30Hz
#> Firmware Version: 2.5.0
#> Serial Number Prefix: NEO
#>         X Y Z                time
#> 2699995 0 0 0 2012-06-30 22:13:59
#> 2699996 0 0 0 2012-06-30 22:13:59
#> 2699997 0 0 0 2012-06-30 22:13:59
#> 2699998 0 0 0 2012-06-30 22:13:59
#> 2699999 0 0 0 2012-06-30 22:13:59
#> 2700000 0 0 0 2012-06-30 22:13:59

We see above that the last date/time is on 6/30, but the download date is from 6/28!

We see that here as well:

last_time = act_df$time[ nrow(act_df)]
last_time > at$header$`Download Date`
#> [1] TRUE
last_time > at$header$`Stop Date`
#> [1] TRUE

If we just take the start time and add the elapsed seconds for each sample (sample index / sample rate), we get times before the download date/time:

act_df$time2 = at$start_time + 
  lubridate::as.period(
    1:nrow(act_df)/at$sample_rate, 
    unit = "secs")

last_time2 = act_df$time2[ nrow(act_df)]
last_time2 == at$header$`Download Date`
#> [1] FALSE
last_time2 == at$header$`Stop Date`
#> [1] TRUE

The CSV

Here is the CSV from ActiLife:

destfile = tempfile(fileext = ".csv.gz")
download.file(
  "https://ndownloader.figshare.com/files/24459683", 
  destfile = destfile
)
df = readr::read_csv(destfile, skip = 10)
#> Parsed with column specification:
#> cols(
#>   `Accelerometer X` = col_double(),
#>   `Accelerometer Y` = col_double(),
#>   `Accelerometer Z` = col_double()
#> )
colnames(df) = sub("Accelerometer ", "", colnames(df))

Again, the header is included in the CSV, so we'll parse it. This again shows that the last time in the read.gt3x output is past the download date/time given in the header:

hdr = readLines(destfile, 10)
dl = hdr[ grepl("Download", hdr)]
print(dl)
#> [1] "Download Time 16:25:52"  "Download Date 6/28/2012"
dl_time = trimws(gsub("[[:alpha:]]", "", dl[grepl("Time", dl)]))

dl_date = trimws(gsub("[[:alpha:]]", "", dl[grepl("Date", dl)]))
dl_date = lubridate::mdy_hms(paste(dl_date, dl_time))

last_time > dl_date 
#> [1] TRUE

last_time - dl_date 
#> Time difference of 2.241758 days

We also see an oddity: the ActiLife CSV has 42 fewer rows than the read.gt3x output (though I think the missing values are zeros):

dim(act_df)
#> [1] 2700000       5
dim(df)
#> [1] 2699958       3
tail(df)
#> # A tibble: 6 x 3
#>       X     Y     Z
#>   <dbl> <dbl> <dbl>
#> 1 0.469 0.707 0.522
#> 2 0.472 0.707 0.519
#> 3 0.466 0.71  0.522
#> 4 0.466 0.707 0.522
#> 5 0.469 0.707 0.522
#> 6 0.469 0.707 0.522
tail(act_df)
#> Sampling Rate: 30Hz
#> Firmware Version: 2.5.0
#> Serial Number Prefix: NEO
#>         X Y Z                time               time2
#> 2699995 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699996 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699997 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699998 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699999 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2700000 0 0 0 2012-06-30 22:13:59 2012-06-28 11:54:00

Here we'll truncate those values and see if things match:

act_df = act_df[1:nrow(df), ]
all(df[, c("X", "Y", "Z")] == act_df[, c("X", "Y", "Z")])
#> [1] FALSE

They do not match!

Oddly, if we swap X and Y, we get the same results. I'm looking at this now, and it's because of https://github.com/actigraph/NHANES-GT3X-File-Format/blob/master/fileformats/activity.bin.md - the order is Y, X, Z!

# switching up X and Y
all(df[, c("Y", "X", "Z")] == act_df[, c("X", "Y", "Z")])
#> [1] TRUE

Created on 2020-08-28 by the reprex package (v0.3.0)

Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 4.0.2 (2020-06-22)
#>  os       macOS Mojave 10.14.6        
#>  system   x86_64, darwin17.0          
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       America/New_York            
#>  date     2020-08-28                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version     date       lib source                           
#>  assertthat    0.2.1       2019-03-21 [1] CRAN (R 4.0.0)                   
#>  backports     1.1.9       2020-08-24 [1] CRAN (R 4.0.0)                   
#>  callr         3.4.3       2020-03-28 [1] CRAN (R 4.0.0)                   
#>  cli           2.0.2       2020-02-28 [1] CRAN (R 4.0.0)                   
#>  crayon        1.3.4       2017-09-16 [1] CRAN (R 4.0.0)                   
#>  desc          1.2.0       2020-06-01 [1] Github (muschellij2/desc@b0c374f)
#>  devtools      2.3.1.9000  2020-08-25 [1] Github (r-lib/devtools@df619ce)  
#>  digest        0.6.25      2020-02-23 [1] CRAN (R 4.0.0)                   
#>  ellipsis      0.3.1       2020-05-15 [1] CRAN (R 4.0.0)                   
#>  evaluate      0.14        2019-05-28 [1] CRAN (R 4.0.0)                   
#>  fansi         0.4.1       2020-01-08 [1] CRAN (R 4.0.0)                   
#>  fs            1.5.0       2020-07-31 [1] CRAN (R 4.0.2)                   
#>  generics      0.0.2       2018-11-29 [1] CRAN (R 4.0.0)                   
#>  glue          1.4.1       2020-05-13 [1] CRAN (R 4.0.0)                   
#>  highr         0.8         2019-03-20 [1] CRAN (R 4.0.0)                   
#>  hms           0.5.3       2020-01-08 [1] CRAN (R 4.0.0)                   
#>  htmltools     0.5.0       2020-06-16 [1] CRAN (R 4.0.0)                   
#>  knitr         1.29        2020-06-23 [1] CRAN (R 4.0.2)                   
#>  lifecycle     0.2.0       2020-03-06 [1] CRAN (R 4.0.0)                   
#>  lubridate     1.7.9       2020-06-08 [1] CRAN (R 4.0.0)                   
#>  magrittr      1.5         2014-11-22 [1] CRAN (R 4.0.0)                   
#>  memoise       1.1.0       2017-04-21 [1] CRAN (R 4.0.0)                   
#>  pillar        1.4.6       2020-07-10 [1] CRAN (R 4.0.2)                   
#>  pkgbuild      1.1.0       2020-07-13 [1] CRAN (R 4.0.2)                   
#>  pkgconfig     2.0.3       2019-09-22 [1] CRAN (R 4.0.0)                   
#>  pkgload       1.1.0       2020-05-29 [1] CRAN (R 4.0.0)                   
#>  prettyunits   1.1.1       2020-01-24 [1] CRAN (R 4.0.0)                   
#>  processx      3.4.3       2020-07-05 [1] CRAN (R 4.0.0)                   
#>  ps            1.3.4       2020-08-11 [1] CRAN (R 4.0.2)                   
#>  purrr         0.3.4       2020-04-17 [1] CRAN (R 4.0.0)                   
#>  R6            2.4.1       2019-11-12 [1] CRAN (R 4.0.0)                   
#>  Rcpp          1.0.5       2020-07-06 [1] CRAN (R 4.0.0)                   
#>  read.gt3x   * 0.2.0       2020-08-28 [1] local                            
#>  readr         1.3.1       2018-12-21 [1] CRAN (R 4.0.0)                   
#>  remotes       2.2.0       2020-07-21 [1] CRAN (R 4.0.2)                   
#>  rlang         0.4.7.9000  2020-08-25 [1] Github (r-lib/rlang@de0c176)     
#>  rmarkdown     2.3         2020-06-18 [1] CRAN (R 4.0.0)                   
#>  rprojroot     1.3-2       2018-01-03 [1] CRAN (R 4.0.0)                   
#>  sessioninfo   1.1.1       2018-11-05 [1] CRAN (R 4.0.0)                   
#>  stringi       1.4.6       2020-02-17 [1] CRAN (R 4.0.0)                   
#>  stringr       1.4.0       2019-02-10 [1] CRAN (R 4.0.0)                   
#>  testthat      2.99.0.9000 2020-08-25 [1] Github (r-lib/testthat@6a24275)  
#>  tibble        3.0.3       2020-07-10 [1] CRAN (R 4.0.2)                   
#>  usethis       1.6.1.9001  2020-08-25 [1] Github (r-lib/usethis@860c1ea)   
#>  utf8          1.1.4       2018-05-24 [1] CRAN (R 4.0.0)                   
#>  vctrs         0.3.2       2020-07-15 [1] CRAN (R 4.0.2)                   
#>  withr         2.2.0       2020-04-20 [1] CRAN (R 4.0.0)                   
#>  xfun          0.16        2020-07-24 [1] CRAN (R 4.0.2)                   
#>  yaml          2.2.1       2020-02-01 [1] CRAN (R 4.0.0)                   
#> 
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

This is still an issue with this file - the time still does not read correctly.