
Time on Old GT3X format

This is related to #3, but it's about the time of the old format. It seems as though the times for activity.bin files are not parsed correctly:

url = ""
destfile = tempfile(fileext = ".zip")
dl = download.file(url, destfile = destfile)
gt3x_file = unzip(destfile, exdir = tempdir())
gt3x_file = gt3x_file[!grepl("__MACOSX", gt3x_file)]
#> [1] "/var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+ (01 day).gt3x"

res = read.gt3x(gt3x_file, verbose = TRUE, asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+ (01 day).gt3x
#>  === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//Rtmp4RG5z6/GT3X+(01day)
#> GT3X information
#>  $ Serial Number     :"NEO1DXXXXXXXX"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"2.5.0"
#>  $ Battery Voltage   :"4.22"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2012-06-27 10:54:00"
#>  $ Stop Date         : POSIXct, format: "2012-06-28 11:54:00"
#>  $ Download Date     : POSIXct, format: "2012-06-28 16:25:52"
#>  $ Board Revision    :"4"
#>  $ Unexpected Resets :"0"
#>  $ Sex               :"Male"
#>  $ Height            :"172.72"
#>  $ Mass              :"69.8532249799612"
#>  $ Age               :"43"
#>  $ Race              :"White / Caucasian"
#>  $ Limb              :"Ankle"
#>  $ Side              :"Left"
#>  $ Dominance         :"Non-Dominant"
#>  $ DateOfBirth       :"621132192000000000"
#>  $ Subject Name      :"GT3XPlus"
#>  $ Serial Prefix     :"NEO"
#>  $ Last Sample Time  : 'POSIXct' num(0) 
#>  - attr(*, "tzone")= chr "GMT"
#>  $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.97747802734375 seconds)
hdr = attributes(res)$header
sample_rate = hdr$"Sample Rate"
#> [1] 0.0999999 0.1333334 0.1000001 0.1333332

If we instead do the simple thing - add each sample's offset (row index divided by the sample rate) to the first time stamp - we get 1 day of data, which I think is correct.

range(res$time[1] + 0:(nrow(res)-1)/sample_rate)
#> [1] "2012-06-27 10:54:00 GMT" "2012-06-28 11:53:59 GMT"

We have the CSV output from ActiGraph, which has some small differences from what we get from read.gt3x:

url = paste0("", 
destfile = tempfile(fileext = ".zip")
dl = download.file(url, destfile = destfile)
gt3x_file = unzip(destfile, exdir = tempdir())
gt3x_file = gt3x_file[!grepl("__MACOSX", gt3x_file)]
#> [1] "/var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x"

res = read.gt3x(gt3x_file, verbose = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x
#>  === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+(01day)
#> GT3X information
#>  $ Serial Number     :"NEO1DXXXXXXXX"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"2.5.0"
#>  $ Battery Voltage   :"4.22"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2012-06-27 10:54:00"
#>  $ Stop Date         : POSIXct, format: "2012-06-28 11:54:00"
#>  $ Download Date     : POSIXct, format: "2012-06-28 16:25:52"
#>  $ Board Revision    :"4"
#>  $ Unexpected Resets :"0"
#>  $ Sex               :"Male"
#>  $ Height            :"172.72"
#>  $ Mass              :"69.8532249799612"
#>  $ Age               :"43"
#>  $ Race              :"White / Caucasian"
#>  $ Limb              :"Ankle"
#>  $ Side              :"Left"
#>  $ Dominance         :"Non-Dominant"
#>  $ DateOfBirth       :"621132192000000000"
#>  $ Subject Name      :"GT3XPlus"
#>  $ Serial Prefix     :"NEO"
#>  $ Last Sample Time  : 'POSIXct' num(0) 
#>  - attr(*, "tzone")= chr "GMT"
#>  $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.755279064178467 seconds)
at = attributes(res)
at$header$`Download Date`
#> [1] "2012-06-28 16:25:52 GMT"
act_df = read.gt3x(gt3x_file, verbose = TRUE,
                   asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+ (01 day).gt3x
#>  === info.txt, activity.bin, lux.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpFmMDCW/GT3X+(01day)
#> GT3X information
#>  $ Serial Number     :"NEO1DXXXXXXXX"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"2.5.0"
#>  $ Battery Voltage   :"4.22"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2012-06-27 10:54:00"
#>  $ Stop Date         : POSIXct, format: "2012-06-28 11:54:00"
#>  $ Download Date     : POSIXct, format: "2012-06-28 16:25:52"
#>  $ Board Revision    :"4"
#>  $ Unexpected Resets :"0"
#>  $ Sex               :"Male"
#>  $ Height            :"172.72"
#>  $ Mass              :"69.8532249799612"
#>  $ Age               :"43"
#>  $ Race              :"White / Caucasian"
#>  $ Limb              :"Ankle"
#>  $ Side              :"Left"
#>  $ Dominance         :"Non-Dominant"
#>  $ DateOfBirth       :"621132192000000000"
#>  $ Subject Name      :"GT3XPlus"
#>  $ Serial Prefix     :"NEO"
#>  $ Last Sample Time  : 'POSIXct' num(0) 
#>  - attr(*, "tzone")= chr "GMT"
#>  $ Acceleration Scale:341
#> Parsing GT3X data via CPP.. expected sample size: 2700000
#> Using NHANES-GT3X format - older format
#> Sample size: 2700000
#> Scaling...
#> Lux Sample size: 2700000
#> Done (in 0.606827974319458 seconds)
#> Sampling Rate: 30Hz
#> Firmware Version: 2.5.0
#> Serial Number Prefix: NEO
#>         X Y Z                time
#> 2699995 0 0 0 2012-06-30 22:13:59
#> 2699996 0 0 0 2012-06-30 22:13:59
#> 2699997 0 0 0 2012-06-30 22:13:59
#> 2699998 0 0 0 2012-06-30 22:13:59
#> 2699999 0 0 0 2012-06-30 22:13:59
#> 2700000 0 0 0 2012-06-30 22:13:59

We see above that the last date/time is on 6/30, but the download date is from 6/28!

We see that here as well:

last_time = act_df$time[ nrow(act_df)]
last_time > at$header$`Download Date`
#> [1] TRUE
last_time > at$header$`Stop Date`
#> [1] TRUE

If we just take the start time and add the elapsed time (in seconds) for each sample through to the end, we get times before the download date/time:

act_df$time2 = at$start_time + 
    unit = "secs")

last_time2 = act_df$time2[ nrow(act_df)]
last_time2 == at$header$`Download Date`
#> [1] FALSE
last_time2 == at$header$`Stop Date`
#> [1] TRUE


Here is the CSV from ActiLife:

destfile = tempfile(fileext = ".csv.gz")
  destfile = destfile
df = readr::read_csv(destfile, skip = 10)
#> Parsed with column specification:
#> cols(
#>   `Accelerometer X` = col_double(),
#>   `Accelerometer Y` = col_double(),
#>   `Accelerometer Z` = col_double()
#> )
colnames(df) = sub("Accelerometer ", "", colnames(df))

The CSV also contains a header, which we'll parse. It again shows that the last time in the read.gt3x output is past the download date/time recorded in the header:

hdr = readLines(destfile, 10)
dl = hdr[ grepl("Download", hdr)]
#> [1] "Download Time 16:25:52"  "Download Date 6/28/2012"
dl_time = trimws(gsub("[[:alpha:]]", "", dl[grepl("Time", dl)]))

dl_date = trimws(gsub("[[:alpha:]]", "", dl[grepl("Date", dl)]))
dl_date = lubridate::mdy_hms(paste(dl_date, dl_time))

last_time > dl_date 
#> [1] TRUE

last_time - dl_date 
#> Time difference of 2.241758 days

We also see an oddity: the ActiLife CSV is missing 42 rows compared to the read.gt3x output (2699958 vs. 2700000 rows), though I think the missing values are all zero:

#> [1] 2700000       5
#> [1] 2699958       3
#> # A tibble: 6 x 3
#>       X     Y     Z
#>   <dbl> <dbl> <dbl>
#> 1 0.469 0.707 0.522
#> 2 0.472 0.707 0.519
#> 3 0.466 0.71  0.522
#> 4 0.466 0.707 0.522
#> 5 0.469 0.707 0.522
#> 6 0.469 0.707 0.522
#> Sampling Rate: 30Hz
#> Firmware Version: 2.5.0
#> Serial Number Prefix: NEO
#>         X Y Z                time               time2
#> 2699995 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699996 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699997 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699998 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2699999 0 0 0 2012-06-30 22:13:59 2012-06-28 11:53:59
#> 2700000 0 0 0 2012-06-30 22:13:59 2012-06-28 11:54:00

Here we'll truncate those values and see if things match:

act_df = act_df[1:nrow(df), ]
all(df[, c("X", "Y", "Z")] == act_df[, c("X", "Y", "Z")])
#> [1] FALSE

They do not match!

Oddly, if we swap X and Y, the results match. I'm looking at this now, and it's because of the axis order - it's Y, X, Z!

# switching up X and Y
all(df[, c("Y", "X", "Z")] == act_df[, c("X", "Y", "Z")])
#> [1] TRUE

This is still an issue with this file - the time still does not read correctly.