englianhu/binary.com-interview-question

在`ts`、`xts`、`matrix`、`zoo`格式上使用`auto.arima`

englianhu opened this issue · 1 comments

在`ts`、`xts`、`matrix`格式上使用`auto.arima`

数据来源:猫城@englianhu/binary.com-interview-question-data/世博量化研究院/文艺数据库/fx/USDJPY/样本2.rds

1 GiB [世博量化研究院*]❯ 样本2 <- readRDS("~/文档/猫城/binary.com-interview-question-data/文艺数据库/fx/USDJPY/样本2.rds")
✖ 1 GiB [世博量化研究院*]❯ 测试数据 <- 样本2[10000:11200, c('年月日时分', '闭市价')]
✔ 1 GiB [世博量化研究院*]❯ 测试数据 %>% data.frame %>% head
           年月日时分                  闭市价
1 2015-01-13 22:40:00 117.9290000000000020464
2 2015-01-13 22:41:00 117.8984999999999985221
3 2015-01-13 22:42:00 117.9070000000000106866
4 2015-01-13 22:43:00 117.8985000000000127329
5 2015-01-13 22:44:00 117.8849999999999909051
6 2015-01-13 22:45:00 117.8584999999999922693
1 GiB [世博量化研究院*]❯ 
✔ 1 GiB [世博量化研究院*]❯ 测试数据 %>% data.table::as.matrix(rownames = TRUE) %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1 GiB [世博量化研究院*]❯ 测试数据 %>% as.xts %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1 GiB [世博量化研究院*]❯ 测试数据 %>% as.xts %>% ts %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1 GiB [世博量化研究院*]❯ 测试数据 %>% as.xts %>% ts(frequency = 1200) %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1 GiB [世博量化研究院*]❯ 测试数据 %>% as.xts %>% ts(frequency = 120) %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% as.ts %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% as.zoo %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798

测试一下不同函数(数据格式)有何分别:即使在`ts()`里设置了`frequency`,`auto.arima`得出的模型结果都是一样;不过从下面的`microbenchmark`结果来看,`ts(frequency = 120)`的运算时间明显比其它写法长得多...

1.5 GiB [世博量化研究院*]❯ microbenchmark(
     'as.matrix()' = as.matrix(测试数据, rownames = TRUE) %>% auto.arima, 
     'as.matrix %>% ' = 测试数据 %>% as.matrix(rownames = TRUE) %>% auto.arima, 
     'as.xts' = 测试数据 %>% as.xts %>% auto.arima, 
     'as.xts %>% as.ts' = 测试数据 %>% as.xts %>% ts %>% auto.arima, 
     'as.ts(freq = 1200)' = 测试数据 %>% as.xts %>% ts(frequency = 1200) %>% auto.arima, 
     'as.ts(freq = 120)' = 测试数据 %>% as.xts %>% ts(frequency = 120) %>% auto.arima, 
     'matrix %>% as.ts' = matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% as.ts %>% auto.arima, 
     'matrix %>% as.zoo' = matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% as.zoo %>% auto.arima, 
     'matrix' = matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% auto.arima)
Unit: milliseconds
               expr                       min                        lq                      mean
        as.matrix()   43.51706399999999774764   44.45820700000000158525   51.01456184999999976526
     as.matrix %>%    43.60379600000000266391   44.43160650000000089221   52.06498675000000275759
             as.xts   42.19390899999999788861   43.44118149999999900501   52.29102436000000153626
   as.xts %>% as.ts   42.21445899999999795682   43.00931149999999547617   51.63839049999999986085
 as.ts(freq = 1200)   42.24664899999999789770   43.10529650000000145837   51.77428743000000110897
  as.ts(freq = 120) 2392.39838700000018434366 2411.68259299999999711872 2462.60519316999989314354
   matrix %>% as.ts   41.50966199999999872716   42.22200399999999831380   50.11591734999999658839
  matrix %>% as.zoo   46.18885300000000171394   46.91678300000000234604   58.44752450000000010277
             matrix   41.58900100000000321643   42.62158850000000143154   49.24968701000000237400
                    median                        uq                       max neval
   45.56872500000000059117   48.29254650000000026466  113.00312399999999968259   100
   45.77284600000000125419   52.01385399999999492593   98.40117499999999495230   100
   44.63580299999999567717   49.53256000000000369710  126.15233999999999525699   100
   43.99932599999999638385   49.67516450000000105547   99.18939199999999800639   100
   44.28173999999999921329   49.64366749999999939291  121.05843600000000037653   100
 2427.76964899999984481838 2455.14120800000000599539 3041.27480900000000474392   100
   43.36704900000000151294   47.44317900000000065575  150.78695899999999596730   100
   48.47124600000000071987   61.96651900000000523505  222.77181400000000621731   100
   43.92246000000000094587   48.42570549999999940383  111.59604899999999361171   100

参考资源

1.2 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% zoo(frequency = 1) %>% auto.arima
Series: . 
ARIMA(0,1,0) 

sigma^2 = 0.0009413966542781437592:  log likelihood = 2485.590000000000145519
AIC=-4969.189999999999599822   AICc=-4969.180000000000291038   BIC=-4964.100000000000363798
1.2 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% zoo(frequency = 10) %>% auto.arima
Series: . 
ARIMA(0,0,0)(0,1,0)[10] 

sigma^2 = 0.0009485104828998929894:  log likelihood = 2466.949999999999818101
AIC=-4931.899999999999636202   AICc=-4931.899999999999636202   BIC=-4924.510000000000218279
1.2 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% zoo(frequency = 12) %>% auto.arima
Series: . 
ARIMA(0,0,0)(0,1,0)[12] 

sigma^2 = 0.0009501059589014066538:  log likelihood = 2462.80999999999994543
AIC=-4923.619999999999890861   AICc=-4923.619999999999890861   BIC=-4916.03999999999996362
1.2 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% zoo(frequency = 100) %>% auto.arima
Series: . 
ARIMA(0,0,0)(0,1,0)[100] 

sigma^2 = 0.001026045399758194815:  log likelihood = 2280.530000000000200089
AIC=-4559.060000000000400178   AICc=-4559.060000000000400178   BIC=-4549.369999999999890861
1.2 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% zoo(frequency = 120) %>% auto.arima
Series: .
ARIMA(0,0,0)(0,1,0)[120]

sigma^2 = 0.001045028663398494487:  log likelihood = 2239.110000000000127329
AIC=-4476.21000000000003638   AICc=-4476.21000000000003638   BIC=-4466.32999999999992724
1.2 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% zoo(frequency = 1000) %>% auto.arima
Series: .
ARIMA(0,0,0) with non-zero mean

Coefficients:
                             mean
      117.19461532056617159014422
s.e.    0.01308706771212796507453

sigma^2 = 0.2058672768984180779:  log likelihood = -754.5399999999999636202
AIC=1513.07999999999992724   AICc=1513.07999999999992724   BIC=1537.07999999999992724
1.2 GiB [世博量化研究院*]❯ matrix(测试数据$闭市价, dimnames = list(测试数据$年月日时分, '闭市价'), ncol = 1) %>% zoo(frequency = 1200) %>% auto.arima
Series: . 
ARIMA(0,0,0) with non-zero mean 

Coefficients:
                              mean
      117.19461532056617159014422
s.e.    0.01308706771212796507453

sigma^2 = 0.2058672768984180779:  log likelihood = -754.5399999999999636202
AIC=1513.07999999999992724   AICc=1513.07999999999992724   BIC=1537.44000000000005457

以上通过`zoo(frequency = 频率)`设置不同的循环周期,`auto.arima`运算出的模型与结果并不一样...