CagtayFabry/pytcs

datatable `NaN` columns with fill

Opened this issue · 0 comments

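datatable 1.0.0 has a bug when reading data whose rows have unequal numbers of columns: with `fill=True`, `fread` does not reliably pad the short rows with `NA` (in the 1.0.0 output below, rows 2–5 show `7` in trailing columns that should be `NA`).
This breaks reading data from many export formats.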

1.0.0 ERROR

import datatable

test = """\
1.,2,3,4,5,6,7,8,9,10
1.,2,3,4,5,6,7,8,9,10
1.,2,3,4,5,6,7
1.,2,3,4,5,6,7
1.,2, ,4,5,6,7
  ,2,3,4,5,6,7
  ,2,3,4,5,6,7"""

print(datatable.__version__)
#> 1.0.0
print(datatable.fread(test, fill=True))
#>    |      C0     C1     C2     C3     C4     C5     C6     C7     C8     C9
#>    | float64  int32  int32  int32  int32  int32  int32  int32  int32  int32
#> -- + -------  -----  -----  -----  -----  -----  -----  -----  -----  -----
#>  0 |       1      2      3      4      5      6      7      8      9     10
#>  1 |       1      2      3      4      5      6      7      8      9     10
#>  2 |       1      2      3      4      5      6      7     NA      7      7
#>  3 |       1      2      3      4      5      6      7     NA      7      7
#>  4 |       1      2     NA      4      5      6      7      7      7      7
#>  5 |      NA      2      3      4      5      6      7      7      7      7
#>  6 |      NA      2      3      4      5      6      7     NA     NA     NA
#> [7 rows x 10 columns]

1.1.0a0 FIXED

Instructions for installing the latest dev version, which contains the fix:
https://datatable.readthedocs.io/en/latest/start/install.html#install-latest-dev-version

import datatable

test = """\
1.,2,3,4,5,6,7,8,9,10
1.,2,3,4,5,6,7,8,9,10
1.,2,3,4,5,6,7
1.,2,3,4,5,6,7
1.,2, ,4,5,6,7
  ,2,3,4,5,6,7
  ,2,3,4,5,6,7"""

print(datatable.__version__)
#> 1.1.0a0+pr3381.2202
print(datatable.fread(test, fill=True))
#>    |      C0     C1     C2     C3     C4     C5     C6     C7     C8     C9
#>    | float64  int32  int32  int32  int32  int32  int32  int32  int32  int32
#> -- + -------  -----  -----  -----  -----  -----  -----  -----  -----  -----
#>  0 |       1      2      3      4      5      6      7      8      9     10
#>  1 |       1      2      3      4      5      6      7      8      9     10
#>  2 |       1      2      3      4      5      6      7     NA     NA     NA
#>  3 |       1      2      3      4      5      6      7     NA     NA     NA
#>  4 |       1      2     NA      4      5      6      7     NA     NA     NA
#>  5 |      NA      2      3      4      5      6      7     NA     NA     NA
#>  6 |      NA      2      3      4      5      6      7     NA     NA     NA
#> [7 rows x 10 columns]