okfn-brasil/serenata-toolbox

Chamber of deputies: ValueError: could not convert string to float

Closed this issue · 3 comments

When trying to translate the Chamber of Deputies files, I'm getting a `ValueError: could not convert string to float` error. It's related to this converter:

def _parse_float(self, string):
    return float(string.replace(',', '.'))
In [10]: d.translate()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-10-34514b74cf15> in <module>()
----> 1 d.translate()

/Users/cuducos/serenata-toolbox/serenata_toolbox/chamber_of_deputies/dataset.py in translate(self)
     32         for year in self.years:
     33             csv_path = os.path.join(self.path, 'Ano-{}.csv'.format(year))
---> 34             self._translate_file(csv_path)
     35
     36     def clean(self):

/Users/cuducos/serenata-toolbox/serenata_toolbox/chamber_of_deputies/dataset.py in _translate_file(self, csv_path)
     57                                        'vlrGlosa': self._parse_float,
     58                                        'vlrLiquido': self._parse_float,
---> 59                                        'vlrRestituicao': self._parse_float})
     60
     61         data.rename(columns={

/Users/cuducos/.virtualenvs/serenata-toolbox/lib/python3.5/site-packages/pandas-0.19.1-py3.5-macosx-10.11-x86_64.egg/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    643                     skip_blank_lines=skip_blank_lines)
    644
--> 645         return _read(filepath_or_buffer, kwds)
    646
    647     parser_f.__name__ = name

/Users/cuducos/.virtualenvs/serenata-toolbox/lib/python3.5/site-packages/pandas-0.19.1-py3.5-macosx-10.11-x86_64.egg/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    398         return parser
    399
--> 400     data = parser.read()
    401     parser.close()
    402     return data

/Users/cuducos/.virtualenvs/serenata-toolbox/lib/python3.5/site-packages/pandas-0.19.1-py3.5-macosx-10.11-x86_64.egg/pandas/io/parsers.py in read(self, nrows)
    936                 raise ValueError('skipfooter not supported for iteration')
    937
--> 938         ret = self._engine.read(nrows)
    939
    940         if self.options.get('as_recarray'):

/Users/cuducos/.virtualenvs/serenata-toolbox/lib/python3.5/site-packages/pandas-0.19.1-py3.5-macosx-10.11-x86_64.egg/pandas/io/parsers.py in read(self, nrows)
   1505     def read(self, nrows=None):
   1506         try:
-> 1507             data = self._reader.read(nrows)
   1508         except StopIteration:
   1509             if self._first_chunk:

pandas/parser.pyx in pandas.parser.TextReader.read (pandas/parser.c:9935)()

pandas/parser.pyx in pandas.parser.TextReader._read_low_memory (pandas/parser.c:10193)()

pandas/parser.pyx in pandas.parser.TextReader._read_rows (pandas/parser.c:11212)()

pandas/parser.pyx in pandas.parser.TextReader._convert_column_data (pandas/parser.c:12554)()

pandas/parser.pyx in pandas.parser._apply_converter (pandas/parser.c:27555)()

/Users/cuducos/serenata-toolbox/serenata_toolbox/chamber_of_deputies/dataset.py in _parse_float(self, string)
    125
    126     def _parse_float(self, string):
--> 127         return float(string.replace(',', '.'))

ValueError: could not convert string to float:

One idea is to use the `decimal` argument of `read_csv`, instead of a converter function, to handle the conversion of numbers such as 3,1415 to 3.1415.

In spite of that, I must confess it might be an external issue: maybe the Chamber's server changed the format, or the server itself is not as we expected it to be…

Coincidentally, I have just found the same error:

In [4]: chamber.translate()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-76d232f4fc7d> in <module>()
----> 1 chamber.translate()

c:\users\rodolfoviana\documents\code\serenata-de-amor\serenata-toolbox\serenata_
toolbox\chamber_of_deputies\dataset.py in translate(self)
     36         for year in self.years:
     37             csv_path = os.path.join(self.path, 'Ano-{}.csv'.format(year)
)
---> 38             self.__translate_file(csv_path)
     39
     40     def clean(self):

c:\users\rodolfoviana\documents\code\serenata-de-amor\serenata-toolbox\serenata_
toolbox\chamber_of_deputies\dataset.py in __translate_file(self, csv_path)
     61                                        'vlrGlosa': self.__parse_float,
     62                                        'vlrLiquido': self.__parse_float,

---> 63                                        'vlrRestituicao': self.__parse_fl
oat})
     64
     65         data.rename(columns={

C:\Users\rodolfoviana\AppData\Local\conda\conda\envs\serenata_de_amor\lib\site-p
ackages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, hea
der, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine
, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_v
alues, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer
_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, com
pression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, co
mment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfo
oter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use
_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    653                     skip_blank_lines=skip_blank_lines)
    654
--> 655         return _read(filepath_or_buffer, kwds)
    656
    657     parser_f.__name__ = name

C:\Users\rodolfoviana\AppData\Local\conda\conda\envs\serenata_de_amor\lib\site-p
ackages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    409
    410     try:
--> 411         data = parser.read(nrows)
    412     finally:
    413         parser.close()

C:\Users\rodolfoviana\AppData\Local\conda\conda\envs\serenata_de_amor\lib\site-p
ackages\pandas\io\parsers.py in read(self, nrows)
   1003                 raise ValueError('skipfooter not supported for iteration
')
   1004
-> 1005         ret = self._engine.read(nrows)
   1006
   1007         if self.options.get('as_recarray'):

C:\Users\rodolfoviana\AppData\Local\conda\conda\envs\serenata_de_amor\lib\site-p
ackages\pandas\io\parsers.py in read(self, nrows)
   1746     def read(self, nrows=None):
   1747         try:
-> 1748             data = self._reader.read(nrows)
   1749         except StopIteration:
   1750             if self._first_chunk:

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.read (pandas\_libs\p
arsers.c:10862)()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory (pa
ndas\_libs\parsers.c:11138)()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows (pandas\_
libs\parsers.c:12175)()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._convert_column_data
 (pandas\_libs\parsers.c:14103)()

pandas/_libs/parsers.pyx in pandas._libs.parsers._apply_converter (pandas\_libs\
parsers.c:30644)()

c:\users\rodolfoviana\documents\code\serenata-de-amor\serenata-toolbox\serenata_
toolbox\chamber_of_deputies\dataset.py in __parse_float(self, string)
    129
    130     def __parse_float(self, string):
--> 131         return float(string.replace(',', '.'))

ValueError: could not convert string to float:

By the way, I checked every .csv file and they all use `,` (a comma), so I believe they did not change the format.

Fixed by #124