JSON Serialization Error
scottiegarcia opened this issue · 2 comments
scottiegarcia commented
General Information:
- OS: MacOS Sonoma 14.3.1
- Python version: 3.10
- Library version: 0.10.8
Describe the bug:
Saving a profile with save_method="json" raises a TypeError because a datetime object in the profile is not converted to a string before JSON serialization.
To Reproduce:
from dataprofiler import Profiler
import pandas as pd
df = pd.DataFrame({"test": [1209214]})
dp = Profiler(df)
dp.save("test.json", save_method="json")
Expected behavior:
DataProfiler should serialize all datetime attributes to strings before saving to JSON, and deserialize them back to datetime objects on load.
Screenshots:
Traceback
Traceback (most recent call last):
File "/Projects/helpful-scripts/bug2.py", line 10, in <module>
dp.save("test.json", save_method="json")
File "/miniconda3/envs/monitoring/lib/python3.10/site-packages/dataprofiler/profilers/profile_builder.py", line 3070, in save
self._json_save_helper(filepath)
File "/miniconda3/envs/monitoring/lib/python3.10/site-packages/dataprofiler/profilers/profile_builder.py", line 1155, in _json_save_helper
json.dump(self, f, cls=ProfileEncoder)
File "/miniconda3/envs/monitoring/lib/python3.10/json/__init__.py", line 179, in dump
for chunk in iterable:
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 439, in _iterencode
yield from _iterencode(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 325, in _iterencode_list
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 439, in _iterencode
yield from _iterencode(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 439, in _iterencode
yield from _iterencode(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 439, in _iterencode
yield from _iterencode(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/miniconda3/envs/monitoring/lib/python3.10/site-packages/dataprofiler/profilers/json_encoder.py", line 71, in default
return json.JSONEncoder.default(self, to_serialize)
File "/miniconda3/envs/monitoring/lib/python3.10/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type datetime is not JSON serializable
taylorfturner commented
@scottiegarcia good to close?
scottiegarcia commented
Unfortunately, no. I needed to test loading as well, and I am running into errors there.
Code snippet below:
from dataprofiler import Profiler
import pandas as pd
import os
df = pd.DataFrame({"a": [130277]})
dp = Profiler(df)
dp.save("test.json", save_method="json")
dp = Profiler.load("test.json", load_method="json")
Stack trace:
Traceback (most recent call last):
File "/helpful-scripts/bug.py", line 12, in <module>
dp = Profiler.load("test.json", load_method="json")
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 3160, in load
return BaseProfiler.load(filepath, load_method)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 1201, in load
return load_profiler(json.load(infile), {})
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 246, in load_profiler
return profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 2142, in load_from_dict
structured_profiler = super().load_from_dict(data, config)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 913, in load_from_dict
value[idx] = load_structured_col_profiler(profile, config)
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 276, in load_structured_col_profiler
return profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/profile_builder.py", line 413, in load_from_dict
value[profile_key] = load_compiler(profile_value, config)
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 191, in load_compiler
return column_profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/column_profile_compilers.py", line 242, in load_from_dict
value[col_type] = load_column_profile(profile_as_dict, config)
File "/DataProfiler/dataprofiler/profilers/json_decoder.py", line 159, in load_column_profile
return column_profiler_cls.load_from_dict(serialized_json["data"], config)
File "/DataProfiler/dataprofiler/profilers/datetime_column_profile.py", line 152, in load_from_dict
profile._dt_obj_min = pd.Timestamp(profile._dt_obj_min)
File "pandas/_libs/tslibs/timestamps.pyx", line 1698, in pandas._libs.tslibs.timestamps.Timestamp.__new__
File "pandas/_libs/tslibs/conversion.pyx", line 249, in pandas._libs.tslibs.conversion.convert_to_tsobject
File "pandas/_libs/tslibs/conversion.pyx", line 523, in pandas._libs.tslibs.conversion._convert_str_to_tsobject
File "pandas/_libs/tslibs/conversion.pyx", line 506, in pandas._libs.tslibs.conversion._convert_str_to_tsobject
File "pandas/_libs/tslibs/np_datetime.pyx", line 212, in pandas._libs.tslibs.np_datetime.check_dts_bounds
pandas._libs.tslibs.np_datetime.OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 277-01-03 00:00:00