statnett/data_cache

caching error in pandas cache

Khaled-Abdelhamid opened this issue · 1 comments

@pandas_cache
def get_sentiment(text_content: str):
    """
    Classifying Content in a String

    Args:
      text_content The text content to analyze. Must include at least 20 words.
    """

    client = language_v1.LanguageServiceClient.from_service_account_json(filename=GOOGLE_APPLICATION_CREDENTIALS)

    # text_content = 'That actor on TV makes movies in Hollywood and also stars in a variety of popular new TV shows.'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    language = "en"
    document = {"content": text_content, "type_": type_, "language": language}

    response = client.analyze_sentiment(request={"document": document})
    out=analyse_response(response)
    return out

@pandas_cache
def analyse_response(response)->Tuple[str]:
    magnitude=response.document_sentiment.magnitude
    score=response.document_sentiment.score

    if -.1<magnitude<.1 or -.1<score<.1:
        label= "neutral"
        
    elif score>=.1:
        label= "positive"
        
    elif score<=-.1:
        label= "negative"

    # print("magnitude: ",magnitude)
    # print("score: ",score)
    # print("label: ",label)
    return magnitude ,score ,label

text = """i'm soo happy""".strip()

res=get_sentiment(text)

Gives the error

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_6589/3336704012.py in <module>
      2 text = """i'm soo happy""".strip()
      3 
----> 4 res=get_sentiment(text)

~/miniconda3/lib/python3.7/site-packages/data_cache/cache_tools.py in wrapped(*f_args, **f_kwargs)
    210                 group = "a" + hashlib.md5(inspect.getsource(func).encode("utf-8")).hexdigest()
    211                 key = "a" + hashlib.md5(json.dumps(extracted_args).encode("utf-8")).hexdigest()
--> 212                 return table_getter(group, key, func, f_args, f_kwargs, extracted_args)
    213 
    214             return wrapped

~/miniconda3/lib/python3.7/site-packages/data_cache/cache_tools.py in store_func(func_key, arg_key, func, f_args, f_kwargs, metadata)
    172             if isinstance(data, tuple):
    173                 for i, data_ in enumerate(data):
--> 174                     store.create_dataset(f"{path}/data{i}{suffix}", data=data_)
    175                     add_metadata(store[f"{path}/data{i}"], func, metadata)
    176             else:

~/miniconda3/lib/python3.7/site-packages/data_cache/cache_tools.py in create_dataset(self, key, data)
     63 class PandasStore(pd.HDFStore):
     64     def create_dataset(self, key: str, data: pd.DataFrame) -> None:
---> 65         data.to_hdf(super(), key)
     66 
     67     def __getitem__(self, key: str) -> PandasGroup:

AttributeError: 'float' object has no attribute 'to_hdf'

system details

OS: Manjaro Linux x86_64
Host: Inspiron 5567
Kernel: 5.15.57-2-MANJARO
Packages: 1589 (pacman), 6 (flatpak), 9 (snap)
Shell: zsh 5.9
python: python3.7
Terminal: vscode
CPU: Intel i7-7500U (4) @ 3.500GHz
GPU: AMD ATI Radeon R7 M260/M265 / M340/M360 / M440/M445 / 530/535 / 620/625 Mobile
GPU: Intel HD Graphics 620
Memory: 7031MiB / 15899MiB

Hey, the pandas_cache is only meant for functions that return pandas DataFrames. For this to work with a tuple of strings you would have to write a StoreClass which handles writing and reading the string-tuples to/from disk.