Following the weak_supervision.ipynb
gerileka opened this issue · 1 comments
from label_studio_sdk import Client
from label_studio_sdk.project import ProjectSampling

# Connection settings for the Label Studio instance used in this report.
LABEL_STUDIO_URL = 'http://localhost:8080'
LABEL_STUDIO_API_KEY = 'XXXXXXX'

# Labeling interface: one text field with a single-choice sentiment label.
LABEL_CONFIG = '''
<View>
<Text name="text" value="$text"/>
<Choices name="sentiment" toName="text" choice="single" showInLine="true">
<Choice value="Positive"/>
<Choice value="Negative"/>
<Choice value="Neutral"/>
</Choices>
</View>
'''

# Open the client and check the connection before creating anything.
ls = Client(url=LABEL_STUDIO_URL, api_key=LABEL_STUDIO_API_KEY)
ls.check_connection()

# Create the project, then switch its task sampling to uncertainty sampling.
project = ls.start_project(
    title='AL Project Created from SDK',
    label_config=LABEL_CONFIG,
)
project.set_sampling(ProjectSampling.UNCERTAINTY)
import pandas as pd

# Load the tab-separated dataset and name its first two columns
# "text" and "value".
tasks = pd.read_csv(
    'https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/amazon_cells_labelled.tsv',
    sep='\t',
)
first_column = tasks.columns[0]
second_column = tasks.columns[1]
tasks = tasks.rename(columns={first_column: "text", second_column: "value"})

# One dict per row. Rows with no label keep NaN in "value" at this point.
tasks = tasks.to_dict('records')
tasks_ids = project.import_tasks(tasks)
Note that the `tasks` list looks like this:
[{'text': 'So there is no way for me to plug it in here in the US unless I go by a converter.',
'value': 0.0},
{'text': 'Good case, Excellent value.', 'value': 1.0},
{'text': 'I thought Motorola made reliable products!.', 'value': nan},
{'text': 'Battery for Motorola Razr.', 'value': nan},
{'text': 'Great for the jawbone.', 'value': 1.0},
{'text': 'When I got this item it was larger than I thought It was the size of a small video game but bulky.',
'value': nan},
{'text': '(I looked for one that specifically said DCU-60 and said it supported USB 1 and 2.',
'value': nan}]
Apparently, NaN values are not accepted. For example, this specific dict in the list:
{'text': 'I thought Motorola made reliable products!.', 'value': nan}
Calling `fillna("")` to replace the NaN values with empty strings resolved the problem.