CSV export of SPARQL result: missing quotes?
NicoRobertIn opened this issue · 2 comments
Issue Description:
It seems that the CSV export of SPARQL query results can omit quotes around values... Am I missing something?
Steps to Reproduce:
In a python script
from subprocess import Popen, DEVNULL
from time import sleep
from atexit import register
from py4j.java_gateway import JavaGateway
# Start java gateway: run the Corese jar as a child process and discard
# everything it writes to stdout and stderr.
java_process = Popen(
['java', '-jar', '-Dfile.encoding=UTF-8', 'corese-library-python-4.4.1.jar'],
stdout=DEVNULL,
stderr=DEVNULL
)
# Waiting for the java server to start up (fixed one-second delay).
# NOTE(review): nothing here checks that the server is actually ready.
sleep(1)
gateway = JavaGateway()
# Call gateway.shutdown when the Python interpreter exits.
register(gateway.shutdown)
# Import of class: Java classes reached through the gateway's JVM view.
Graph = gateway.jvm.fr.inria.corese.core.Graph
Load = gateway.jvm.fr.inria.corese.core.load.Load
QueryProcess = gateway.jvm.fr.inria.corese.core.query.QueryProcess
ResultFormat = gateway.jvm.fr.inria.corese.core.print.ResultFormat
# Format code handed to setSelectFormat/setConstructFormat in query_graph.
# Presumably selects CSV output -- TODO confirm against ResultFormat.
TEST_CSV = 14
def load(path):
    """Load a graph from a local file or a URL.

    :param path: local path or a URL or a list of these
    :returns: the loaded graph, or ``None`` when parsing raised an exception
    """
    # Build the graph and its loader once; the original created both twice
    # and threw the first pair away.
    graph = Graph()
    loader = Load.create(graph)
    try:
        loader.parse(path)
    except Exception as e:
        # Best-effort behaviour kept from the original: report the failure
        # on stdout and signal it to the caller with None.
        print("error", str(e).strip())
        return None
    return graph
def query_graph(graph, query):
    """Run ``query`` against ``graph`` and return the formatted result text.

    :param graph: graph the query process is created on
    :param query: query string to parse and execute
    :returns: the formatter's string output, with both the select and the
        construct format set to ``TEST_CSV``
    """
    processor = QueryProcess.create(graph)
    # Parse the query text first, then execute the parsed form.
    parsed_query = processor.ast(query)
    formatter = ResultFormat.create(processor.query(parsed_query))

    formatter.setSelectFormat(TEST_CSV)
    formatter.setConstructFormat(TEST_CSV)
    return formatter.toString()
# Load the remote Turtle file, select every triple and print the formatted text.
graph = load(
    "https://raw.githubusercontent.com/stardog-union/stardog-tutorials/master/music/beatles.ttl"
)
result = query_graph(graph, "SELECT * WHERE {?s ?p ?o}")
print(result)
Expected Behavior:
The output should be readable by pandas
Actual Behavior:
It is not readable by pandas: some lines are missing quotes around their values. An example of such a line:
http://stardog.com/tutorial/Please_Please_Me, http://stardog.com/tutorial/name, Please Please Me
Steps to reproduce:
Get the Turtle file named OSG_dimension-(v1.1).ttl
from: https://data.world/bryon/qudt-1-1
Then execute this Python code to get the following pandas parsing error:
ParserError: Error tokenizing data. C error: Expected 3 fields in line 14, saw 4
from subprocess import Popen, DEVNULL
from time import sleep
from atexit import register
from py4j.java_gateway import JavaGateway
import pandas as pds
from io import StringIO
# Start java gateway: run the Corese jar as a child process and discard
# everything it writes to stdout and stderr.
java_process = Popen(
['java', '-jar', '-Dfile.encoding=UTF-8', 'corese-library-python-4.4.1.jar'],
stdout=DEVNULL,
stderr=DEVNULL
)
# Waiting for the java server to start up (fixed one-second delay).
# NOTE(review): nothing here checks that the server is actually ready.
sleep(1)
gateway = JavaGateway()
# Call gateway.shutdown when the Python interpreter exits.
register(gateway.shutdown)
# Import of class: Java classes reached through the gateway's JVM view.
Graph = gateway.jvm.fr.inria.corese.core.Graph
Load = gateway.jvm.fr.inria.corese.core.load.Load
QueryProcess = gateway.jvm.fr.inria.corese.core.query.QueryProcess
ResultFormat = gateway.jvm.fr.inria.corese.core.print.ResultFormat
# Format code handed to setSelectFormat/setConstructFormat in query_graph.
# Presumably selects CSV output -- TODO confirm against ResultFormat.
TEST_CSV = 14
def load(path):
    """Load a graph from a local file or a URL.

    :param path: local path or a URL or a list of these
    :returns: the loaded graph, or ``None`` when parsing raised an exception
    """
    # Build the graph and its loader once; the original created both twice
    # and threw the first pair away.
    graph = Graph()
    loader = Load.create(graph)
    try:
        loader.parse(path)
    except Exception as e:
        # Best-effort behaviour kept from the original: report the failure
        # on stdout and signal it to the caller with None.
        print("error", str(e).strip())
        return None
    return graph
def query_graph(graph, query):
    """Run ``query`` against ``graph`` and return the formatted result text.

    :param graph: graph the query process is created on
    :param query: query string to parse and execute
    :returns: the formatter's string output, with both the select and the
        construct format set to ``TEST_CSV``
    """
    processor = QueryProcess.create(graph)
    # Parse the query text first, then execute the parsed form.
    parsed_query = processor.ast(query)
    formatter = ResultFormat.create(processor.query(parsed_query))

    formatter.setSelectFormat(TEST_CSV)
    formatter.setConstructFormat(TEST_CSV)
    return formatter.toString()
# Load the local Turtle file and export every triple as text.
graph = load("OSG_dimension-(v1.1).ttl")
export = query_graph(graph, "SELECT * WHERE {?s ?p ?o}")

# Wrap the exported text in an in-memory buffer so pandas can read it like a file.
csvStringIO = StringIO(initial_value=export)
# NOTE(review): the export separates fields with ", "; pandas' skipinitialspace=True
# may let it parse such input -- left off here so the script still shows the error.
df = pds.read_csv(filepath_or_buffer=csvStringIO, sep=",", header=None)
print(df)
After verification, it appears that the CSV prettyprinter in Corese for SPARQL results is functioning correctly and is escaping special characters as intended. The issue you are encountering appears to be related to the space following the separator ", ". I have removed this unnecessary space. This fix will be included in the next release.