Wimmics/corese

CSV export of SPARQL result: missing quotes?

NicoRobertIn opened this issue · 2 comments

Issue Description:

It seems that the CSV export of SPARQL query results can omit quotes... Am I missing something?

Steps to Reproduce:

In a python script

from subprocess import Popen, DEVNULL
from time import sleep
from atexit import register
from py4j.java_gateway import JavaGateway

# Start the Java side: launch the Corese py4j jar as a child process.
# Its stdout and stderr are discarded, so JVM startup errors are not shown.
java_process = Popen(
    ['java', '-jar', '-Dfile.encoding=UTF-8', 'corese-library-python-4.4.1.jar'],
    stdout=DEVNULL,
    stderr=DEVNULL
)

# Fixed one-second pause to give the Java server time to start up before
# the Python side creates its gateway.
sleep(1)
gateway = JavaGateway()
# Shut the gateway down automatically when the Python interpreter exits.
register(gateway.shutdown)

# Handles to the Corese Java classes, resolved through the gateway's JVM view.
Graph = gateway.jvm.fr.inria.corese.core.Graph
Load = gateway.jvm.fr.inria.corese.core.load.Load
QueryProcess = gateway.jvm.fr.inria.corese.core.query.QueryProcess
ResultFormat = gateway.jvm.fr.inria.corese.core.print.ResultFormat
# NOTE(review): presumably the numeric code ResultFormat uses for CSV output;
# confirm against the Corese ResultFormat constants.
TEST_CSV = 14

def load(path):
    """Load a graph from a local file or a URL.

    :param path: local path or a URL or a list of these
    :returns: the loaded graph, or ``None`` when parsing fails (the error
        is printed rather than raised)
    """
    # Build the graph and its loader exactly once; the previous version
    # created both twice and threw the first pair away.
    graph = Graph()
    ld = Load.create(graph)

    try:
        ld.parse(path)
    except Exception as e:
        # Best-effort behaviour kept from the original: report the failure
        # on stdout and signal it to the caller with None.
        print("error", str(e).strip())
        return None

    return graph

def query_graph(graph, query):
    """Run ``query`` against ``graph`` and return the formatted result text.

    :param graph: graph to query
    :param query: query string
    :returns: the string produced by the result formatter, with both the
        select and construct formats set to ``TEST_CSV``
    """
    processor = QueryProcess.create(graph)

    # Parse the query into its AST, then evaluate that AST.
    mappings = processor.query(processor.ast(query))

    formatter = ResultFormat.create(mappings)
    formatter.setSelectFormat(TEST_CSV)
    formatter.setConstructFormat(TEST_CSV)

    return formatter.toString()

# Reproduction: load the remote beatles.ttl file, select every triple and
# print the text returned by query_graph.
graph = load("https://raw.githubusercontent.com/stardog-union/stardog-tutorials/master/music/beatles.ttl")
print(query_graph(graph, "SELECT * WHERE {?s ?p ?o}"))

Expected Behavior:

The output should be readable by pandas

Actual Behavior:

It is not readable by pandas: some lines are missing quotes. An example of such a line:

http://stardog.com/tutorial/Please_Please_Me, http://stardog.com/tutorial/name, Please Please Me

Steps to reproduce:

Download the file named OSG_dimension-(v1.1).ttl from: https://data.world/bryon/qudt-1-1
Then run the Python code below, which raises the following pandas parsing error:

ParserError: Error tokenizing data. C error: Expected 3 fields in line 14, saw 4

from subprocess import Popen, DEVNULL
from time import sleep
from atexit import register
from py4j.java_gateway import JavaGateway

import pandas as pds
from io import StringIO

# Start the Java side: launch the Corese py4j jar as a child process.
# Its stdout and stderr are discarded, so JVM startup errors are not shown.
java_process = Popen(
    ['java', '-jar', '-Dfile.encoding=UTF-8', 'corese-library-python-4.4.1.jar'],
    stdout=DEVNULL,
    stderr=DEVNULL
)

# Fixed one-second pause to give the Java server time to start up before
# the Python side creates its gateway.
sleep(1)
gateway = JavaGateway()
# Shut the gateway down automatically when the Python interpreter exits.
register(gateway.shutdown)

# Handles to the Corese Java classes, resolved through the gateway's JVM view.
Graph = gateway.jvm.fr.inria.corese.core.Graph
Load = gateway.jvm.fr.inria.corese.core.load.Load
QueryProcess = gateway.jvm.fr.inria.corese.core.query.QueryProcess
ResultFormat = gateway.jvm.fr.inria.corese.core.print.ResultFormat
# NOTE(review): presumably the numeric code ResultFormat uses for CSV output;
# confirm against the Corese ResultFormat constants.
TEST_CSV = 14

def load(path):
    """Load a graph from a local file or a URL.

    :param path: local path or a URL or a list of these
    :returns: the loaded graph, or ``None`` when parsing fails (the error
        is printed rather than raised)
    """
    # Build the graph and its loader exactly once; the previous version
    # created both twice and threw the first pair away.
    graph = Graph()
    ld = Load.create(graph)

    try:
        ld.parse(path)
    except Exception as e:
        # Best-effort behaviour kept from the original: report the failure
        # on stdout and signal it to the caller with None.
        print("error", str(e).strip())
        return None

    return graph

def query_graph(graph, query):
    """Run ``query`` against ``graph`` and return the formatted result text.

    :param graph: graph to query
    :param query: query string
    :returns: the string produced by the result formatter, with both the
        select and construct formats set to ``TEST_CSV``
    """
    processor = QueryProcess.create(graph)

    # Parse the query into its AST, then evaluate that AST.
    mappings = processor.query(processor.ast(query))

    formatter = ResultFormat.create(mappings)
    formatter.setSelectFormat(TEST_CSV)
    formatter.setConstructFormat(TEST_CSV)

    return formatter.toString()

# Reproduction: load the local OSG_dimension-(v1.1).ttl file and export
# every triple as text via query_graph.
graph = load("OSG_dimension-(v1.1).ttl")
export = query_graph(graph, "SELECT * WHERE {?s ?p ?o}")

# Wrap the exported string in a file-like object so pandas can read it.
csvStringIO = StringIO(export)
# This is the call reported to fail with
# "ParserError: Error tokenizing data. C error: Expected 3 fields in line 14, saw 4".
# It is left unchanged on purpose so the script keeps reproducing the issue.
df = pds.read_csv(csvStringIO, sep=",", header=None)

print(df)

After verification, it appears that the CSV prettyprinter in Corese for SPARQL results is functioning correctly and is escaping special characters as intended. The issue you are encountering appears to be related to the space following the separator ", ". I have removed this unnecessary space. This fix will be included in the next release.