Federated query optimization
elordis opened this issue · 3 comments
Hello.
We are using Corese to federate two of our SPARQL endpoints. One contains main graph, other is a graph view over a relational database with operational data. We want to do a graph search like this:
SELECT ?DeviceRoot_id ?DeviceRoot_DeviceHasNetworkInstance_id ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType WHERE {
?df_7_0 <http://example.net/device.owl#DeviceRoot> ?DeviceRoot_id. # Main graph
?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id. # Main graph
?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB. # Main graph
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable. # Main graph
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> "01:00:0c:cc:cc:cc"; # RDB graph
<http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac; # RDB graph
<http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface; # RDB graph
<http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan; # RDB graph
<http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType. # RDB graph
}
On sample data (few thousand triplets) everything works fine, but on real datasets performance looks very bad.
From what I've been able to dug up, the main culprit is query that Corese uses to extract data:
select *
where {
bind (exists {select *
where {
?s <http://example.net/device.owl#DeviceRoot> ?o .
}
limit 1 } as ?b0)
bind (exists {select *
where {
?s <http://example.net/device.owl#DeviceHasNetworkInstance> ?o .
}
limit 1 } as ?b1)
bind (exists {select *
where {
?s <http://example.net/device.owl#NetworkInstanceHasFDB> ?o .
}
limit 1 } as ?b2)
bind (exists {select *
where {
?s <http://example.net/device.owl#FDBHasMacTable> ?o .
}
limit 1 } as ?b3)
bind (exists {select *
where {
?s <http://example.net/device.owl#hasMac> ?o .
}
limit 1 } as ?b4)
bind (exists {select *
where {
?s <http://example.net/device.owl#matchingMac> ?o .
}
limit 1 } as ?b5)
bind (exists {select *
where {
?s <http://example.net/device.owl#matchingInterface> ?o .
}
limit 1 } as ?b6)
bind (exists {select *
where {
?s <http://example.net/device.owl#matchingVlan> ?o .
}
limit 1 } as ?b7)
bind (exists {select *
where {
?s <http://example.net/device.owl#matchingEntryType> ?o .
}
limit 1 } as ?b8)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
}
limit 1 } as ?b9)
bind (exists {select *
where {
?df_7_6 <http://example.net/device.owl#DeviceRoot> ?DeviceRoot_id .
?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id .
}
limit 1 } as ?b10)
bind (exists {select *
where {
?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id .
?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB .
}
limit 1 } as ?b11)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
}
limit 1 } as ?b12)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
}
limit 1 } as ?b13)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
}
limit 1 } as ?b14)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
}
limit 1 } as ?b15)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
}
limit 1 } as ?b16)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
}
limit 1 } as ?b17)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
}
limit 1 } as ?b18)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
}
limit 1 } as ?b19)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
}
limit 1 } as ?b20)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
}
limit 1 } as ?b21)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
}
limit 1 } as ?b22)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
}
limit 1 } as ?b23)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
}
limit 1 } as ?b24)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
}
limit 1 } as ?b25)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
}
limit 1 } as ?b26)
bind (exists {select *
where {
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
}
limit 1 } as ?b27)
}
limit 1000
It looks like the query leads to combinatorial explosion that just hangs up our graph storage.
Are there any workarounds to make things more performant?
Thank you. Things have improved by a lot. Performance is still not good on heavy queries but at least things should work in most cases.
So, now Corese asks our triple store queries like this:
select *
where {
values (?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable ){
(<http://example.net/device.owl#MA-7EKB-VN40-311_default_FDB_MACTable> )
(<http://example.net/device.owl#sw-7noy-iz13-105_default_FDB_MACTable> )
(<http://example.net/device.owl#SA-7NOY-PZ14-304_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-213_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-7KRG-PS73-300_default_FDB_MACTable> )
(<http://example.net/device.owl#SA-7SAL-ARK4-101_default_FDB_MACTable> )
(<http://example.net/device.owl#MA-7EKB-ZOO9-049_default_FDB_MACTable> )
(<http://example.net/device.owl#sa-8nkz-osin-001_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8BRN-PROF-002_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8BRN-PO11-003_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-221_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-214_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-212_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-223_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-122_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-123_default_FDB_MACTable> )
(<http://example.net/device.owl#SA-8BIY-RVRA-001_default_FDB_MACTable> )
(<http://example.net/device.owl#SW-8NSK-BHMC-211_default_FDB_MACTable> )
(<http://example.net/device.owl#SA-8NSK-F242-001_default_FDB_MACTable> )
(<http://example.net/device.owl#SA1-VOKZ-310-105_default_FDB_MACTable> )
}
?df_7_4 <http://example.net/device.owl#DeviceRoot> ?DeviceRoot_id .
?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id .
?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB .
?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
}
limit 1000
Is it possible to increase "chunk size" for those values
blocks? It feels like it should impove performance by quite a lot.