Wimmics/corese

Federated query optimization

elordis opened this issue · 3 comments

Hello.

We are using Corese to federate two of our SPARQL endpoints. One contains main graph, other is a graph view over a relational database with operational data. We want to do a graph search like this:

SELECT ?DeviceRoot_id ?DeviceRoot_DeviceHasNetworkInstance_id ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType WHERE {
  ?df_7_0 <http://example.net/device.owl#DeviceRoot> ?DeviceRoot_id. # Main graph
  ?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id. # Main graph
  ?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB. # Main graph
  ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable. # Main graph
  ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> "01:00:0c:cc:cc:cc";  # RDB graph
    <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac; # RDB graph
    <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface; # RDB graph
    <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan; # RDB graph
    <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType. # RDB graph
}

On sample data (few thousand triplets) everything works fine, but on real datasets performance looks very bad.

From what I've been able to dug up, the main culprit is query that Corese uses to extract data:

select * 
where {
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#DeviceRoot> ?o .
    }
    limit 1 } as ?b0)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#DeviceHasNetworkInstance> ?o .
    }
    limit 1 } as ?b1)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#NetworkInstanceHasFDB> ?o .
    }
    limit 1 } as ?b2)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#FDBHasMacTable> ?o .
    }
    limit 1 } as ?b3)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#hasMac> ?o .
    }
    limit 1 } as ?b4)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#matchingMac> ?o .
    }
    limit 1 } as ?b5)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#matchingInterface> ?o .
    }
    limit 1 } as ?b6)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#matchingVlan> ?o .
    }
    limit 1 } as ?b7)
  bind (exists {select * 
    where {
      ?s <http://example.net/device.owl#matchingEntryType> ?o .
    }
    limit 1 } as ?b8)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
    }
    limit 1 } as ?b9)
  bind (exists {select * 
    where {
      ?df_7_6 <http://example.net/device.owl#DeviceRoot> ?DeviceRoot_id .
      ?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id .
    }
    limit 1 } as ?b10)
  bind (exists {select * 
    where {
      ?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id .
      ?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB .
    }
    limit 1 } as ?b11)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
    }
    limit 1 } as ?b12)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
    }
    limit 1 } as ?b13)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
    }
    limit 1 } as ?b14)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
    }
    limit 1 } as ?b15)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
    }
    limit 1 } as ?b16)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
    }
    limit 1 } as ?b17)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
    }
    limit 1 } as ?b18)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
    }
    limit 1 } as ?b19)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
    }
    limit 1 } as ?b20)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#hasMac> '01:00:0c:cc:cc:cc' .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
    }
    limit 1 } as ?b21)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
    }
    limit 1 } as ?b22)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
    }
    limit 1 } as ?b23)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingMac> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingMac .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
    }
    limit 1 } as ?b24)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
    }
    limit 1 } as ?b25)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingInterface> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingInterface .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
    }
    limit 1 } as ?b26)
  bind (exists {select * 
    where {
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingVlan> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingVlan .
      ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable <http://example.net/device.owl#matchingEntryType> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable_matchingEntryType .
    }
    limit 1 } as ?b27)
}
limit 1000

It looks like the query leads to combinatorial explosion that just hangs up our graph storage.
Are there any workarounds to make things more performant?

ocorby commented

Thank you. Things have improved by a lot. Performance is still not good on heavy queries but at least things should work in most cases.
So, now Corese asks our triple store queries like this:

select * 
where {
  values (?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable ){
    (<http://example.net/device.owl#MA-7EKB-VN40-311_default_FDB_MACTable> )
    (<http://example.net/device.owl#sw-7noy-iz13-105_default_FDB_MACTable> )
    (<http://example.net/device.owl#SA-7NOY-PZ14-304_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-213_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-7KRG-PS73-300_default_FDB_MACTable> )
    (<http://example.net/device.owl#SA-7SAL-ARK4-101_default_FDB_MACTable> )
    (<http://example.net/device.owl#MA-7EKB-ZOO9-049_default_FDB_MACTable> )
    (<http://example.net/device.owl#sa-8nkz-osin-001_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8BRN-PROF-002_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8BRN-PO11-003_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-221_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-214_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-212_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-223_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-122_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-123_default_FDB_MACTable> )
    (<http://example.net/device.owl#SA-8BIY-RVRA-001_default_FDB_MACTable> )
    (<http://example.net/device.owl#SW-8NSK-BHMC-211_default_FDB_MACTable> )
    (<http://example.net/device.owl#SA-8NSK-F242-001_default_FDB_MACTable> )
    (<http://example.net/device.owl#SA1-VOKZ-310-105_default_FDB_MACTable> )
  }
  ?df_7_4 <http://example.net/device.owl#DeviceRoot> ?DeviceRoot_id .
  ?DeviceRoot_id <http://example.net/device.owl#DeviceHasNetworkInstance> ?DeviceRoot_DeviceHasNetworkInstance_id .
  ?DeviceRoot_DeviceHasNetworkInstance_id <http://example.net/device.owl#NetworkInstanceHasFDB> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB .
  ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB <http://example.net/device.owl#FDBHasMacTable> ?DeviceRoot_DeviceHasNetworkInstance_NetworkInstanceHasFDB_FDBHasMacTable .
}
limit 1000

Is it possible to increase "chunk size" for those values blocks? It feels like it should impove performance by quite a lot.

ocorby commented