`Context.combine` is O(2n^2)
sneakers-the-rat opened this issue · 0 comments
One last little perf thing before i quit for the night.
combine
calls add_prefix
in a loop:
add_prefix
calls prefixes
and namespaces
prefixmaps/src/prefixmaps/datamodel/context.py
Lines 195 to 197 in 82bfdbc
prefixes
and namespaces
iterate over prefix_expansions
add_prefix
appends to prefix_expansions
so for each item in the context, prefix_expansions
is iterated twice and then grows.
This makes combining contexts very expensive - 124s for 24 calls (5.2s/call) in the linkml tests. 5.2s/call makes it not great for runtime use :(
Here's a very lazy fix that isn't great but it works - check for duplication only once when adding. it's not pretty bc i didn't want to disrupt the nested if/else in add_prefix
, but would be simplified if that was flattened.
@dataclass
class Context:
_prefixes: Optional[List[str]] = None
_prefixes_lower: Optional[List[str]] = None
_namespaces: Optional[List[str]] = None
_namespaces_lower: Optional[List[str]] = None
def add_prefix(
self,
prefix: PREFIX,
namespace: NAMESPACE,
status: StatusType = StatusType.canonical,
preferred: bool = False,
):
# ...
prefixes = self.prefixes(lower=True)
namespaces = self.namespaces(lower=True)
if prefix.lower() in prefixes:
if namespace.lower() in namespaces:
return
# status = StatusType.multi_alias
else:
status = StatusType.prefix_alias
self._namespaces.append(namespace)
self._namespaces_lower.append(namespace.lower())
else:
self._prefixes.append(prefix)
self._prefixes_lower.append(prefix.lower())
if namespace.lower() in namespaces:
status = StatusType.namespace_alias
else:
self._namespaces.append(namespace)
self._namespaces_lower.append(namespace.lower())
self.prefix_expansions.append(
PrefixExpansion(
context=self.name,
prefix=prefix,
namespace=namespace,
status=status,
)
)
def prefixes(self, lower=False) -> List[str]:
"""
All unique prefixes in all prefix expansions.
:param lower: if True, the prefix is normalized to lowercase.
:return:
"""
if lower:
if self._prefixes_lower is None:
self._prefixes_lower = list({pe.prefix.lower() for pe in self.prefix_expansions})
return self._prefixes_lower
else:
if self._prefixes is None:
self._prefixes = list({pe.prefix for pe in self.prefix_expansions})
return self._prefixes
def namespaces(self, lower=False) -> List[str]:
"""
All unique namespaces in all prefix expansions
:param lower: if True, the namespace is normalized to lowercase.
:return:
"""
if lower:
if self._namespaces_lower is None:
self._namespaces_lower = list({pe.namespace.lower() for pe in self.prefix_expansions})
return self._namespaces_lower
else:
if self._namespaces is None:
self._namespaces = list({pe.namespace for pe in self.prefix_expansions})
return self._namespaces
After changes no longer appears in profiling results bc takes no time.