Codebase is not pythonic
Closed this issue · 0 comments
SmokeShine commented
Dispatcher.py uses the builder pattern unnecessarily, which is not Pythonic and inflates the number of lines. The current implementation is:
class CrawlerDispatcher:
    """Select a crawler class for a URL based on the URL's host.

    Crawler classes are registered per domain. ``get_crawler`` instantiates the
    first registered class whose pattern matches the URL and falls back to
    ``CustomArticleCrawler`` when none matches.
    """

    def __init__(self) -> None:
        # Maps a URL regex (built in ``register``) to the crawler class it selects.
        self._crawlers = {}

    @classmethod
    def build(cls) -> "CrawlerDispatcher":
        """Return a new dispatcher with no crawlers registered."""
        return cls()

    def register_medium(self) -> "CrawlerDispatcher":
        """Register ``MediumCrawler`` for medium.com and return ``self`` for chaining."""
        self.register("https://medium.com", MediumCrawler)
        return self

    def register_linkedin(self) -> "CrawlerDispatcher":
        """Register ``LinkedInCrawler`` for linkedin.com and return ``self`` for chaining."""
        self.register("https://linkedin.com", LinkedInCrawler)
        return self

    def register_github(self) -> "CrawlerDispatcher":
        """Register ``GithubCrawler`` for github.com and return ``self`` for chaining."""
        self.register("https://github.com", GithubCrawler)
        return self

    def register(self, domain: str, crawler: type[BaseCrawler]) -> None:
        """Associate *crawler* with HTTPS URLs hosted on *domain*.

        Args:
            domain: Site to match, e.g. ``"https://medium.com"``. A bare host
                such as ``"medium.com"`` is accepted as well.
            crawler: Crawler class that ``get_crawler`` instantiates on a match.

        Raises:
            ValueError: If no host can be extracted from *domain*.
        """
        parsed = urlparse(domain)
        # Without a scheme, urlparse leaves netloc empty and the host ends up
        # at the start of path.
        host = parsed.netloc or parsed.path.split("/", 1)[0]
        if not host:
            # An empty host would produce a pattern that matches every https URL.
            raise ValueError(f"Cannot extract a host from domain {domain!r}")

        # After the host allow only an optional port followed by a path, query,
        # fragment or the end of the URL, so that look-alike hosts such as
        # "medium.com.evil.example" are not treated as "medium.com".
        pattern = r"https://(www\.)?{}(?::\d+)?(?:[/?#]|$)".format(re.escape(host))
        self._crawlers[pattern] = crawler

    def get_crawler(self, url: str) -> BaseCrawler:
        """Return a new crawler instance suited to *url*.

        Patterns are tried in registration order and the first match wins. When
        nothing matches, a warning is logged and a ``CustomArticleCrawler`` is
        returned.
        """
        for pattern, crawler in self._crawlers.items():
            if re.match(pattern, url):
                return crawler()

        logger.warning(f"No crawler found for {url}. Defaulting to CustomArticleCrawler.")
        return CustomArticleCrawler()
It could instead be written as:
class CrawlerDispatcher:
    """Select a crawler class for a URL based on the URL's host.

    The Medium, LinkedIn and GitHub crawlers are registered on construction;
    further crawlers can be added with ``register``. ``get_crawler`` falls back
    to ``CustomArticleCrawler`` when no registered pattern matches.
    """

    def __init__(self) -> None:
        # Maps a URL regex (built in ``register``) to the crawler class it selects.
        self._crawlers = {}
        self._register_default_crawlers()

    def _register_default_crawlers(self) -> None:
        """Register the crawlers every dispatcher starts with."""
        self.register("https://medium.com", MediumCrawler)
        self.register("https://linkedin.com", LinkedInCrawler)
        self.register("https://github.com", GithubCrawler)

    def register(self, domain: str, crawler: type[BaseCrawler]) -> None:
        """Associate *crawler* with HTTPS URLs hosted on *domain*.

        Args:
            domain: Site to match, e.g. ``"https://medium.com"``. A bare host
                such as ``"medium.com"`` is accepted as well.
            crawler: Crawler class that ``get_crawler`` instantiates on a match.

        Raises:
            ValueError: If no host can be extracted from *domain*.
        """
        parsed = urlparse(domain)
        # Without a scheme, urlparse leaves netloc empty and the host ends up
        # at the start of path.
        host = parsed.netloc or parsed.path.split("/", 1)[0]
        if not host:
            # An empty host would produce a pattern that matches every https URL.
            raise ValueError(f"Cannot extract a host from domain {domain!r}")

        # After the host allow only an optional port followed by a path, query,
        # fragment or the end of the URL, so that look-alike hosts such as
        # "medium.com.evil.example" are not treated as "medium.com".
        pattern = r"https://(www\.)?{}(?::\d+)?(?:[/?#]|$)".format(re.escape(host))
        self._crawlers[pattern] = crawler

    def get_crawler(self, url: str) -> BaseCrawler:
        """Return a new crawler instance suited to *url*.

        Patterns are tried in registration order and the first match wins. When
        nothing matches, a warning is logged and a ``CustomArticleCrawler`` is
        returned.
        """
        for pattern, crawler in self._crawlers.items():
            if re.match(pattern, url):
                return crawler()

        logger.warning(f"No crawler found for {url}. Defaulting to CustomArticleCrawler.")
        return CustomArticleCrawler()
Similarly, Python does not need Java-style abstract base classes for this; it has protocols (`typing.Protocol`), which provide structural typing:
https://peps.python.org/pep-0544/#using-protocols