trash: unused functions
Opened this issue · 0 comments
ocervell commented
To sanitize the code, we removed some unused functions; they may still be useful later on, so they are archived here:
# secator/utils.py
def find_list_item(array, val, key='id', default=None):
    """Return the first dict in `array` whose `key` entry equals `val`, else `default`."""
    for entry in array:
        if entry[key] == val:
            return entry
    return default
# secator/signals.py
class GracefulInterruptHandler(object):
    """Context manager that traps the given signals and records whether one fired.

    Usage:
        with GracefulInterruptHandler() as h:
            ...
            if h.interrupted:
                break
    """

    def __init__(self, signals=(signal.SIGINT, signal.SIGTERM)):
        self.signals = signals
        self.original_handlers = {}
        # Fix: initialize state here so the attributes always exist. Previously
        # they were only set in __enter__, so reading h.interrupted (or calling
        # release()) before entering the context raised AttributeError.
        self.interrupted = False
        self.released = False

    def __enter__(self):
        # Reset state so the handler instance can be reused across contexts.
        self.interrupted = False
        self.released = False
        for sig in self.signals:
            self.original_handlers[sig] = signal.getsignal(sig)
            signal.signal(sig, self.handler)
        return self

    def handler(self, signum, frame):
        # Restore the original handlers first so a second signal during
        # shutdown gets the default behavior instead of being swallowed.
        self.release()
        self.interrupted = True

    def __exit__(self, type, value, tb):
        self.release()

    def release(self):
        """Restore the original signal handlers.

        Returns:
            bool: False if already released, True otherwise.
        """
        if self.released:
            return False
        for sig in self.signals:
            signal.signal(sig, self.original_handlers[sig])
        self.released = True
        return True
# secator/utils.py
def filter_urls(urls, **remove_parts):
    """Filter a list of URLs using `furl`.

    Args:
        urls (list): List of URLs to filter.
        remove_parts (dict): URL pieces to remove; keys may carry a 'remove_' prefix,
            which is stripped before being passed to `furl.remove`.

    Returns:
        list: List of filtered URLs.
    """
    if not remove_parts:
        return urls
    # Strip the 'remove_' prefix from each kwarg name to build furl.remove() args.
    furl_args = {}
    for name, value in remove_parts.items():
        furl_args[name.replace('remove_', '')] = value
    filtered = []
    for url in urls:
        stripped = furl(url).remove(**furl_args).url
        filtered.append(sanitize_url(stripped))
    return filtered
# secator/runners/command.py
class Command(Runner):
    ...

    @classmethod
    def poll(cls, result):
        """Block until the Celery result is ready, then return its value.

        While waiting, prints intermediate task info when debugging is enabled.

        TODO: Move this to TaskBase
        """
        while True:
            if result.ready():
                return result.get()
            info = AsyncResult(result.id).info
            if DEBUG > 1 and isinstance(info, dict):
                print(info)
            sleep(1)
# secator/runners/_helpers.py
def confirm_exit(func):
    """Decorator asking the user for confirmation to exit on Ctrl-C.

    Args:
        func (callable): Decorated function (a method: first argument is `self`).

    Returns:
        callable: Wrapper that propagates the wrapped function's return value and
        intercepts KeyboardInterrupt to ask for confirmation.
    """
    from functools import wraps

    # Fix: preserve the wrapped function's metadata and return its result
    # (previously the wrapper always returned None and exposed the name
    # 'inner_function' to introspection).
    @wraps(func)
    def inner_function(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except KeyboardInterrupt:
            exit_confirmed = Confirm.ask('Are you sure you want to exit ?')
            if exit_confirmed:
                self.log_results()
                raise KeyboardInterrupt
    return inner_function
# secator/utils.py
def match_extensions(response, allowed_ext=('.html',)):
    """Check whether an HTTP response matches one of the allowed file extensions.

    Looks at both the response content type (via mimetypes) and the extension of
    the (final) URL path.

    Args:
        response (dict): httpx response dict with 'content_type', 'url' and an
            optional 'final_url' key.
        allowed_ext (tuple | list): Extensions (with leading dot) to match.

    Returns:
        bool: True if either derived extension is in `allowed_ext`, False otherwise.
    """
    # `or ''` also guards against an explicit None content_type.
    content_type = (response.get('content_type') or '').split(';')[0]
    url = response.get('final_url') or response['url']
    ext = mimetypes.guess_extension(content_type)
    ext2 = os.path.splitext(urlparse(url).path)[1]
    return bool((ext and ext in allowed_ext) or (ext2 and ext2 in allowed_ext))
# secator/utils.py
def setup_logger(level='info', format='%(message)s'):
    """Create or reconfigure the 'secator' logger with a single stream handler.

    Args:
        level (str): Log level name (case-insensitive), e.g. 'info', 'debug'.
        format (str): logging.Formatter format string.

    Returns:
        logging.Logger: The configured logger.
    """
    logger = logging.getLogger('secator')
    logger.setLevel(logging.getLevelName(level.upper()))
    # Fix: drop handlers added by previous calls — each call used to stack an
    # extra StreamHandler, so every record was emitted multiple times.
    for old_handler in logger.handlers[:]:
        logger.removeHandler(old_handler)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(format))
    logger.addHandler(handler)
    return logger
# secator/utils.py
def get_task_name_padding(classes=None):
    """Return a padding width: longest matching task class name length plus 2."""
    all_tasks = discover_tasks()
    pool = classes if classes else all_tasks
    name_lengths = [len(task.__name__) for task in all_tasks if task in pool]
    return max(name_lengths) + 2
# secator/celery.py
def get_results(result):
    """Get all intermediate results from Celery result object.

    Use this when running complex workflows with .si() i.e not passing results
    between tasks.

    Args:
        result (Union[AsyncResult, GroupResult]): Celery result.

    Returns:
        list: List of results.
    """
    # Fix: the previous `while not ready(): continue` busy-wait pegged a CPU
    # core; sleep briefly between readiness checks instead.
    while not result.ready():
        sleep(0.1)
    results = []
    get_nested_results(result, results=results)
    return results
def get_nested_results(result, results=None):
    """Get results recursively from Celery result object by parsing result tree
    in reverse order. Also gets results from GroupResult children.

    Args:
        result (Union[AsyncResult, GroupResult]): Celery result object.
        results (list): Accumulator list; a fresh list is created when omitted.

    Returns:
        list: List of results.
    """
    # Fix: the old mutable default (results=[]) was shared across calls, so
    # results leaked from one invocation into the next.
    if results is None:
        results = []
    if result is None:
        return results
    if isinstance(result, GroupResult):
        console.log(repr(result))
        get_nested_results(result.parent, results=results)
        for child in result.children:
            get_nested_results(child, results=results)
    elif isinstance(result, AsyncResult):
        console.log(repr(result))
        res = result.get()
        # Fix: guard before logging — `len(res)` used to run before the None
        # check and crashed with TypeError when a task returned None.
        if res is not None:
            console.log(f'-> Found {len(res)} results.')
            console.log(f'-> {res}')
            results.extend(res)
        get_nested_results(result.parent, results=results)
    return results
def poll_task(result, seen=None):
    """Poll Celery result tree recursively to get results live.

    TODO: function is incomplete, as it does not parse all results.

    Args:
        result (Union[AsyncResult, GroupResult]): Celery result object.
        seen (list): List of seen result uuids (do not yield again).

    Yields:
        tuple: (task id, task name or None, result item).
    """
    # Fix: the old mutable default (seen=[]) was shared across independent
    # top-level calls, silently suppressing results already seen by a previous
    # poll. Recursive calls still share one list via the explicit keyword.
    if seen is None:
        seen = []
    if result is None:
        return
    if result.children:
        for child in result.children:
            yield from poll_task(child, seen=seen)
    else:
        res = AsyncResult(result.id)
        if not res.info:
            # No state published yet: wait briefly and retry.
            # NOTE(review): this recursion can grow deep for long-running
            # tasks — consider converting to a loop; left as-is to preserve
            # the original re-evaluation of `result.children` on each pass.
            sleep(0.1)
            yield from poll_task(result, seen=seen)

        # Task done running
        if isinstance(res.info, list):
            for item in res.info:
                if item._uuid not in seen:
                    yield res.id, None, item
                    seen.append(item._uuid)
            return

        # Get task partial results, remove duplicates
        results = res.info['results']
        name = res.info['name']
        for item in results:
            if item._uuid not in seen:
                yield res.id, name, item
                seen.append(item._uuid)

        # Task still running, keep polling
        if not res.ready():
            sleep(0.1)
            yield from poll_task(result, seen=seen)