vanheeringen-lab/genomepy

SSL Error Downloading Genome

sojichld opened this issue · 0 comments

I am having an issue downloading the appropriate genome.

I get the following error when running `genomepy.install_genome(name=ref_genome, provider="ucsc", genomes_dir=None)`.

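If this cannot be resolved, what is the manual equivalent for getting this data, i.e. how can I download the same genome files directly without going through genomepy?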

The error:

12:15:30 | INFO | Downloading assembly summaries from UCSC
---------------------------------------------------------------------------
SSLError                                  Traceback (most recent call last)
SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:992)

The above exception was the direct cause of the following exception:

MaxRetryError                             Traceback (most recent call last)
File /apps/prod/easybuild/sl7.x86_64.foss-2021a/software/Python/3.11.1-GCCcore-10.3.0-jupyter-4.0/lib/python3.11/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    485 try:
--> 486     resp = conn.urlopen(
    487         method=request.method,
    488         url=url,
    489         body=request.body,
    490         headers=request.headers,
    491         redirect=False,
    492         assert_same_host=False,
    493         preload_content=False,
    494         decode_content=False,
    495         retries=self.max_retries,
    496         timeout=timeout,
    497         chunked=chunked,
    498     )
    500 except (ProtocolError, OSError) as err:

File ~/.local/lib/python3.11/site-packages/urllib3/connectionpool.py:843, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    841     new_e = ProtocolError("Connection aborted.", new_e)
--> 843 retries = retries.increment(
    844     method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
    845 )
    846 retries.sleep()

File ~/.local/lib/python3.11/site-packages/urllib3/util/retry.py:519, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    518     reason = error or ResponseError(cause)
--> 519     raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
    521 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPSConnectionPool(host='api.genome.ucsc.edu', port=443): Max retries exceeded with url: /list/ucscGenomes (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:992)')))

During handling of the above exception, another exception occurred:

SSLError                                  Traceback (most recent call last)
Cell In[4], line 18
      3 #genome_installation = ma.is_genome_installed(ref_genome=ref_genome,
      4 #                                             genomes_dir=None)
      5 #print(ref_genome, "installation: ", genome_installation)
   (...)
     15 
     16 #if not genome_installation:
     17 import genomepy
---> 18 genomepy.install_genome(name=ref_genome, provider="ucsc",genomes_dir=None)

File ~/genomepy/genomepy/functions.py:208, in install_genome(name, provider, genomes_dir, localname, mask, keep_alt, regex, invert_match, bgzip, annotation, only_annotation, skip_matching, skip_filter, threads, force, **kwargs)
    206 out_dir = os.path.join(genomes_dir, localname)
    207 genome_file = os.path.join(out_dir, f"{localname}.fa")
--> 208 provider = _provider_selection(name, localname, genomes_dir, provider)
    210 # check which files need to be downloaded
    211 genome_found = _is_genome_dir(out_dir)

File ~/genomepy/genomepy/functions.py:371, in _provider_selection(name, localname, genomes_dir, provider)
    368     if p in ["ensembl", "ucsc", "ncbi"]:
    369         provider = p
--> 371 return _lazy_provider_selection(name, provider)

File ~/genomepy/genomepy/functions.py:342, in _lazy_provider_selection(name, provider)
    340 """return the first PROVIDER which has genome NAME"""
    341 providers = []
--> 342 for p in online_providers(provider):
    343     providers.append(p.name)
    344     if name in p.genomes:

File ~/genomepy/genomepy/providers/__init__.py:104, in online_providers(provider)
    102 for provider in providers:
    103     try:
--> 104         yield create(provider)
    105     except ConnectionError as e:
    106         logger.warning(str(e))

File ~/genomepy/genomepy/providers/__init__.py:60, in create(name)
     58 p = PROVIDERS[name]
     59 p.download_assembly_report = staticmethod(download_assembly_report)
---> 60 return p()

File ~/genomepy/genomepy/providers/ucsc.py:72, in UcscProvider.__init__(self)
     70 self._provider_status()
     71 # Populate on init, so that methods can be cached
---> 72 self.genomes = get_genomes(ADRESSES["api"])

File /apps/prod/easybuild/sl7.x86_64.foss-2021a/software/Python/3.11.1-GCCcore-10.3.0-jupyter-4.0/lib/python3.11/contextlib.py:81, in ContextDecorator.__call__.<locals>.inner(*args, **kwds)
     78 @wraps(func)
     79 def inner(*args, **kwds):
     80     with self._recreate_cm():
---> 81         return func(*args, **kwds)

File ~/.local/lib/python3.11/site-packages/diskcache/core.py:1875, in Cache.memoize.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
   1872 result = self.get(key, default=ENOVAL, retry=True)
   1874 if result is ENOVAL:
-> 1875     result = func(*args, **kwargs)
   1876     if expire is None or expire > 0:
   1877         self.set(key, result, expire, tag=tag, retry=True)

File ~/genomepy/genomepy/providers/ucsc.py:411, in get_genomes(rest_url)
    406 @lock
    407 @disk_cache.memoize(expire=cache_exp_genomes, tag="get_genomes-ucsc")
    408 def get_genomes(rest_url):
    409     logger.info("Downloading assembly summaries from UCSC")
--> 411     r = requests.get(rest_url, headers={"Content-Type": "application/json"})
    412     if not r.ok:
    413         r.raise_for_status()

File /apps/prod/easybuild/sl7.x86_64.foss-2021a/software/Python/3.11.1-GCCcore-10.3.0-jupyter-4.0/lib/python3.11/site-packages/requests/api.py:73, in get(url, params, **kwargs)
     62 def get(url, params=None, **kwargs):
     63     r"""Sends a GET request.
     64 
     65     :param url: URL for the new :class:`Request` object.
   (...)
     70     :rtype: requests.Response
     71     """
---> 73     return request("get", url, params=params, **kwargs)

File /apps/prod/easybuild/sl7.x86_64.foss-2021a/software/Python/3.11.1-GCCcore-10.3.0-jupyter-4.0/lib/python3.11/site-packages/requests/api.py:59, in request(method, url, **kwargs)
     55 # By using the 'with' statement we are sure the session is closed, thus we
     56 # avoid leaving sockets open which can trigger a ResourceWarning in some
     57 # cases, and look like a memory leak in others.
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)

File /apps/prod/easybuild/sl7.x86_64.foss-2021a/software/Python/3.11.1-GCCcore-10.3.0-jupyter-4.0/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File /apps/prod/easybuild/sl7.x86_64.foss-2021a/software/Python/3.11.1-GCCcore-10.3.0-jupyter-4.0/lib/python3.11/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File /apps/prod/easybuild/sl7.x86_64.foss-2021a/software/Python/3.11.1-GCCcore-10.3.0-jupyter-4.0/lib/python3.11/site-packages/requests/adapters.py:517, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    513         raise ProxyError(e, request=request)
    515     if isinstance(e.reason, _SSLError):
    516         # This branch is for urllib3 v1.22 and later.
--> 517         raise SSLError(e, request=request)
    519     raise ConnectionError(e, request=request)
    521 except ClosedPoolError as e:

SSLError: HTTPSConnectionPool(host='api.genome.ucsc.edu', port=443): Max retries exceeded with url: /list/ucscGenomes (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:992)')))