iterative/dvc

dvc.scm.CloneError: SCM error

295552440 opened this issue · 1 comment

I need help.

code:

import dvc.api

with dvc.api.open(
        'get-started/data.xml',
        repo='https://github.com/iterative/dataset-registry'
) as f:
    print(f.read())

error:

Traceback (most recent call last):
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\util\ssl_.py", line 440, in ssl_wrap_socket
    context.load_verify_locations(ca_certs, ca_cert_dir, ca_cert_data)
FileNotFoundError: [Errno 2] No such file or directory

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 793, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 491, in _make_request
    raise new_e
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 467, in _make_request
    self._validate_conn(conn)
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 1099, in _validate_conn
    conn.connect()
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connection.py", line 653, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connection.py", line 806, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
               ^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\util\ssl_.py", line 442, in ssl_wrap_socket
    raise SSLError(e) from e
urllib3.exceptions.SSLError: [Errno 2] No such file or directory

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dulwich\client.py", line 2290, in _http_request
    resp = self.pool_manager.request(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\_request_methods.py", line 136, in request
    return self.request_encode_url(
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\_request_methods.py", line 183, in request_encode_url
    return self.urlopen(method, url, **extra_kw)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\poolmanager.py", line 444, in urlopen
    response = conn.urlopen(method, u.request_uri, **kw)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 877, in urlopen
    return self.urlopen(
           ^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 877, in urlopen
    return self.urlopen(
           ^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 877, in urlopen
    return self.urlopen(
           ^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\connectionpool.py", line 847, in urlopen
    retries = retries.increment(
              ^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\urllib3\util\retry.py", line 515, in increment
    raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='github.com', port=443): Max retries exceeded with url: /iterative/dataset-registry/info/refs?service=git-upload-pack (Caused by SSLError(FileNotFoundError(2, 'No such file or directory')))

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\scmrepo\git\backend\dulwich\__init__.py", line 260, in clone
    repo = clone_from()
           ^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dulwich\porcelain.py", line 546, in clone
    return client.clone(
           ^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dulwich\client.py", line 752, in clone
    result = self.fetch(path, target, progress=progress, depth=depth)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dulwich\client.py", line 840, in fetch
    result = self.fetch_pack(
             ^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dulwich\client.py", line 2157, in fetch_pack
    refs, server_capabilities, url = self._discover_references(
                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dulwich\client.py", line 2013, in _discover_references
    resp, read = self._http_request(url, headers)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\scmrepo\git\backend\dulwich\client.py", line 50, in _http_request
    result = super()._http_request(
             ^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dulwich\client.py", line 2298, in _http_request
    raise GitProtocolError(str(e)) from e
dulwich.errors.GitProtocolError: HTTPSConnectionPool(host='github.com', port=443): Max retries exceeded with url: /iterative/dataset-registry/info/refs?service=git-upload-pack (Caused by SSLError(FileNotFoundError(2, 'No such file or directory')))

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\scm.py", line 150, in clone
    git = Git.clone(url, to_path, progress=pbar.update_git, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\scmrepo\git\__init__.py", line 154, in clone
    backend.clone(url, to_path, bare=bare, mirror=mirror, **kwargs)
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\scmrepo\git\backend\dulwich\__init__.py", line 268, in clone
    raise CloneError(url, os.fsdecode(to_path)) from exc
scmrepo.exceptions.CloneError: Failed to clone repo 'https://github.com/iterative/dataset-registry' to 'C:\Users\73663\AppData\Local\Temp\tmpp0mvpqhpdvc-clone'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "D:\work\ai\workspace\python-test\dvc_api.py", line 3, in <module>
    with dvc.api.open(
  File "D:\opt\python\Lib\contextlib.py", line 137, in __enter__
    return next(self.gen)
           ^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\api\data.py", line 276, in _open
    with Repo.open(repo, rev=rev, **repo_kwargs) as _repo:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\repo\__init__.py", line 297, in open
    return open_repo(url, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\repo\open_repo.py", line 60, in open_repo
    return _external_repo(url, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\opt\python\Lib\contextlib.py", line 81, in inner
    return func(*args, **kwds)
           ^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\repo\open_repo.py", line 23, in _external_repo
    path = _cached_clone(url, rev)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\repo\open_repo.py", line 134, in _cached_clone
    clone_path, shallow = _clone_default_branch(url, rev)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\funcy\decorators.py", line 47, in wrapper
    return deco(call, *dargs, **dkwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\funcy\flow.py", line 246, in wrap_with
    return call()
           ^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\funcy\decorators.py", line 68, in __call__
    return self._func(*self._args, **self._kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\repo\open_repo.py", line 198, in _clone_default_branch
    git = clone(url, clone_path)
          ^^^^^^^^^^^^^^^^^^^^^^
  File "D:\work\ai\workspace\python-test\venv\Lib\site-packages\dvc\scm.py", line 155, in clone
    raise CloneError("SCM error") from exc
dvc.scm.CloneError: SCM error

@295552440 are you running it in a corporate environment (with custom certificates, a proxy, etc.)?

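It seems to be related to the environment, specifically how certificates are set up: the root error in the traceback is a FileNotFoundError raised from load_verify_locations, i.e. the CA certificate file or directory that urllib3 was given could not be found. Does a regular curl request to github.com work for you?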

Sometimes upgrading pyopenssl helps, by the way: `pip install pyopenssl --upgrade`. Otherwise, you may need to check with your admins on the proper way to install custom certificates on the machine.