/hfmirror

Mirror for resources to local and huggingface

Primary language: Python. License: Apache License 2.0 (Apache-2.0).

hfmirror

PyPI PyPI - Python Version Loc Comments

Code Test Package Release codecov

GitHub Org's stars GitHub stars GitHub forks GitHub commit activity GitHub issues GitHub pulls Contributors GitHub license

Mirror for resources to local and huggingface.

Installation

You can install it with pip from the official PyPI site:

pip install hfmirror

For more information about installation, you can refer to Installation.

Quick Start

Mirror Github Releases to Your Disk

The following code mirrors the release files of the GitHub repository narugo1992/gchar to your local directory test_releases:

from hfmirror.resource import GithubReleaseResource
from hfmirror.storage import LocalStorage
from hfmirror.sync import SyncTask

if __name__ == '__main__':
    # Source side: the releases of a GitHub repository.
    #   repo                   - the github repository to read releases from
    #   access_token           - access_token of github client (if needed)
    #   add_version_attachment - add files like LATEST_RELEASE to mark the versions
    release_source = GithubReleaseResource(
        repo='narugo1992/gchar',
        access_token='my_github_access_token',
        add_version_attachment=True,
    )

    # Destination side: a directory on the local disk.
    local_target = LocalStorage('test_releases')

    # Build the sync task from source and destination, then run it.
    SyncTask(release_source, local_target).sync()

After the sync finishes, the test_releases directory looks like this:

test_releases
├── LATEST_RELEASE
├── LATEST_RELEASE_0
├── LATEST_RELEASE_0.0
├── LATEST_RELEASE_0.0.1
├── LATEST_RELEASE_0.0.2
├── LATEST_RELEASE_0.0.3
├── LATEST_RELEASE_0.0.4
├── LATEST_RELEASE_0.0.5
├── LATEST_RELEASE_0.0.6
├── LATEST_RELEASE_0.0.8
├── v0.0.1
│   ├── gchar-0.0.1-py3-none-any.whl
│   └── gchar-0.0.1.tar.gz
├── v0.0.2
│   ├── gchar-0.0.2-py3-none-any.whl
│   └── gchar-0.0.2.tar.gz
├── v0.0.3
│   ├── gchar-0.0.3-py3-none-any.whl
│   └── gchar-0.0.3.tar.gz
├── v0.0.4
│   ├── gchar-0.0.4-py3-none-any.whl
│   └── gchar-0.0.4.tar.gz
├── v0.0.5
│   ├── gchar-0.0.5-py3-none-any.whl
│   └── gchar-0.0.5.tar.gz
├── v0.0.6
│   ├── gchar-0.0.6-py3-none-any.whl
│   └── gchar-0.0.6.tar.gz
└── v0.0.8
    ├── gchar-0.0.8-py3-none-any.whl
    └── gchar-0.0.8.tar.gz

Mirror Game Skins to HuggingFace

You can mirror the skins of Genshin Impact (based on gchar) to a Hugging Face repo, using the following code with a custom resource and Hugging Face repository storage:

import mimetypes
import os
import re
from typing import Iterable, Union, Tuple, Any, Mapping, List, Type

from gchar.games.base import Character as BaseCharacter
from gchar.games.genshin import Character
from hbutils.system import urlsplit
from huggingface_hub import HfApi
from tqdm.auto import tqdm

from hfmirror.resource import SyncResource
from hfmirror.resource.resource import TargetPathType
from hfmirror.storage import HuggingfaceStorage
from hfmirror.sync import SyncTask
from hfmirror.utils import srequest, get_requests_session


class ArknightsSkinResource(SyncResource):
    """Custom sync resource that publishes character metadata and skin images.

    Despite the name, the class is not tied to one game: the game name is
    read from ``ch_type.__game_name__`` and the characters are whatever list
    is passed in.
    """

    def __init__(self, chs: List[Character], ch_type: Type[BaseCharacter]):
        """Store the characters to export and the character class.

        :param chs: Characters whose metadata and skins will be yielded.
        :param ch_type: Character class; its ``__game_name__`` attribute is
            written into the top-level metadata item.
        """
        self.characters = chs
        self.ch_type = ch_type
        # One shared session for all the HEAD requests issued in ``grab``.
        self.session = get_requests_session()

    def _guess_skin_extension(self, url: str) -> str:
        """Return the file extension (with leading dot) to use for ``url``.

        The extension of the URL's filename is preferred. When the URL has
        none, a HEAD request is issued and the extension is guessed from the
        ``Content-Type`` response header. An empty string is returned when
        no extension can be determined, so callers can always concatenate
        the result.
        """
        _, ext = os.path.splitext(urlsplit(url).filename)
        if ext:
            return ext

        resp = srequest(self.session, 'HEAD', url)
        # NOTE(review): assumes ``resp.headers`` is a mapping with ``.get``
        # (as on a requests response) -- confirm against ``srequest``.
        content_type = resp.headers.get('Content-Type') or ''
        # Drop parameters such as "; charset=binary": guess_extension() only
        # recognises the bare "type/subtype" and returns None otherwise.
        mime_type = content_type.split(';', 1)[0].strip()
        return mimetypes.guess_extension(mime_type) or ''

    def grab(self) -> Iterable[Union[
        Tuple[str, Any, TargetPathType, Mapping],
        Tuple[str, Any, TargetPathType],
    ]]:
        """Yield the items to be synchronised.

        Items are yielded in this order:

        * one ``'metadata'`` item at path ``''`` holding the game name;
        * for each character (duplicated ``index`` values are skipped), one
          ``'metadata'`` item at path ``'<index>'`` with its id, names and
          aliases;
        * for each skin of that character, one ``'remote'`` item pointing at
          the skin URL, stored at ``'<index>/<sanitised skin name><ext>'``,
          with the original skin name attached as extra metadata.
        """
        yield 'metadata', {'game': self.ch_type.__game_name__}, ''

        seen_ids = set()
        for ch in tqdm(self.characters):
            # The same character may show up more than once; export it once.
            if ch.index in seen_ids:
                continue

            metadata = {
                'id': ch.index,
                'cnname': str(ch.cnname) if ch.cnname else None,
                'jpname': str(ch.jpname) if ch.jpname else None,
                'enname': str(ch.enname) if ch.enname else None,
                'alias': list(map(str, ch.alias_names)),
            }
            yield 'metadata', metadata, f'{ch.index}'
            seen_ids.add(ch.index)

            for skin in ch.skins:
                ext = self._guess_skin_extension(skin.url)
                # Collapse every run of non-word characters into a single
                # underscore so the skin name is usable as a filename.
                filename = re.sub(r'\W+', '_', skin.name).strip('_') + ext
                yield 'remote', skin.url, f'{ch.index}/{filename}', {'name': skin.name}


if __name__ == '__main__':
    # Resource side: all characters (without the extra ones), together with
    # the character class itself.
    skin_resource = ArknightsSkinResource(
        Character.all(contains_extra=False),
        Character,
    )

    # Storage side: a dataset repository on Hugging Face, created on demand.
    # The access token is read from the HF_TOKEN environment variable.
    target_repo = 'narugo/test_repo'
    hf_api = HfApi(token=os.environ['HF_TOKEN'])
    hf_api.create_repo(target_repo, repo_type='dataset', exist_ok=True)
    hf_storage = HuggingfaceStorage(
        repo=target_repo,
        hf_client=hf_api,
        namespace='genshin',
    )

    # Build the sync task from resource and storage, then run it.
    SyncTask(skin_resource, hf_storage).sync()