upbit/pixivpy

download all images from user

compwron opened this issue · 3 comments

Here is some messy code which uses this library to download all images for a particular user.

# first run:
# pip install pixivpy-async
# pip install requests
# The token will eventually expire; to get a new one, follow the guide at https://gist.github.com/ZipFile/c9ebedb224406f4f11845ab700124362
# Note: to see NSFW art, log into your account and edit "Viewing restriction" at https://www.pixiv.net/setting_user.php

# Pixiv refresh token handed to ``aapi.login``; fill it in before running.
TOKEN = ""
# Largest retry counter value ``gettem`` tolerates before it raises.
ITER_LIMIT = 10
# Offset step between consecutive result pages (pages are 30 items long).
ILLUSTRATIONS_PAGE = 30

from queue import Empty
from pixivpy_async import *
import asyncio
from os.path import exists

def calc_next_url(current_user_id, current_offset):
    """Build the app-API URL that lists a user's illustrations from an offset.

    Both arguments are interpolated verbatim into the query string; no
    URL-encoding is applied.
    """
    query = "&".join(
        (
            f"user_id={current_user_id}",
            "filter=for_ios",
            "type=illust",
            f"offset={current_offset}",
        )
    )
    return "https://app-api.pixiv.net/v1/user/illusts?" + query

async def download(aapi, illust):
    """Download every original-resolution image of one illustration.

    Files are named ``<YYYY_MM_DD>_<illust id>_<page>``. The illustration id
    is part of the name because the creation date alone is not unique: two
    posts published on the same day would otherwise map to the same file
    name.

    Args:
        aapi: API client exposing an awaitable ``download(url, name=...)``.
        illust: Illustration record taken from an ``illusts`` list. Reads
            ``create_date``, ``id``, ``user`` (``id``/``name``/``account``)
            and either ``meta_single_page`` or ``meta_pages``.
    """
    # To fetch only restricted posts, return early here when
    # illust["x_restrict"] == 0.
    create_date = illust["create_date"][:10].replace("-", "_")
    illust_id = illust["id"]
    user = illust["user"]
    artist = f"{user['id']} {user['name']} {user['account']}"

    # Single-image posts carry their URL in ``meta_single_page``; multi-image
    # posts leave that mapping empty and list their pages in ``meta_pages``.
    single_page = illust.get("meta_single_page") or {}
    if single_page:
        urls = [single_page["original_image_url"]]
    else:
        urls = [
            page["image_urls"]["original"]
            for page in illust.get("meta_pages") or []
        ]

    if not urls:
        print(f"{illust_id} has no downloadable images")
        return

    for page_number, url in enumerate(urls, start=1):
        await aapi.download(
            url, name=f"{create_date}_{illust_id}_{page_number:02d}"
        )
        print(f"downloaded {artist} post {illust_id} image {page_number}")

async def gettem(aapi, artist_id, current_offset, iter=0):
    """Fetch one page of an artist's illustrations and download all of them.

    An empty page is treated as possible rate limiting: after a pause the
    same offset is requested again, until the retry counter exceeds
    ``ITER_LIMIT``. The retry is a loop rather than a recursive call so that
    it is actually awaited; a bare ``gettem(...)`` call only creates a
    coroutine object and never runs it.

    Args:
        aapi: Logged-in API client providing ``parse_qs`` and ``user_illusts``.
        artist_id: Id of the user whose illustrations are listed.
        current_offset: Offset of the page to fetch.
        iter: Starting value of the retry counter.

    Raises:
        RuntimeError: If the page is still empty once the retry counter has
            exceeded ``ITER_LIMIT``.
    """
    next_url = calc_next_url(artist_id, current_offset)
    next_qs = aapi.parse_qs(next_url)
    while True:
        print("Next page...")
        print(next_url)
        await asyncio.sleep(30)  # try to not get rate limited?
        print(next_qs)
        json_result = await aapi.user_illusts(**next_qs)
        print("next url?", json_result.next_url, json_result["next_url"])
        illusts = json_result["illusts"]
        if illusts:
            break
        print(f"Rate limited? Sleeping... iter: {iter} of limit {ITER_LIMIT}")
        await asyncio.sleep(10)
        if iter > ITER_LIMIT:
            raise RuntimeError(f"nothing in illusts: {json_result}")
        iter += 1

    for illust in illusts:
        await download(aapi, illust)

async def main():
    """Download every illustration of the hard-coded artist, page by page.

    The first page is fetched directly with ``user_illusts``; later pages are
    requested through ``gettem`` with an offset that grows by
    ``ILLUSTRATIONS_PAGE`` each round. The loop has no exit condition of its
    own: it ends only when an exception propagates out of it.
    """
    artist_id = 151689
    current_offset = ILLUSTRATIONS_PAGE  # pages are 30 items long
    async with PixivClient() as client:
        aapi = AppPixivAPI(client=client)
        await aapi.login(refresh_token=TOKEN)
        json_result = await aapi.user_illusts(artist_id)
        # print(json_result)
        for illust in json_result["illusts"]:
            await download(aapi, illust)
        print("next url?", json_result.next_url, json_result["next_url"])
        print(json_result["next_url"])
        while True:  # continue until errorsplode
            print("still true")
            await gettem(aapi, artist_id, current_offset)
            current_offset += ILLUSTRATIONS_PAGE


# Script entry point: runs immediately when this file is executed.
asyncio.run(main())

I'm not sure what the purpose of this thread is, but here is my version. It uses tqdm to display a nice-looking progress bar.

Code
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import requests
from tqdm import tqdm

from pixivpy3 import AppPixivAPI

# Pixiv user whose illustrations are downloaded; a string because it doubles
# as the name of the per-user sub-directory under DOWNLOAD_DIR.
USER_ID = '15919563'
# Placeholder: root directory that receives the downloads.
DOWNLOAD_DIR = Path('SOME-WHERE')
# Placeholder: text file holding the refresh token (read, then rewritten).
REFRESH_TOKEN_FILE = Path(r'SOME-WHERE\refresh-token.txt')


def auth_pixiv_api(api: AppPixivAPI, refresh_token_file: Path):
    """Authenticate ``api`` with the stored refresh token and persist the new one.

    The token is read from ``refresh_token_file`` (surrounding whitespace
    stripped) and passed to ``api.auth``. Whatever ``api.refresh_token`` holds
    afterwards is written back to the same file, followed by a newline.
    """
    stored_token = refresh_token_file.read_text().strip()
    api.auth(refresh_token=stored_token)
    refresh_token_file.write_text(f'{api.refresh_token}\n')


def download(url: str, file: Path, headers=None, force=False):
    """Stream ``url`` into ``file`` while showing a tqdm progress bar.

    Args:
        url: Address of the resource to fetch.
        file: Destination path; missing parent directories are created.
        headers: Optional HTTP headers passed to ``requests.get``.
        force: When false, an already existing ``file`` is left untouched and
            nothing is requested.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if file.exists() and not force:
        return

    # Data is written to a sibling ".part" file and renamed only once the
    # transfer has finished, so an interrupted download never leaves a
    # truncated ``file`` that a later non-forced run would skip as complete.
    partial = file.with_name(file.name + '.part')

    with requests.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        with tqdm(
                total=int(response.headers.get('Content-Length', 0)),
                desc=f'Download: {file.name}',
                unit='B', unit_scale=True, unit_divisor=1024,
                leave=False,
        ) as progress:
            # parents=True: the destination may be nested under directories
            # that do not exist yet (e.g. a fresh download root).
            file.parent.mkdir(parents=True, exist_ok=True)
            try:
                with partial.open('wb') as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        if not chunk:
                            continue
                        f.write(chunk)
                        progress.update(len(chunk))
                partial.replace(file)
            finally:
                # Only true when the transfer failed before the rename.
                if partial.exists():
                    partial.unlink()


def _original_image_urls(illust):
    """Return the original-resolution image URLs of one illustration."""
    if illust.type == 'ugoira':
        return []  # Skip ugoira
    if illust.page_count == 1:
        return [illust.meta_single_page.original_image_url]
    return [page.image_urls.original for page in illust.meta_pages]


def main():
    """Download all original images of ``USER_ID`` into ``DOWNLOAD_DIR/USER_ID``.

    Result pages are walked by following ``next_url`` until the API returns
    none. Each image is handed to a small thread pool; once the pool has
    drained, every download that raised is reported instead of being
    silently discarded together with its future.
    """
    api = AppPixivAPI()
    auth_pixiv_api(api, REFRESH_TOKEN_FILE)

    root = DOWNLOAD_DIR / USER_ID
    submitted = {}  # future -> url, kept so failures can be reported
    with ThreadPoolExecutor(
            max_workers=5,
            initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),),
    ) as executor:
        qs = {'user_id': USER_ID}
        while qs:
            json_result = api.user_illusts(**qs)
            qs = api.parse_qs(json_result.next_url)
            for illust in json_result.illusts:
                for url in _original_image_urls(illust):
                    future = executor.submit(
                        download,
                        url,
                        root / os.path.basename(url),
                        headers={'Referer': 'https://app-api.pixiv.net/'},
                        force=True,
                    )
                    submitted[future] = url

    # Leaving the ``with`` block waits for all workers, so every future is
    # finished here and ``exception()`` returns immediately.
    for future, url in submitted.items():
        error = future.exception()
        if error is not None:
            tqdm.write(f'Download failed: {url}: {error!r}')


if __name__ == '__main__':
    main()

I used to have a complex crawler that could even convert ugoira to GIF, but I no longer use it, so I have stopped maintaining it.

@compwron Did you want a method to "download all images from user", or did you want to know how to implement it with pixivpy?