download all images from user
compwron opened this issue · 3 comments
compwron commented
Here is some admittedly messy code that uses this library to download all of the images for a particular user.
# first run:
# pip install pixivpy-async
# pip install requests
# NOTE: the token will eventually expire; to get a new one, follow the doc at https://gist.github.com/ZipFile/c9ebedb224406f4f11845ab700124362
# NOTE: to see NSFW art, log into your account and edit "Viewing restriction" https://www.pixiv.net/setting_user.php
TOKEN=""  # pixiv refresh token used to authenticate the API client (see link above)
ITER_LIMIT = 10  # max consecutive empty-result retries before giving up (see gettem)
ILLUSTRATIONS_PAGE = 30  # the pixiv app API returns 30 illustrations per page
from queue import Empty
from pixivpy_async import *
import asyncio
from os.path import exists
def calc_next_url(current_user_id, current_offset):
    """Build the paginated user-illusts app-API URL for one user/offset pair."""
    base = "https://app-api.pixiv.net/v1/user/illusts"
    query = f"user_id={current_user_id}&filter=for_ios&type=illust&offset={current_offset}"
    return f"{base}?{query}"
async def download(aapi, illust):
    """Download every original image of one illustration via the app API.

    Args:
        aapi: authenticated async AppPixivAPI client; only its
            ``download(url, name=...)`` coroutine is used here.
        illust: one illust dict as returned by ``user_illusts`` — reads
            "create_date", "id", "user", and either "meta_single_page"
            (single image) or "meta_pages" (multi-image post).
    """
    # "2021-03-05T..." -> "2021_03_05"; used as the filename prefix.
    create_date = illust["create_date"][:10].replace("-", "_")
    illust_id = illust["id"]  # renamed from `id` to avoid shadowing the builtin
    artist = f"{illust['user']['id']} {illust['user']['name']} {illust['user']['account']}"
    if illust.get("meta_single_page"):
        # BUG FIX: include the illust id in the file name — the date alone
        # collides (overwrites) when the artist posted twice on the same day.
        await aapi.download(illust["meta_single_page"]["original_image_url"],
                            name=f"{create_date}_{illust_id}_01")
        print(f"downloaded {artist} post {illust_id} image 1")
    elif illust.get("meta_pages"):
        for index, page in enumerate(illust["meta_pages"], start=1):
            await aapi.download(page["image_urls"]["original"],
                                name=f"{create_date}_{illust_id}_{index:02d}")
            print(f"downloaded {artist} post {illust_id} image {index}")
    else:
        # Neither image-metadata key is present, so there is nothing to fetch.
        # (The old message "already downloaded" was misleading — this branch
        # says nothing about whether a file exists on disk.)
        print(f"{illust_id} has no downloadable images")
async def gettem(aapi, artist_id, current_offset, iter=0):
    """Fetch one page of a user's illustrations and download every image.

    Retries recursively when the API returns an empty "illusts" list (assumed
    to indicate rate limiting), giving up after ITER_LIMIT attempts.

    Args:
        aapi: authenticated async AppPixivAPI client.
        artist_id: pixiv user id whose works are fetched.
        current_offset: pagination offset (a multiple of ILLUSTRATIONS_PAGE).
        iter: retry counter (kept named `iter` for backward compatibility
            even though it shadows the builtin).

    Raises:
        Exception: when ITER_LIMIT consecutive empty responses are seen.
    """
    print("Next page...")
    next_url = calc_next_url(artist_id, current_offset)
    print(next_url)
    await asyncio.sleep(30)  # crude throttle: try to not get rate limited?
    next_qs = aapi.parse_qs(next_url)
    print(next_qs)
    json_result = await aapi.user_illusts(**next_qs)
    print("next url?", json_result.next_url, json_result["next_url"])
    if len(json_result["illusts"]) == 0:
        print(f"Rate limited? Sleeping... iter: {iter} of limit {ITER_LIMIT}")
        await asyncio.sleep(10)
        if iter > ITER_LIMIT:
            raise Exception(f"nothing in illusts: {json_result}")
        # BUG FIX: the retry call produced a coroutine that was never awaited,
        # so the whole rate-limit retry path silently did nothing. Await it
        # and return, since this page has no illustrations to process.
        # NOTE(review): the retry steps the offset BACK one page — this
        # mirrors the original code; confirm whether retrying the *same*
        # offset was intended instead.
        await gettem(aapi, artist_id, current_offset - ILLUSTRATIONS_PAGE, iter + 1)
        return
    for illust in json_result["illusts"]:
        await download(aapi, illust)
async def main():
    """Log in with the refresh token and download every illustration of
    `artist_id`: the first page directly, the rest via gettem()."""
    artist_id = 151689  # removed unused `current_user_id` local
    current_offset = ILLUSTRATIONS_PAGE  # pages are 30 items long; page 1 is fetched below
    async with PixivClient() as client:
        aapi = AppPixivAPI(client=client)
        await aapi.login(refresh_token=TOKEN)
        # First page (offset 0) is fetched directly; gettem() handles the rest.
        json_result = await aapi.user_illusts(artist_id)
        for illust in json_result["illusts"]:
            await download(aapi, illust)
        print("next url?", json_result.next_url, json_result["next_url"])
        while True:  # continue until errorsplode
            await gettem(aapi, artist_id, current_offset)
            current_offset += ILLUSTRATIONS_PAGE

# Guard the entry point so importing this module doesn't start downloading.
if __name__ == "__main__":
    asyncio.run(main())
Deleted user commented
Xdynix commented
Not sure of the purpose of this thread, but here is mine. It utilizes tqdm
to create a nice-looking progress bar.
Code
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import requests
from tqdm import tqdm
from pixivpy3 import AppPixivAPI
USER_ID = '15919563'  # pixiv user whose illustrations are downloaded
DOWNLOAD_DIR = Path(r'SOME-WHERE')  # root folder for downloaded images
REFRESH_TOKEN_FILE = Path(r'SOME-WHERE\refresh-token.txt')  # token is read from here and rewritten after auth
def auth_pixiv_api(api: AppPixivAPI, refresh_token_file: Path):
    """Authenticate `api` with the stored refresh token, then persist the
    (possibly rotated) token the client holds after a successful auth."""
    token = refresh_token_file.read_text().strip()
    api.auth(refresh_token=token)
    refresh_token_file.write_text(f'{api.refresh_token}\n')
def download(url: str, file: Path, headers=None, force=False):
    """Stream `url` to `file`, showing a tqdm progress bar.

    Skips the download when `file` already exists, unless `force` is True.
    `headers` is forwarded to requests (used for the Referer pixiv requires).
    Raises requests.HTTPError on a non-2xx response.
    """
    if file.exists() and not force:
        return
    with requests.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        with tqdm(
            # Content-Length may be absent; tqdm treats total=0 as unknown.
            total=int(response.headers.get('Content-Length', 0)),
            desc=f'Download: {file.name}',
            unit='B', unit_scale=True, unit_divisor=1024,
            leave=False,
        ) as progress:
            # BUG FIX: parents=True — with exist_ok alone this raised
            # FileNotFoundError when the download root itself did not exist.
            file.parent.mkdir(parents=True, exist_ok=True)
            with file.open('wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if not chunk:
                        continue
                    f.write(chunk)
                    progress.update(len(chunk))
def main():
    """Walk every page of USER_ID's illustrations and queue each original
    image on a small thread pool (the shared tqdm lock keeps the per-file
    progress bars from garbling each other)."""
    api = AppPixivAPI()
    auth_pixiv_api(api, REFRESH_TOKEN_FILE)
    pool = ThreadPoolExecutor(
        max_workers=5,
        initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),),
    )
    with pool as executor:
        root = DOWNLOAD_DIR / USER_ID
        qs = {'user_id': USER_ID}
        while qs:
            page = api.user_illusts(**qs)
            # parse_qs of a missing next_url yields a falsy value and ends the loop.
            qs = api.parse_qs(page.next_url)
            for illust in page.illusts:
                if illust.type == 'ugoira':
                    urls = []  # Skip ugoira
                elif illust.page_count == 1:
                    urls = [illust.meta_single_page.original_image_url]
                else:
                    urls = [p.image_urls.original for p in illust.meta_pages]
                for url in urls:
                    executor.submit(
                        download,
                        url,
                        root / os.path.basename(url),
                        headers={'Referer': 'https://app-api.pixiv.net/'},
                        force=True,
                    )

if __name__ == '__main__':
    main()
I used to have a complex crawler that could even convert ugoira to GIF, but I don't use it anymore, so I no longer maintain it.
Deleted user commented
@compwron Did you want a method to "download all images from user", or did you want to know how to implement it with pixivpy?