Add option to show missing images?
palmerj opened this issue · 2 comments
palmerj commented
Could you add an option to just list the missing thumbnails, i.e. those which do not strictly match the title? This would be useful for debugging purposes.
palmerj commented
For now I've hacked a simple script based on your code:
import requests
import re
import sys
import json
import urllib.parse
from bs4 import BeautifulSoup
# The script takes exactly one command-line argument: the path of the
# playlist file to check. Anything else aborts with a usage message.
if len(sys.argv) != 2:
    sys.exit("Please provide playlist as the first argument")
playlist = sys.argv[1]
# Root of the remote thumbnail server; system directories hang off this URL.
url = "https://thumbnails.libretro.com/"
# Sub-directories of a system directory that are checked for thumbnails.
thumbnail_types = ["Named_Boxarts", "Named_Titles", "Named_Snaps"]
# According to https://github.com/libretro/RetroArch/blob/master/gfx/gfx_thumbnail_path.c#L218
# the characters &*/:`"<>?\| in a playlist entry label are replaced with
# underscores to form the thumbnail file name. The class below matches those
# characters (including the backtick, U+0060) plus the control characters
# U+0000-U+001F. It is a regular-expression source string meant for re.sub().
forbidden = (
    r"[\u0022\u003c\u003e\u007c\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"
    + r"\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015"
    + r"\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f\u003a\u002a\u003f\u005c\u002f\u0026\u0060]"
)
# Read the playlist and collect every entry label together with the set of
# systems (db names) the entries belong to.
names = []
dbnames = set()
# JSON text is UTF-8 by specification, so do not rely on the platform default.
with open(playlist, "r", encoding="utf-8") as pl_file:
    data = json.load(pl_file)
for r in data["items"]:
    # Validate with explicit checks rather than assert, which is removed
    # when Python runs with -O.
    if "label" not in r or r["label"].strip() == "":
        sys.exit(f"\n{json.dumps(r,indent=4)} of playlist {playlist} has no label")
    if "db_name" not in r or not r["db_name"].endswith(".lpl"):
        sys.exit(
            f"\n{json.dumps(r,indent=4)} of playlist {playlist} has no valid db_name"
        )
    # add the label name and the db name (it's a playlist name, minus the extension '.lpl')
    db = r["db_name"][:-4]
    dbnames.add(db)
    names.append(r["label"])
# Exactly one system is supported per run; an empty playlist gets its own
# message instead of being reported as "more than one system".
if not dbnames:
    sys.exit(f"\nplaylist {playlist} has no items")
if len(dbnames) != 1:
    sys.exit("\nmore than one system detected in playlist")
db_name = dbnames.pop()
# Fetch the server's root index and collect the system directory names it
# links to. The timeout keeps the script from hanging on a stalled
# connection, and an HTTP error status aborts instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, "html.parser")
SYSTEMS = []
for node in soup.find_all("a"):
    href = node.get("href")
    # Anchors without an href yield None; skip them. Keep only directory
    # links (trailing "/") other than the parent-directory link.
    if href and href.endswith("/") and not href.endswith("../"):
        SYSTEMS.append(urllib.parse.unquote(href[:-1]))
# Hack: There is only one MAME thumbnail repo,
# so map any db name containing 'MAME' onto the MAME repo
if "MAME" in db_name:
    db_name = "MAME"
# Explicit check rather than assert, which is removed when Python runs with -O.
if db_name not in SYSTEMS:
    sys.exit(f"\n{db_name} is not supported remotely")
# Expected thumbnail file name for each playlist label. This does not depend
# on the thumbnail type, so it is computed once instead of once per type.
thumbfiles = [re.sub(forbidden, "_", name) + ".png" for name in names]
# For every thumbnail type, list the remote directory and report each
# expected file that is not present in the listing.
for thumb_type in thumbnail_types:
    system_path = url + db_name + "/" + thumb_type
    # The timeout keeps the script from hanging on a stalled connection.
    page = requests.get(system_path, timeout=30).text
    soup = BeautifulSoup(page, "html.parser")
    thumbs = set()
    for node in soup.find_all("a"):
        href = node.get("href")
        # Anchors without an href yield None; skip them.
        if href and href.endswith(".png"):
            thumbs.add(urllib.parse.unquote(href))
    for thumbfile in thumbfiles:
        if thumbfile not in thumbs:
            print(f"{system_path}/{thumbfile} does not exist")
Cheers
i30817 commented
Ahhh. I think this request is a bit out of scope of this program. It's not intended as a linter of libretro-database or its server. If you have the repository you can use a unix bash command with find . -type l ! -readable
or similar to find broken symlinks, and find . -type f -size -200c
to find broken files (or at least small enough that it's almost a certainty they're broken).
And since you have your own script in progress, I'll close this. Thanks for the macOS path; I didn't know what it was myself.