[QUESTION] Deleting all found duplicates at once?
Opened this issue · 3 comments
elec2 commented
Hey,
is there a possibility to delete all my 3352 duplicates at once? It is really annoying to click and load every single duplicate.
Thank you for the app anyways.
asukahan commented
I have 6300+ dupes, it's a real pain
hermesespinola commented
Deleting assets from the tool is painfully slow. Maybe write a script to scrape all the ids from the web UI and create a list to paste in here: https://immich.app/docs/api/delete-assets
// Run in the browser console on the duplicate finder's web UI page.
// Selects the second <li> of every duplicate entry; its second child node is
// presumably the text node holding the asset ID (verify against the page).
const duplicateAssetNodes = document.querySelectorAll('#root > div > div.withScreencast > div > div > div > section.main > div.block-container > div > div > div > div > div:nth-child(2) > div > div > div > div:nth-child(1) > div > div > ul > li:nth-child(2)');
// Array.from accepts a NodeList directly, so this also works in browsers that
// do not yet ship iterator helpers (Iterator.prototype.map / toArray).
// trim() removes whitespace on both sides so none ends up inside the quotes.
const assetsToDelete = Array.from(
  duplicateAssetNodes,
  (li) => `"${li.childNodes[1].textContent.trim()}"`
).join(',');
// Prints a comma-separated list of double-quoted IDs, ready to drop into a JSON array.
console.log(assetsToDelete);
# Build the JSON request body: "force" set to true and "ids" filled with the
# quoted, comma-separated list held in $ASSETS_TO_DELETE.
JSON_BODY=$(printf '{"force": true, "ids": [%s]}' "$ASSETS_TO_DELETE")
# Send the DELETE request, following redirects and writing the response to stdout.
curl --output - --location --request DELETE \
  --header 'Content-Type: application/json' \
  --header "x-api-key: $IMMICH_API_KEY" \
  --data "$JSON_BODY" \
  "$IMMICH_HOST/api/asset"
fsniper commented
I wrote a very basic CLI command to delete the first item from each pair.
import os
import requests, json
from db import is_db_populated, load_settings_from_db, load_duplicate_pairs
from api import deleteAsset, getAssetInfo
# Settings are loaded via load_settings_from_db() once, at import time, and
# kept as module-level globals; main() reads immich_server_url, api_key and
# timeout from here.
immich_server_url, api_key, images_folder, timeout = load_settings_from_db()
def list_duplicate_photos_faiss(assets, min_threshold, max_threshold, immich_server_url, api_key):
    """Print every stored duplicate pair and delete the first asset of each pair.

    Despite its name, this function is destructive: for each pair returned by
    ``load_duplicate_pairs`` it calls ``deleteAsset`` on the first asset ID.

    Args:
        assets: Asset collection handed to ``getAssetInfo`` for lookups.
        min_threshold: Lower bound passed to ``load_duplicate_pairs``.
        max_threshold: Upper bound passed to ``load_duplicate_pairs``.
        immich_server_url: Server URL passed to ``deleteAsset``.
        api_key: API key passed to ``deleteAsset``.

    Returns:
        None. Progress and results are reported on stdout.
    """
    # Nothing to do until the duplicate database has been generated.
    if not is_db_populated():
        print("The database does not contain any duplicate entries. Please generate/update the database.")
        return

    duplicates = load_duplicate_pairs(min_threshold, max_threshold)
    if not duplicates:
        print("No duplicates found.")
        return

    print(f"Found {len(duplicates)} duplicate pairs with FAISS code within threshold {min_threshold} < x < {max_threshold}:")

    # IDs successfully deleted during this run (assumes asset IDs are hashable,
    # e.g. strings). An ID can occur in several pairs, so this guards against
    # deleting both copies of a photo and against repeating a delete call.
    deleted_ids = set()

    for asset_id_1, asset_id_2 in duplicates:
        asset1_info = getAssetInfo(asset_id_1, assets)
        asset2_info = getAssetInfo(asset_id_2, assets)
        print(f"Pair:\n\timg1: {asset_id_1} {asset1_info}\n\timg2: {asset_id_2} {asset2_info}")

        if asset_id_1 in deleted_ids:
            # Already removed via an earlier pair; a second call would only fail.
            print("\t\tSkipped: first photo was already deleted")
            continue
        if asset_id_2 in deleted_ids:
            # The other copy is gone, so the first one must be kept.
            print("\t\tSkipped: second photo was already deleted, keeping the first")
            continue

        if deleteAsset(immich_server_url, asset_id_1, api_key):
            deleted_ids.add(asset_id_1)
            print("\t\tDeleted photo")
        else:
            print("\t\tdelete failed")
def fetchAssets(immich_server_url, api_key, timeout, type):
    """Fetch the server's asset list and keep only assets of the given type.

    Args:
        immich_server_url: Base URL of the server; trailing slashes are ignored.
        api_key: Value sent in the ``x-api-key`` request header.
        timeout: Timeout forwarded to ``requests.get``.
        type: Value each asset's ``"type"`` field must equal to be kept.

    Returns:
        A list of matching assets. The list is empty when the response body is
        empty or the response is not declared as JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    endpoint = immich_server_url.rstrip('/') + "/api/asset/"
    request_headers = {'Accept': 'application/json', 'x-api-key': api_key}

    # NOTE(review): verify=False turns off TLS certificate verification.
    response = requests.get(endpoint, headers=request_headers, verify=False, timeout=timeout)
    response.raise_for_status()

    content_type = response.headers.get('Content-Type', '')
    if 'application/json' not in content_type:
        print(f'Unexpected Content-Type: {content_type}\nResponse content: {response.text}')
        return []

    # An empty body cannot be decoded as JSON, so treat it as "no assets".
    if not response.text:
        return []

    return [item for item in response.json() if item.get("type") == type]
def main():
    """Fetch all IMAGE assets, then print each duplicate pair and delete its first asset.

    Uses the module-level ``immich_server_url``, ``api_key`` and ``timeout``
    settings and a fixed threshold window of 0.0 to 0.6.
    """
    min_threshold = 0.0
    max_threshold = 0.6
    assets = fetchAssets(immich_server_url, api_key, timeout, 'IMAGE')
    # The duplicate pairs are loaded inside list_duplicate_photos_faiss, so
    # they are not queried separately here.
    list_duplicate_photos_faiss(
        assets,
        min_threshold,
        max_threshold,
        immich_server_url,
        api_key,
    )


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()