alexandersokol/sd-model-organizer

[Feature Request] CRC/Hash-based model recognition

pmcculler opened this issue · 7 comments

Once a model is moved, it's both forgotten and not forgotten; a new un-added entry appears, and its phantom remains to be removed. Any chance we could add an optional feature that recognizes models by their hashes? At least some, like loras, declare their hashes in metadata, I think, lowering the computational and disk read burden dramatically.

Thanks! I love this software.

Initially I had the idea of binding database records to the file hash rather than the file path. But I had to change this logic to simply store the file location, because computing any hash for large files is an extremely long operation. Maybe a separate feature, such as a model scanner that repairs/binds files to records, might be a good idea.

Right! But these models, such as loras, contain metadata that includes hash values, so you can skip the work!
Here's an example. Hashes galore!

{
"ss_sd_model_name": "Stable-Diffusion-v1-5.safetensors",
"ss_resolution": "(512, 512)",
"ss_clip_skip": "2",
"ss_num_train_images": "610",
"ss_tag_frequency": {
"meta_lat.json": {
"multiple girls": 1,
"outdoors": 31,
"multiple boys": 4,
"sky": 23,
"signature": 4,
"bird": 2,
"moon": 8,
"grass": 3,
"star (sky)": 11,
"scenery": 54,
"starry sky": 4,
"sunset": 3,
"horizon": 2,
"monochrome": 1,
"no humans": 26,
"night": 24,
"blue theme": 4,
"light": 5,
"glowing": 6,
"building": 24,
"reflection": 8,
"science fiction": 15,
"rain": 5,
"cable": 3,
"solo": 26,
"1boy": 16,
"holding": 7,
"sitting": 3,
"flower": 3,
"helmet": 3,
"1other": 5,
"rock": 4,
"reading": 1,
"space": 8,
"planet": 9,
"ambiguous gender": 4,
"spacesuit": 3,
"space helmet": 3,
"astronaut": 3,
"indoors": 14,
"window": 12,
"door": 6,
"horror (theme)": 2,
"day": 3,
"cloud": 6,
"water": 5,
"ocean": 1,
"cloudy sky": 2,
"multiple others": 2,
"mountain": 2,
"6+others": 1,
"lying": 2,
"earth (planet)": 4,
"1girl": 8,
"long hair": 4,
"blonde hair": 1,
"standing": 16,
"wide shot": 5,
"crack": 1,
"ground vehicle": 5,
"city": 13,
"sign": 9,
"road": 7,
"neon lights": 13,
"sand": 3,
"sun": 2,
"spacecraft": 5,
"desert": 3,
"from behind": 8,
"sunrise": 1,
"short hair": 5,
"black hair": 4,
"underwear": 1,
"panties": 1,
"barefoot": 1,
"kneeling": 1,
"bed": 3,
"tank top": 1,
"hologram": 1,
"industrial pipe": 1,
"male focus": 9,
"artist name": 1,
"stairs": 4,
"railing": 1,
"silhouette": 3,
"power lines": 1,
"skirt": 2,
"dress": 1,
"hallway": 1,
"cityscape": 6,
"street": 3,
"city lights": 3,
"tiles": 3,
"dark": 3,
"ruins": 3,
"orange sky": 1,
"red sky": 2,
"english text": 5,
"tree": 4,
"aircraft": 1,
"shoes": 1,
"sneakers": 1,
"pants": 3,
"night sky": 4,
"plant": 3,
"cape": 1,
"walking": 2,
"cherry blossoms": 1,
"nature": 2,
"blurry": 1,
"forest": 1,
"torii": 1,
"graffiti": 3,
"traditional media": 1,
"watercraft": 1,
"skyscraper": 4,
"motor vehicle": 3,
"car": 2,
"vehicle focus": 1,
"alley": 1,
"trash bag": 1,
"rooftop": 1,
"virtual youtuber": 1,
"pink theme": 1,
"brown hair": 1,
"labcoat": 1,
"pillow": 2,
"lamp": 1,
"overgrown": 1,
"jacket": 2,
"cup": 1,
"bottle": 1,
"stool": 1,
"weapon": 1,
"gun": 1,
"clock": 1,
"tower": 1,
"animal": 1,
"cat": 2,
"television": 1,
"monitor": 1,
"debris": 1,
"tile floor": 2,
"lamppost": 2,
"shirt": 1,
"white shirt": 1,
"bed sheet": 1,
"on bed": 1,
"on side": 1,
"umbrella": 1,
"holding umbrella": 1,
"bag": 1,
"backpack": 1,
"train": 1,
"train station": 2,
"hood": 1,
"2boys": 1,
"looking down": 1,
"crossed arms": 1,
"white jacket": 1,
"robot": 1,
"goggles": 1,
"lantern": 1,
"one-eyed": 1,
"paper lantern": 1,
"mechanical legs": 1,
"humanoid robot": 1,
"blue sky": 1,
"very wide shot": 1,
"chinese text": 1
}
},
"ss_batch_size_per_device": "6",
"ss_bucket_info": {
"buckets": {
"0": {
"resolution": [
320,
704
],
"count": 10
},
"1": {
"resolution": [
384,
640
],
"count": 220
},
"2": {
"resolution": [
448,
576
],
"count": 260
},
"3": {
"resolution": [
512,
512
],
"count": 50
},
"4": {
"resolution": [
576,
448
],
"count": 10
},
"5": {
"resolution": [
640,
384
],
"count": 40
},
"6": {
"resolution": [
704,
320
],
"count": 20
}
},
"mean_img_ar_error": 0.0
},
"ss_bucket_no_upscale": "False",
"ss_cache_latents": "False",
"ss_caption_dropout_every_n_epochs": "0",
"ss_caption_dropout_rate": "0",
"ss_caption_tag_dropout_rate": "0",
"ss_color_aug": "False",
"ss_dataset_dirs": {
"meta_lat.json": {
"n_repeats": 10,
"img_count": 61
}
},
"ss_enable_bucket": "True",
"ss_epoch": "20",
"ss_face_crop_aug_range": "None",
"ss_flip_aug": "False",
"ss_full_fp16": "False",
"ss_gradient_accumulation_steps": "1",
"ss_gradient_checkpointing": "False",
"ss_keep_tokens": "0",
"ss_learning_rate": "0.0001",
"ss_lowram": "True",
"ss_lr_scheduler": "constant",
"ss_lr_warmup_steps": "0",
"ss_max_bucket_reso": "None",
"ss_max_grad_norm": "1.0",
"ss_max_token_length": "225",
"ss_max_train_steps": "2100",
"ss_min_bucket_reso": "None",
"ss_min_snr_gamma": "None",
"ss_mixed_precision": "fp16",
"ss_network_alpha": "16",
"ss_network_dim": "32",
"ss_network_module": "networks.lora",
"ss_new_sd_model_hash": "658a49c08ab2ca4f39a705a437de058e1b13d305479f0b0154548ce1b3bc9253",
"ss_new_vae_hash": "c6a580b13a5bc05a5e16e4dbb80608ff2ec251a162311590c1f34c013d7f3dab",
"ss_noise_offset": "None",
"ss_num_batches_per_epoch": "105",
"ss_num_epochs": "20",
"ss_num_reg_images": "0",
"ss_optimizer": "bitsandbytes.optim.adamw.AdamW8bit",
"ss_output_name": "last",
"ss_prior_loss_weight": "1.0",
"ss_random_crop": "False",
"ss_reg_dataset_dirs": {},
"ss_sd_model_hash": "aa366209",
"ss_sd_scripts_commit_hash": "d9c629f6242c6f00dfa322287807e6c1948d2271",
"ss_seed": "396926761",
"ss_session_id": "626454392",
"ss_shuffle_caption": "True",
"ss_text_encoder_lr": "5e-05",
"ss_total_batch_size": "6",
"ss_training_comment": "None",
"ss_training_finished_at": "1689163634.9973602",
"ss_training_started_at": "1689160562.7512345",
"ss_unet_lr": "0.0001",
"ss_v2": "False",
"ss_vae_hash": "223531c6",
"ss_vae_name": "stablediffusion.vae.pt",
"sshs_legacy_hash": "c6e35656",
"sshs_model_hash": "b85a905a0f7fb8a7f10ce31fbe73358ae9dae7b9336ef57d0c1cccfcd7d08fb8"
}

IMHO there is a problem with duplicates. I do not understand why they exist. I edit a record, click save, and MO saves a new record instead of updating the current record. Why? Does MO know the id of the record I am editing or not...

As I see it, MO may not know the record ids of local files if it does not store them in the database.

If records of local files are not saved to the database at all, then maybe MO needs to save them to a temp table and rescan all local model files every time the user clicks MO or clicks Reload.

Second, the "File location" field needs to be unfrozen if you keep this field and will not remove it. IMHO there is no need to limit users to one fixed path. For example, I changed the path of Models and MO's database of models was broken.

IMHO all issues will be solved. Or not... :)

@pmcculler Where is the CRC of the model located? In the model file?

@LLKoder that's right, the metadata example I showed is in the model itself; the model's hash is one of the fields ending in _hash I believe.

@pmcculler Which software do you use to see the metadata?

@LLKoder Automatic1111 has a metadata inspector built into it, you just go to the model card, hover, and press (i)

Reading the hash from metadata would be a great option, but it is only available for .safetensors files.