huggingface/diffusion-models-class

A suggestion for unit 3

yizhangliu opened this issue · 0 comments

https://colab.research.google.com/github/huggingface/diffusion-models-class/blob/main/unit3/01_stable_diffusion_introduction.ipynb

from torchvision import transforms
display(init_image)
# PIL image -> torch.Tensor in [0, 1], with a leading batch dimension.
images = transforms.Compose([transforms.ToTensor()])(init_image).unsqueeze(0).to(device,torch.float)
# The Stable Diffusion VAE expects inputs in [-1, 1] (same preprocessing as the
# diffusers img2img pipeline), so rescale from [0, 1] before encoding.
images = images * 2.0 - 1.0
print("Input images shape:", images.shape)

# Encode to latent space; 0.18215 is the SD latent scaling factor.
with torch.no_grad():
  latents = 0.18215 * pipe.vae.encode(images).latent_dist.mean
print("Encoded latents shape:", latents.shape)

# Decode again (undo the scaling factor before decoding).
with torch.no_grad():
  decoded_images = pipe.vae.decode(latents / 0.18215).sample
print("Decoded images shape:", decoded_images.shape)
# The VAE decodes back into [-1, 1]; map to [0, 1] and clamp, otherwise
# to_pil_image clips the negative half of the range and the image is wrong.
decoded_images = (decoded_images / 2.0 + 0.5).clamp(0, 1)
display(transforms.functional.to_pil_image(decoded_images[0]))