文章目录
隐藏
第一章:模型加载与基础生成
1.1 基础模型加载
from diffusers import StableDiffusionPipeline
import torch

# Load the Stable Diffusion 1.5 base model in full (FP32) precision and move
# it to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float32,
).to("cuda")

# Generate the first image and write it to disk.
image = pipe("a cat wearing sunglasses").images[0]
image.save("basic_cat.png")
1.2 半精度优化加载
from diffusers import StableDiffusionXLPipeline
import torch

# Load the SDXL base model in half precision (FP16) and move it to the GPU.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to("cuda")

# xformers attention is switched on through a pipeline method; a
# `use_xformers=True` keyword to `from_pretrained` is not a recognised option
# and would leave xformers disabled. Requires the optional `xformers` package.
pipe.enable_xformers_memory_efficient_attention()

# Generate a high-resolution image.
image = pipe("cyberpunk city at night, 8k").images[0]
image.save("cyber_city.png")
1.3 低显存设备适配
# CPU卸载模式(显存不足时使用)——原文此处在转载时被截断,本节示例代码缺失
第二章:Prompt工程优化
2.1 结构化Prompt模板
# Structured prompt: one fragment per concern. The annotations are Python
# comments so they are not sent to the text encoder as part of the prompt.
# NOTE(review): "(text:1.3)" is WebUI-style weighting syntax; a plain diffusers
# pipeline presumably treats it as literal text — confirm, or use a
# prompt-weighting helper.
template = ", ".join([
    "(cute corgi:1.3) wearing {accessory}",  # subject
    "Studio Ghibli style, soft shading",     # style
    "in a flower field at sunset",           # environment
    "4k resolution, bokeh effect",           # quality
])

# Unwanted traits go through `negative_prompt` instead of being embedded in
# the positive prompt text.
negative_prompt = "blurry, low quality"

# A named placeholder is required: formatting "{glasses|hat|scarf}" with a
# positional argument raises KeyError.
image = pipe(
    template.format(accessory="sunglasses"),
    negative_prompt=negative_prompt,
).images[0]
image.save("styled_corgi.png")

# >>> Article heading (verbatim): 2.2 动态权重控制 — "Dynamic weight control"
# Use numeric weights to adjust how important each element is.
# NOTE(review): the "(text:weight)" syntax is presumably only honoured by
# front-ends or helpers that parse it — confirm for the pipeline in use.
prompt = (
    "(a beautiful castle:1.5) on a cliff, "
    "(medieval style:0.8) with (futuristic elements:0.6), "
    "intricate details, 8k cinematic lighting"
)

image = pipe(prompt, guidance_scale=7).images[0]
image.save("hybrid_castle.png")

# >>> Article heading (verbatim): 2.3 多语言Prompt融合 — "Multilingual prompt blending"
# Mixed Chinese/English prompt. The original article notes that this needs a
# text encoder with multilingual support.
prompt = (
    "A girl in 汉服 (hanfu:1.2) standing by 西湖 (West Lake), "
    "水墨画风格 (ink wash painting style), ultra detailed"
)

image = pipe(prompt).images[0]
image.save("hanfu_girl.png")

# >>> Article heading (verbatim): 第三章:高级控制技术 — "Chapter 3: Advanced control techniques"
3.1 ControlNet姿势控制
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from PIL import Image
import torch

# Load the OpenPose ControlNet in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-openpose",
    torch_dtype=torch.float16,
)

# Attach the ControlNet to the SD 1.5 base pipeline.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")

# The pose reference image is passed as the conditioning input.
pose_image = Image.open("pose_ref.png")
image = pipe("dancing woman", image=pose_image).images[0]
image.save("controlled_dance.png")

# >>> Article heading (verbatim): 3.2 LoRA风格叠加 — "LoRA style stacking"
from diffusers import StableDiffusionXLPipeline
import torch

# Load the SDXL base model in half precision.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to("cuda")

# Load the anime-style LoRA weights from a local file on top of the base model.
pipe.load_lora_weights("lora/anime_style_xl.safetensors")

image = pipe("a warrior in armor").images[0]
image.save("anime_warrior.png")

# >>> Article heading (verbatim): 3.3 多ControlNet联合控制 — "Combined control with multiple ControlNets"
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from PIL import Image
import torch

# Condition generation on Canny edges and a depth map at the same time.
controlnets = [
    ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16
    ),
    ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
    ),
]

pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnets,
    torch_dtype=torch.float16,
).to("cuda")

# One conditioning image per ControlNet, in the same order as `controlnets`.
canny_img = Image.open("edge.png")
depth_img = Image.open("depth.png")

image = pipe("futuristic car", image=[canny_img, depth_img]).images[0]
image.save("multi_control_car.png")

# >>> Article heading (verbatim): 第四章:性能优化 — "Chapter 4: Performance optimization"
4.1 TensorRT加速
from diffusers import DiffusionPipeline
import torch
import torch_tensorrt  # noqa: F401  (importing registers the "torch_tensorrt" backend)

# Load a concrete pipeline; the original placeholder `from_pretrained(...)`
# passed a literal Ellipsis as the model id.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")

# Optimise the UNet through the torch.compile Torch-TensorRT backend. The
# compiled wrapper keeps the UNet's call signature, so the pipeline can still
# pass timestep / text-embedding arguments, which a module compiled from a
# single example latent tensor would not accept.
pipe.unet = torch.compile(
    pipe.unet,
    backend="torch_tensorrt",
    options={"enabled_precisions": {torch.float32, torch.float16}},
    dynamic=False,
)

# The first call triggers engine compilation; later calls reuse it.
image = pipe("speed test image").images[0]

# >>> Article heading (verbatim): 4.2 批处理生成 — "Batched generation"
# Generate four images from four different prompts in a single call.
prompts = [
    "a red rose",
    "a blue rose",
    "a golden rose",
    "a black rose",
]

images = pipe(prompt=prompts, num_images_per_prompt=1).images

# Save each result under an index-based file name.
for idx, img in enumerate(images):
    img.save(f"rose_{idx}.png")

# >>> Article heading (verbatim): 4.3 缓存优化 — "Cache optimization"
from diffusers import StableDiffusionPipeline
import torch

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")

# `enable_sequential_cpu_offload=True` and `enable_kv_caching=True` are not
# `from_pretrained` options, and CPU offload would also conflict with the
# explicit `.to("cuda")` above. The behaviour the article describes (slow
# first call, faster later calls) is what compiling the UNet gives.
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

# The first generation is slower because it triggers compilation.
image = pipe("warmup image").images[0]

# Later generations reuse the compiled graph; the article quotes roughly 30%,
# but the actual gain depends on hardware and library versions.
image = pipe("optimized image").images[0]

# >>> Article heading (verbatim): 第五章:商业化生产 — "Chapter 5: Commercial production"
5.1 批量产品图生成
# Catalogue entries to render: one product photo per entry.
product_data = [
    {"name": "sneakers", "color": "neon green"},
    {"name": "backpack", "color": "matte black"},
    {"name": "watch", "color": "rose gold"},
]

for product in product_data:
    # Build the prompt from the product's colour and name.
    prompt = (
        f"Professional product photo of {product['color']} {product['name']}, "
        "studio lighting, 8k resolution, product design award winner"
    )
    image = pipe(prompt).images[0]
    image.save(f"{product['name']}_{product['color']}.png")

# >>> Article heading (verbatim): 5.2 自动质量检测 — "Automatic quality check"
from transformers import CLIPModel, CLIPProcessor
import torch

# Load the CLIP model and its matching pre-processor.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


def quality_check(image, target_prompt, threshold=25):
    """Return True when the CLIP image-text score exceeds ``threshold``.

    Args:
        image: A single image accepted by ``clip_processor``.
        target_prompt: A single prompt string to compare the image against.
        threshold: Minimum ``logits_per_image`` value to pass. The default of
            25 is the article's starting point and should be tuned.

    Returns:
        bool: Whether the score is strictly greater than ``threshold``.
    """
    inputs = clip_processor(
        text=target_prompt,
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,  # long prompts would exceed the text encoder's limit
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = clip_model(**inputs)
    # `.item()` requires exactly one image and one prompt.
    similarity = outputs.logits_per_image.item()
    return similarity > threshold


# `image` and `prompt` come from the preceding generation step.
if quality_check(image, prompt):
    image.save("approved.png")
else:
    print("Quality check failed!")

# >>> Article heading (verbatim): 5.3 多尺寸适配生成 — "Multi-resolution generation"
from diffusers import StableDiffusionPipeline
import torch

resolutions = [(512, 512), (768, 768), (1024, 1024)]

# Load the pipeline once; reloading it inside the loop repeats the full model
# load for every resolution. The original placeholder `from_pretrained(...)`
# passed a literal Ellipsis as the model id.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")

# `prompt` is expected to be defined by the surrounding code.
for w, h in resolutions:
    # `target_size` is an SDXL pipeline argument and is not a parameter of
    # StableDiffusionPipeline, so only width/height are passed here.
    image = pipe(prompt, width=w, height=h).images[0]
    image.save(f"output_{w}x{h}.png")

# >>> Article heading (verbatim): 第六章:故障排查 — "Chapter 6: Troubleshooting"
6.1 显存监控
import nvidia_smi

# Initialise NVML and grab a handle to device index 0.
nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)


def print_mem_usage():
    """Print the memory currently reported as used for ``handle``, in MiB."""
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    print(f"Used VRAM: {info.used//1024**2} MB")


print_mem_usage()  # call before and after generation to compare usage

# >>> Article heading (verbatim): 6.2 异常处理 — "Exception handling"
try:
    image = pipe("problematic prompt").images[0]
except torch.cuda.OutOfMemoryError:
    print("显存不足!尝试启用--medvram")
    # Release cached blocks left over from the failed attempt before retrying.
    torch.cuda.empty_cache()
    # Retry with model-level CPU offload to lower peak GPU memory.
    pipe.enable_model_cpu_offload()
    image = pipe("problematic prompt").images[0]

# >>> Article heading (verbatim): 6.3 采样调试 — "Sampling debugging"
from diffusers import StableDiffusionPipeline
import torch

# Record the sampling process. The original placeholder `from_pretrained(...)`
# passed a literal Ellipsis as the model id.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")
pipe.set_progress_bar_config(leave=True)  # keep the progress bar on screen


def save_step_latents(pipeline, step, timestep, callback_kwargs):
    """Save the latents of one denoising step to ``step_<n>.pt``.

    Follows the ``callback_on_step_end`` contract: receives the pipeline, the
    step index, the timestep and a dict of requested tensors, and must return
    that dict.
    """
    # Tensors have no `.save()` method; `torch.save` serialises them.
    torch.save(callback_kwargs["latents"], f"step_{step}.pt")
    return callback_kwargs


# A single pipeline call already visits every inference step; looping over
# `num_train_timesteps` would rerun the whole generation that many times.
# `prompt` is expected to be defined by the surrounding code.
image = pipe(
    prompt,
    callback_on_step_end=save_step_latents,
    callback_on_step_end_tensor_inputs=["latents"],
).images[0]

# >>> Article footer (verbatim): 文章来源于互联网:《Stable Diffusion绘画完全指南:从入门到精通的Prompt设计艺术》-配套代码示例
相关推荐: 提升自媒体写作效率:9款必备工具推荐! #经验分享#AI写作
这些工具不仅可以快速生成高质量的文本内容,还可以根据用户的需求进行个性化定制。它们可以帮助我们节省大量的时间和精力,让我们更加专注于创意和细节的打磨。本文将为大家详细介绍几个AI写作工具,让你在写作领域更上一层楼。 1.写作兔 这是一个微信公众号 面向专业写作…
5bei.cn大模型教程网










