Generating Images with Z-image Model Quickstart
Prerequisites:
1. A CUDA-capable GPU with PyTorch installed.
2. The `diffusers` library (the model weights are fetched automatically by `from_pretrained` on first run).
See the following example of inference code:
import torch
from diffusers import ZImagePipelineprint("Loading Z-Image-Turbo model...")
pipe = ZImagePipeline.from_pretrained(
"Tongyi-MAI/Z-Image-Turbo",
torch_dtype=torch.bfloat16,
low_cpu_mem_usage=False,
)
pipe.to("cuda")
print("Model loaded successfully!")prompt = "Young Chinese woman in red Hanfu, intricate embroidery..."print("Generating image...")
image = pipe(
prompt=prompt,
height=1024,
width=1024,
num_inference_steps=9,
guidance_scale=0.0,
generator=torch.Generator("cuda").manual_seed(42),
).images[0]image.save("example.png")
print("Image saved as 'example.png'!")4
See also: a batch-inference example that generates one image per prompt:
import os
from pathlib import Path
import time
import torch
from inference import ensure_weights
from utils import AttentionBackend, load_from_local_dir, set_attention_backend
from zimage import generatedef read_prompts(path: str) -> list[str]:
"""Read prompts from a text file (one per line, empty lines skipped)."""
prompt_path = Path(path)
if not prompt_path.exists():
raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
with prompt_path.open("r", encoding="utf-8") as f:
prompts = [line.strip() for line in f if line.strip()]
if not prompts:
raise ValueError(f"No prompts found in {prompt_path}")
return prompts
PROMPTS = read_prompts(os.environ.get("PROMPTS_FILE", "prompts/prompt1.txt"))
Example contents of `prompts/prompt1.txt` (one prompt per line):

    A serene mountain landscape at sunset
    Futuristic city with flying cars
    Portrait of a wise old wizard
Portrait of a wise old wizarddef slugify(text: str, max_len: int = 60) -> str:
"""Create a filesystem-safe slug from the prompt."""
slug = "".join(ch.lower() if ch.isalnum() else "-" for ch in text)
slug = "-".join(part for part in slug.split("-") if part)
return slug[:max_len].rstrip("-") or "prompt"def select_device() -> str:
"""Choose the best available device without repeating detection logic."""
if torch.cuda.is_available():
print("Chosen device: cuda")
return "cuda"
try:
import torch_xla.core.xla_model as xm
device = xm.xla_device()
print("Chosen device: tpu")
return device
except (ImportError, RuntimeError):
if torch.backends.mps.is_available():
print("Chosen device: mps")
return "mps"
print("Chosen device: cpu")
return "cpu"def main():
model_path = ensure_weights("ckpts/Z-Image-Turbo")
dtype = torch.bfloat16
compile = False
height = 1024
width = 1024
num_inference_steps = 8
guidance_scale = 0.0
attn_backend = os.environ.get("ZIMAGE_ATTENTION", "_native_flash")
output_dir = Path("outputs")
output_dir.mkdir(exist_ok=True) device = select_device()
components = load_from_local_dir(model_path, device=device, dtype=dtype, compile=compile)
AttentionBackend.print_available_backends()
set_attention_backend(attn_backend)
print(f"Chosen attention backend: {attn_backend}") for idx, prompt in enumerate(PROMPTS, start=1):
output_path = output_dir / f"prompt-{idx:02d}-{slugify(prompt)}.png"
seed = 42 + idx - 1
generator = torch.Generator(device).manual_seed(seed)
start_time = time.time()
images = generate(
prompt=prompt,
**components,
height=height,
width=width,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
generator=generator,
)
elapsed = time.time() - start_time
images[0].save(output_path)
print(f"[{idx}/{len(PROMPTS)}] Saved {output_path} in {elapsed:.2f} seconds")
print("Done.")
🎨 Customizing Your Generation
Change Image Size
image = pipe(
prompt=prompt,
height=768, # Adjust height
width=768, # Adjust width
...
).images[0]

Adjust Quality vs Speed
# Faster generation (lower quality)
num_inference_steps=5
# Higher quality (slower generation)
num_inference_steps=15

Use Different Seeds
Last updated
Was this helpful?