Several fixes to Flux ControlNet pipelines (#9472)

vladmandic · yiyixuxu · sayakpaul · commit e1ea942b4279 · 2024-12-23T13:02:16.000+05:30
* fix flux controlnet pipelines

---------

Co-authored-by: yiyixuxu <yixu310@gmail.com>
diff --git a/src/diffusers/pipelines/auto_pipeline.py b/src/diffusers/pipelines/auto_pipeline.py
@@ -29,7 +29,14 @@
     StableDiffusionXLControlNetPipeline,
 )
 from .deepfloyd_if import IFImg2ImgPipeline, IFInpaintingPipeline, IFPipeline
-from .flux import FluxControlNetPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline, FluxPipeline
+from .flux import (
+    FluxControlNetImg2ImgPipeline,
+    FluxControlNetInpaintPipeline,
+    FluxControlNetPipeline,
+    FluxImg2ImgPipeline,
+    FluxInpaintPipeline,
+    FluxPipeline,
+)
 from .hunyuandit import HunyuanDiTPipeline
 from .kandinsky import (
     KandinskyCombinedPipeline,
@@ -128,6 +135,7 @@
         ("stable-diffusion-xl-controlnet-pag", StableDiffusionXLControlNetPAGImg2ImgPipeline),
         ("lcm", LatentConsistencyModelImg2ImgPipeline),
         ("flux", FluxImg2ImgPipeline),
+        ("flux-controlnet", FluxControlNetImg2ImgPipeline),
     ]
 )
 
@@ -143,6 +151,7 @@
         ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetInpaintPipeline),
         ("stable-diffusion-xl-pag", StableDiffusionXLPAGInpaintPipeline),
         ("flux", FluxInpaintPipeline),
+        ("flux-controlnet", FluxControlNetInpaintPipeline),
     ]
 )
 
diff --git a/src/diffusers/pipelines/flux/pipeline_flux_controlnet.py b/src/diffusers/pipelines/flux/pipeline_flux_controlnet.py
@@ -729,7 +729,7 @@ def __call__(
                 batch_size=batch_size * num_images_per_prompt,
                 num_images_per_prompt=num_images_per_prompt,
                 device=device,
-                dtype=dtype,
+                dtype=self.vae.dtype,
             )
             height, width = control_image.shape[-2:]
 
@@ -763,7 +763,7 @@ def __call__(
                     batch_size=batch_size * num_images_per_prompt,
                     num_images_per_prompt=num_images_per_prompt,
                     device=device,
-                    dtype=dtype,
+                    dtype=self.vae.dtype,
                 )
                 height, width = control_image_.shape[-2:]
 
@@ -840,12 +840,10 @@ def __call__(
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)
 
-                # handle guidance
-                if self.transformer.config.guidance_embeds:
-                    guidance = torch.tensor([guidance_scale], device=device)
-                    guidance = guidance.expand(latents.shape[0])
-                else:
-                    guidance = None
+                guidance = (
+                    torch.tensor([guidance_scale], device=device) if self.controlnet.config.guidance_embeds else None
+                )
+                guidance = guidance.expand(latents.shape[0]) if guidance is not None else None
 
                 # controlnet
                 controlnet_block_samples, controlnet_single_block_samples = self.controlnet(
@@ -863,6 +861,11 @@ def __call__(
                     return_dict=False,
                 )
 
+                guidance = (
+                    torch.tensor([guidance_scale], device=device) if self.transformer.config.guidance_embeds else None
+                )
+                guidance = guidance.expand(latents.shape[0]) if guidance is not None else None
+
                 noise_pred = self.transformer(
                     hidden_states=latents,
                     timestep=timestep / 1000,
diff --git a/src/diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py b/src/diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py
@@ -767,7 +767,7 @@ def __call__(
                 batch_size=batch_size * num_images_per_prompt,
                 num_images_per_prompt=num_images_per_prompt,
                 device=device,
-                dtype=dtype,
+                dtype=self.vae.dtype,
             )
             height, width = control_image.shape[-2:]
 
@@ -798,7 +798,7 @@ def __call__(
                     batch_size=batch_size * num_images_per_prompt,
                     num_images_per_prompt=num_images_per_prompt,
                     device=device,
-                    dtype=dtype,
+                    dtype=self.vae.dtype,
                 )
                 height, width = control_image_.shape[-2:]
 
diff --git a/src/diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py b/src/diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py
@@ -899,7 +899,7 @@ def __call__(
                 batch_size=batch_size * num_images_per_prompt,
                 num_images_per_prompt=num_images_per_prompt,
                 device=device,
-                dtype=dtype,
+                dtype=self.vae.dtype,
             )
             height, width = control_image.shape[-2:]
 
@@ -933,7 +933,7 @@ def __call__(
                     batch_size=batch_size * num_images_per_prompt,
                     num_images_per_prompt=num_images_per_prompt,
                     device=device,
-                    dtype=dtype,
+                    dtype=self.vae.dtype,
                 )
                 height, width = control_image_.shape[-2:]
 

Original file line number	Diff line number	Diff line change
`@@ -29,7 +29,14 @@`
`29`	`29`	`StableDiffusionXLControlNetPipeline,`
`30`	`30`	`)`
`31`	`31`	`from .deepfloyd_if import IFImg2ImgPipeline, IFInpaintingPipeline, IFPipeline`
`32`		`-from .flux import FluxControlNetPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline, FluxPipeline`
	`32`	`+from .flux import (`
	`33`	`+ FluxControlNetImg2ImgPipeline,`
	`34`	`+ FluxControlNetInpaintPipeline,`
	`35`	`+ FluxControlNetPipeline,`
	`36`	`+ FluxImg2ImgPipeline,`
	`37`	`+ FluxInpaintPipeline,`
	`38`	`+ FluxPipeline,`
	`39`	`+)`
`33`	`40`	`from .hunyuandit import HunyuanDiTPipeline`
`34`	`41`	`from .kandinsky import (`
`35`	`42`	`KandinskyCombinedPipeline,`
`@@ -128,6 +135,7 @@`
`128`	`135`	`("stable-diffusion-xl-controlnet-pag", StableDiffusionXLControlNetPAGImg2ImgPipeline),`
`129`	`136`	`("lcm", LatentConsistencyModelImg2ImgPipeline),`
`130`	`137`	`("flux", FluxImg2ImgPipeline),`
	`138`	`+ ("flux-controlnet", FluxControlNetImg2ImgPipeline),`
`131`	`139`	`]`
`132`	`140`	`)`
`133`	`141`
`@@ -143,6 +151,7 @@`
`143`	`151`	`("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetInpaintPipeline),`
`144`	`152`	`("stable-diffusion-xl-pag", StableDiffusionXLPAGInpaintPipeline),`
`145`	`153`	`("flux", FluxInpaintPipeline),`
	`154`	`+ ("flux-controlnet", FluxControlNetInpaintPipeline),`
`146`	`155`	`]`
`147`	`156`	`)`
`148`	`157`