Support SD3 controlnet inpainting (#9099)

JPlin · 鹏徙 · a-r-r-o-w · sayakpaul · commit 095393a5b8bf · 2024-12-23T13:02:15.000+05:30
* add controlnet inpainting pipeline

* [SD3] add controlnet inpaint example

* update example and fix code style

* fix code style with ruff

* Update controlnet_sd3.md : add control inpaint pipeline

* Update docs/source/en/api/pipelines/controlnet_sd3.md

Co-authored-by: Aryan <contact.aryanvs@gmail.com>

* Update docs/source/en/api/pipelines/controlnet_sd3.md

Co-authored-by: Aryan <contact.aryanvs@gmail.com>

* Update docs/source/en/api/pipelines/controlnet_sd3.md

Co-authored-by: Aryan <contact.aryanvs@gmail.com>

* Update src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py

Co-authored-by: Aryan <contact.aryanvs@gmail.com>

* Update __init__.py : add sd3 control pipelines

* Update pipeline : add new param doc & check input reference.

* fix typo

* make style & make quality

* add unittest for sd3 controlnet inpaint

---------

Co-authored-by: 鹏徙 <linjinpeng.ljp@alibaba-inc.com>
Co-authored-by: Aryan <contact.aryanvs@gmail.com>
diff --git a/docs/source/en/api/pipelines/controlnet_sd3.md b/docs/source/en/api/pipelines/controlnet_sd3.md
@@ -1,4 +1,4 @@
-<!--Copyright 2023 The HuggingFace Team and The InstantX Team. All rights reserved.
+<!--Copyright 2024 The HuggingFace Team. All rights reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at
@@ -22,7 +22,16 @@ The abstract from the paper is:
 
 *We present ControlNet, a neural network architecture to add spatial conditioning controls to large, pretrained text-to-image diffusion models. ControlNet locks the production-ready large diffusion models, and reuses their deep and robust encoding layers pretrained with billions of images as a strong backbone to learn a diverse set of conditional controls. The neural architecture is connected with "zero convolutions" (zero-initialized convolution layers) that progressively grow the parameters from zero and ensure that no harmful noise could affect the finetuning. We test various conditioning controls, eg, edges, depth, segmentation, human pose, etc, with Stable Diffusion, using single or multiple conditions, with or without prompts. We show that the training of ControlNets is robust with small (<50k) and large (>1m) datasets. Extensive results show that ControlNet may facilitate wider applications to control image diffusion models.*
 
-This code is implemented by [The InstantX Team](https://huggingface.co/InstantX). You can find pre-trained checkpoints for SD3-ControlNet on [The InstantX Team](https://huggingface.co/InstantX) Hub profile.
+This ControlNet code is mainly implemented by [The InstantX Team](https://huggingface.co/InstantX). The inpainting-related code was developed by [The Alimama Creative Team](https://huggingface.co/alimama-creative). You can find pre-trained checkpoints for SD3-ControlNet in the table below:
+
+
+| ControlNet type | Developer | Link |
+| -------- | ---------- | ---- |
+| Canny | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/SD3-Controlnet-Canny) |
+| Pose | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/SD3-Controlnet-Pose) |
+| Tile | [The InstantX Team](https://huggingface.co/InstantX) | [Link](https://huggingface.co/InstantX/SD3-Controlnet-Tile) |
+| Inpainting | [The Alimama Creative Team](https://huggingface.co/alimama-creative) | [Link](https://huggingface.co/alimama-creative/SD3-Controlnet-Inpainting) |
+
 
 <Tip>
 
@@ -35,5 +44,10 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)
 	- all
 	- __call__
 
+## StableDiffusion3ControlNetInpaintingPipeline
+[[autodoc]] pipelines.controlnet_sd3.pipeline_stable_diffusion_3_controlnet_inpainting.StableDiffusion3ControlNetInpaintingPipeline
+	- all
+	- __call__
+
 ## StableDiffusion3PipelineOutput
 [[autodoc]] pipelines.stable_diffusion_3.pipeline_output.StableDiffusion3PipelineOutput
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
@@ -308,6 +308,7 @@
             "StableCascadeCombinedPipeline",
             "StableCascadeDecoderPipeline",
             "StableCascadePriorPipeline",
+            "StableDiffusion3ControlNetInpaintingPipeline",
             "StableDiffusion3ControlNetPipeline",
             "StableDiffusion3Img2ImgPipeline",
             "StableDiffusion3InpaintPipeline",
diff --git a/src/diffusers/models/controlnet_sd3.py b/src/diffusers/models/controlnet_sd3.py
@@ -55,6 +55,7 @@ def __init__(
         pooled_projection_dim: int = 2048,
         out_channels: int = 16,
         pos_embed_max_size: int = 96,
+        extra_conditioning_channels: int = 0,
     ):
         super().__init__()
         default_out_channels = in_channels
@@ -98,7 +99,7 @@ def __init__(
             height=sample_size,
             width=sample_size,
             patch_size=patch_size,
-            in_channels=in_channels,
+            in_channels=in_channels + extra_conditioning_channels,
             embed_dim=self.inner_dim,
             pos_embed_type=None,
         )
diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py
@@ -173,6 +173,7 @@
     _import_structure["controlnet_sd3"].extend(
         [
             "StableDiffusion3ControlNetPipeline",
+            "StableDiffusion3ControlNetInpaintingPipeline",
         ]
     )
     _import_structure["deepfloyd_if"] = [
@@ -465,9 +466,7 @@
         from .controlnet_hunyuandit import (
             HunyuanDiTControlNetPipeline,
         )
-        from .controlnet_sd3 import (
-            StableDiffusion3ControlNetPipeline,
-        )
+        from .controlnet_sd3 import StableDiffusion3ControlNetInpaintingPipeline, StableDiffusion3ControlNetPipeline
         from .controlnet_xs import (
             StableDiffusionControlNetXSPipeline,
             StableDiffusionXLControlNetXSPipeline,
diff --git a/src/diffusers/pipelines/controlnet_sd3/__init__.py b/src/diffusers/pipelines/controlnet_sd3/__init__.py
@@ -23,6 +23,9 @@
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
     _import_structure["pipeline_stable_diffusion_3_controlnet"] = ["StableDiffusion3ControlNetPipeline"]
+    _import_structure["pipeline_stable_diffusion_3_controlnet_inpainting"] = [
+        "StableDiffusion3ControlNetInpaintingPipeline"
+    ]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -33,6 +36,7 @@
         from ...utils.dummy_torch_and_transformers_objects import *
     else:
         from .pipeline_stable_diffusion_3_controlnet import StableDiffusion3ControlNetPipeline
+        from .pipeline_stable_diffusion_3_controlnet_inpainting import StableDiffusion3ControlNetInpaintingPipeline
 
     try:
         if not (is_transformers_available() and is_flax_available()):
diff --git a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py
diff --git a/tests/pipelines/controlnet_sd3/test_controlnet_inpaint_sd3.py b/tests/pipelines/controlnet_sd3/test_controlnet_inpaint_sd3.py

Original file line number	Diff line number	Diff line change
`@@ -173,6 +173,7 @@`
`173`	`173`	`_import_structure["controlnet_sd3"].extend(`
`174`	`174`	`[`
`175`	`175`	`"StableDiffusion3ControlNetPipeline",`
	`176`	`+ "StableDiffusion3ControlNetInpaintingPipeline",`
`176`	`177`	`]`
`177`	`178`	`)`
`178`	`179`	`_import_structure["deepfloyd_if"] = [`
`@@ -465,9 +466,7 @@`
`465`	`466`	`from .controlnet_hunyuandit import (`
`466`	`467`	`HunyuanDiTControlNetPipeline,`
`467`	`468`	`)`
`468`		`- from .controlnet_sd3 import (`
`469`		`- StableDiffusion3ControlNetPipeline,`
`470`		`- )`
	`469`	`+ from .controlnet_sd3 import StableDiffusion3ControlNetInpaintingPipeline, StableDiffusion3ControlNetPipeline`
`471`	`470`	`from .controlnet_xs import (`
`472`	`471`	`StableDiffusionControlNetXSPipeline,`
`473`	`472`	`StableDiffusionXLControlNetXSPipeline,`