
Commit b08b6ca

Enhance PowerPaint (open-mmlab#2093)
* Update gradio_PowerPaint.py
* Update gradio_PowerPaint.py
* Update gradio_PowerPaint.py
* Update gradio_PowerPaint.py
* Update README.md
* Update README.md
* Update gradio_PowerPaint.py
* Update gradio_PowerPaint.py
* Update README.md
1 parent ab610dd commit b08b6ca

File tree

* projects/powerpaint/README.md
* projects/powerpaint/gradio_PowerPaint.py

2 files changed: +89 −73 lines changed

projects/powerpaint/README.md

+12 −0 lines changed
@@ -4,8 +4,20 @@
 
 This README provides a step-by-step guide to download the repository, set up the required virtual environment named "PowerPaint" using conda, and run PowerPaint with or without ControlNet.
 
+## News
+
+**December 18, 2023**
+
+*Enhanced PowerPaint Model*
+
+- We are delighted to announce the release of more stable model weights. These refined weights can now be accessed on [Hugging Face](https://huggingface.co/JunhaoZhuang/PowerPaint-v1/tree/main). The `gradio_PowerPaint.py` file and [Online Demo](https://openxlab.org.cn/apps/detail/rangoliu/PowerPaint) have also been updated as part of this release.
+
+
+## Next
+
 **Stronger Model Weights Coming Soon!**
 
+________________
 <img src='https://github.com/open-mmlab/mmagic/assets/12782558/acd01391-c73f-4997-aafd-0869aebcc915'/>
 
 ## Getting Started

projects/powerpaint/gradio_PowerPaint.py

+77 −73 lines changed
@@ -11,7 +11,6 @@
     StableDiffusionInpaintPipeline as Pipeline
 from pipeline.pipeline_PowerPaint_ControlNet import \
     StableDiffusionControlNetInpaintPipeline as controlnetPipeline
-from safetensors.torch import load_file
 from transformers import DPTFeatureExtractor, DPTForDepthEstimation
 from utils.utils import TokenizerWrapper, add_tokens
 
@@ -21,8 +20,7 @@
     global pipe
     pipe = Pipeline.from_pretrained(
         'runwayml/stable-diffusion-inpainting',
-        torch_dtype=weight_dtype,
-        safety_checker=None)
+        torch_dtype=weight_dtype)
     pipe.tokenizer = TokenizerWrapper(
         from_pretrained='runwayml/stable-diffusion-v1-5',
         subfolder='tokenizer',
@@ -34,14 +32,13 @@
         placeholder_tokens=['P_ctxt', 'P_shape', 'P_obj'],
         initialize_tokens=['a', 'a', 'a'],
         num_vectors_per_token=10)
-    pipe.unet.load_state_dict(
-        load_file(
-            './models/unet/diffusion_pytorch_model.safetensors', device='cuda'),
-        strict=False)
-    pipe.text_encoder.load_state_dict(
-        torch.load('./models/text_encoder/pytorch_model.bin'), strict=False)
+
+    from safetensors.torch import load_model
+    load_model(pipe.unet, "./models/unet/diffusion_pytorch_model.safetensors")
+    pipe.text_encoder.load_state_dict(torch.load("./models/text_encoder/pytorch_model.bin"), strict=False)
     pipe = pipe.to('cuda')
 
+
 depth_estimator = DPTForDepthEstimation.from_pretrained(
     'Intel/dpt-hybrid-midas').to('cuda')
 feature_extractor = DPTFeatureExtractor.from_pretrained(
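The weight-loading change above replaces the manual `load_file` + `load_state_dict` pair with `safetensors.torch.load_model`, which writes tensors directly into an existing module and reports key mismatches itself (it also makes the module-level `load_file` import unnecessary, hence the removal in the first hunk). A minimal sketch contrasting the two patterns, assuming a plain `torch.nn.Module`; this is illustrative, not part of the commit:

```python
import torch
from safetensors.torch import load_file, load_model


def load_unet_old(unet: torch.nn.Module, path: str) -> None:
    # Before: materialize the whole state dict, then copy it into the
    # module, with strict=False silently skipping mismatched keys.
    state_dict = load_file(path, device='cuda')
    unet.load_state_dict(state_dict, strict=False)


def load_unet_new(unet: torch.nn.Module, path: str) -> None:
    # After: a single call that loads tensors in place and returns
    # (missing_keys, unexpected_keys) for inspection.
    missing, unexpected = load_model(unet, path, strict=False)
    if missing or unexpected:
        print(f'missing: {missing}, unexpected: {unexpected}')
```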
@@ -51,7 +48,7 @@
 
     global current_control
     current_control = 'canny'
-    controlnet_conditioning_scale = 0.5
+    # controlnet_conditioning_scale = 0.8
 
 
 def set_seed(seed):
@@ -94,8 +91,8 @@ def add_task(prompt, negative_prompt, control_type):
     elif control_type == 'shape-guided':
         promptA = prompt + ' P_shape'
         promptB = prompt + ' P_ctxt'
-        negative_promptA = negative_prompt + ' P_shape'
-        negative_promptB = negative_prompt + ' P_ctxt'
+        negative_promptA = negative_prompt
+        negative_promptB = negative_prompt
     elif control_type == 'image-outpainting':
         promptA = prompt + ' P_ctxt'
         promptB = prompt + ' P_ctxt'
@@ -104,18 +101,18 @@ def add_task(prompt, negative_prompt, control_type):
     else:
         promptA = prompt + ' P_obj'
         promptB = prompt + ' P_obj'
-        negative_promptA = negative_prompt + ' P_obj'
-        negative_promptB = negative_prompt + ' P_obj'
+        negative_promptA = negative_prompt
+        negative_promptB = negative_prompt
 
     return promptA, promptB, negative_promptA, negative_promptB
 
 
+
 def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
-            negative_prompt, task, vertical_expansion_ratio,
-            horizontal_expansion_ratio):
+            negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio):
     size1, size2 = input_image['image'].convert('RGB').size
 
-    if task != 'image-outpainting':
+    if task!='image-outpainting':
         if size1 < size2:
             input_image['image'] = input_image['image'].convert('RGB').resize(
                 (640, int(size2 / size1 * 640)))
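The two `add_task` hunks above change only the negative prompts: the learned task tokens (`P_ctxt`, `P_shape`, `P_obj`) are still appended to the positive prompts, but no longer to the negative ones. A standalone sketch of the resulting behavior for the shape-guided branch (illustrative function name, not the project's code):

```python
def add_task_shape_guided(prompt: str, negative_prompt: str):
    # The task tokens stay on the positive prompts...
    promptA = prompt + ' P_shape'
    promptB = prompt + ' P_ctxt'
    # ...while the negative prompts now pass through unchanged
    # (previously they received ' P_shape' / ' P_ctxt' as well).
    return promptA, promptB, negative_prompt, negative_prompt


print(add_task_shape_guided('a teddy bear on a bench', 'low quality'))
# ('a teddy bear on a bench P_shape', 'a teddy bear on a bench P_ctxt',
#  'low quality', 'low quality')
```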
@@ -130,44 +127,34 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
             input_image['image'] = input_image['image'].convert('RGB').resize(
                 (int(size1 / size2 * 512), 512))
 
-    if (vertical_expansion_ratio is not None) and (horizontal_expansion_ratio
-                                                   is not None):  # noqa
-        o_W, o_H = input_image['image'].convert('RGB').size
-        c_W = int(horizontal_expansion_ratio * o_W)
-        c_H = int(vertical_expansion_ratio * o_H)
+    if vertical_expansion_ratio!=None and horizontal_expansion_ratio!=None:
+        o_W,o_H = input_image['image'].convert('RGB').size
+        c_W = int(horizontal_expansion_ratio*o_W)
+        c_H = int(vertical_expansion_ratio*o_H)
 
-        expand_img = np.ones((c_H, c_W, 3), dtype=np.uint8) * 127
+        expand_img = np.ones((c_H, c_W,3), dtype=np.uint8)*127
         original_img = np.array(input_image['image'])
-        expand_img[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
-                   int((c_W - o_W) / 2.0):int((c_W - o_W) / 2.0) +
-                   o_W, :] = original_img
+        expand_img[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = original_img
 
         blurry_gap = 10
 
-        expand_mask = np.ones((c_H, c_W, 3), dtype=np.uint8) * 255
-        if vertical_expansion_ratio == 1 and horizontal_expansion_ratio != 1:
-            expand_mask[int((c_H - o_H) / 2.0):int((c_H - o_H) / 2.0) + o_H,
-                        int((c_W - o_W) / 2.0) +
-                        blurry_gap:int((c_W - o_W) / 2.0) + o_W -
-                        blurry_gap, :] = 0
-        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio != 1:
-            expand_mask[int((c_H - o_H) / 2.0) +
-                        blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
-                        int((c_W - o_W) / 2.0) +
-                        blurry_gap:int((c_W - o_W) / 2.0) + o_W -
-                        blurry_gap, :] = 0
-        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio == 1:
-            expand_mask[int((c_H - o_H) / 2.0) +
-                        blurry_gap:int((c_H - o_H) / 2.0) + o_H - blurry_gap,
-                        int((c_W - o_W) /
-                            2.0):int((c_W - o_W) / 2.0) + o_W, :] = 0
+        expand_mask = np.ones((c_H, c_W,3), dtype=np.uint8)*255
+        if vertical_expansion_ratio == 1 and horizontal_expansion_ratio!=1:
+            expand_mask[int((c_H-o_H)/2.0):int((c_H-o_H)/2.0)+o_H,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
+        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio!=1:
+            expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0)+blurry_gap:int((c_W-o_W)/2.0)+o_W-blurry_gap,:] = 0 #noqa
+        elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio==1:
+            expand_mask[int((c_H-o_H)/2.0)+blurry_gap:int((c_H-o_H)/2.0)+o_H-blurry_gap,int((c_W-o_W)/2.0):int((c_W-o_W)/2.0)+o_W,:] = 0 #noqa
 
         input_image['image'] = Image.fromarray(expand_img)
         input_image['mask'] = Image.fromarray(expand_mask)
 
+
+
+
     promptA, promptB, negative_promptA, negative_promptB = add_task(
         prompt, negative_prompt, task)
-    # print(promptA, promptB, negative_promptA, negative_promptB)
+    print(promptA, promptB, negative_promptA, negative_promptB)
     img = np.array(input_image['image'].convert('RGB'))
 
     W = int(np.shape(img)[0] - np.shape(img)[0] % 8)
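The outpainting block above packs the canvas-and-mask construction into long one-liners. In effect it pastes the original image into the center of a gray canvas enlarged by the two expansion ratios, then marks everything outside the original for repainting, insetting the preserved region by `blurry_gap` pixels along each expanded axis so the seam is regenerated too. A more readable sketch of the same computation (illustrative names; it mirrors the three ratio branches the committed code handles):

```python
import numpy as np


def make_outpaint_canvas(img: np.ndarray, v_ratio: float, h_ratio: float,
                         blurry_gap: int = 10):
    """Return (canvas, mask); mask is 255 where the model should paint."""
    o_H, o_W = img.shape[:2]
    c_H, c_W = int(v_ratio * o_H), int(h_ratio * o_W)
    top, left = (c_H - o_H) // 2, (c_W - o_W) // 2

    canvas = np.full((c_H, c_W, 3), 127, dtype=np.uint8)  # gray padding
    canvas[top:top + o_H, left:left + o_W] = img

    mask = np.full((c_H, c_W, 3), 255, dtype=np.uint8)
    # Preserve the original area, inset by blurry_gap on expanded axes only.
    gy = blurry_gap if v_ratio != 1 else 0
    gx = blurry_gap if h_ratio != 1 else 0
    mask[top + gy:top + o_H - gy, left + gx:left + o_W - gx] = 0
    return canvas, mask
```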
@@ -191,8 +178,8 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
         num_inference_steps=ddim_steps).images[0]
     mask_np = np.array(input_image['mask'].convert('RGB'))
     red = np.array(result).astype('float') * 1
-    red[:, :, 0] = 0
-    red[:, :, 2] = 180.0
+    red[:, :, 0] = 180.0
+    red[:, :, 2] = 0
     red[:, :, 1] = 0
     result_m = np.array(result)
     result_m = Image.fromarray(
@@ -208,15 +195,17 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
 
     dict_res = [input_image['mask'].convert('RGB'), result_m]
 
-    return result_paste, dict_res
+    dict_out = [input_image['image'].convert('RGB'), result_paste]
+
+    return dict_out, dict_res
 
 
 def predict_controlnet(input_image, input_control_image, control_type, prompt,
-                       ddim_steps, scale, seed, negative_prompt):
+                       ddim_steps, scale, seed, negative_prompt,controlnet_conditioning_scale):
     promptA = prompt + ' P_obj'
     promptB = prompt + ' P_obj'
-    negative_promptA = negative_prompt + ' P_obj'
-    negative_promptB = negative_prompt + ' P_obj'
+    negative_promptA = negative_prompt
+    negative_promptB = negative_prompt
     size1, size2 = input_image['image'].convert('RGB').size
 
     if size1 < size2:
@@ -289,6 +278,7 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
         width=H,
         height=W,
         guidance_scale=scale,
+        controlnet_conditioning_scale = controlnet_conditioning_scale,
         num_inference_steps=ddim_steps).images[0]
     red = np.array(result).astype('float') * 1
     red[:, :, 0] = 180.0
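The added `controlnet_conditioning_scale=` line forwards the UI value into the pipeline call; the keyword scales the ControlNet residuals before they are added to the UNet, so 0.0 ignores the control image and 1.0 applies it at full strength. For reference, the stock diffusers ControlNet inpainting pipeline exposes the same keyword; a sketch with placeholder inputs, assuming the custom PowerPaint pipeline forwards it the same way:

```python
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetInpaintPipeline
from PIL import Image

controlnet = ControlNetModel.from_pretrained(
    'lllyasviel/sd-controlnet-canny', torch_dtype=torch.float16)
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    'runwayml/stable-diffusion-inpainting',
    controlnet=controlnet,
    torch_dtype=torch.float16).to('cuda')

# Placeholder images; real use feeds the photo, a drawn mask and an edge map.
init_image = Image.new('RGB', (512, 512), 'gray')
mask_image = Image.new('L', (512, 512), 255)  # white = region to repaint
control_image = Image.new('RGB', (512, 512), 'black')

result = pipe(
    prompt='a red sofa',
    image=init_image,
    mask_image=mask_image,
    control_image=control_image,
    controlnet_conditioning_scale=0.5,  # the value the new slider supplies
    num_inference_steps=45).images[0]
```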
@@ -307,29 +297,29 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
     ours_np = np.asarray(result) / 255.0
     ours_np = ours_np * m_img + (1 - m_img) * img_np
     result_paste = Image.fromarray(np.uint8(ours_np * 255))
-    return result_paste, [controlnet_image, result_m]
+    return [input_image['image'].convert('RGB'), result_paste], [controlnet_image, result_m]
 
 
 def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
           shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
           ddim_steps, scale, seed, task, enable_control, input_control_image,
-          control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
-          outpaint_prompt, outpaint_negative_prompt):
+          control_type,vertical_expansion_ratio,horizontal_expansion_ratio,outpaint_prompt,
+          outpaint_negative_prompt,controlnet_conditioning_scale,removal_prompt,
+          removal_negative_prompt):
     if task == 'text-guided':
         prompt = text_guided_prompt
         negative_prompt = text_guided_negative_prompt
     elif task == 'shape-guided':
         prompt = shape_guided_prompt
         negative_prompt = shape_guided_negative_prompt
     elif task == 'object-removal':
-        prompt = ''
-        negative_prompt = ''
+        prompt = removal_prompt
+        negative_prompt = removal_negative_prompt
     elif task == 'image-outpainting':
         prompt = outpaint_prompt
         negative_prompt = outpaint_negative_prompt
         return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
-                       seed, negative_prompt, task, vertical_expansion_ratio,
-                       horizontal_expansion_ratio)
+                       seed, negative_prompt, task,vertical_expansion_ratio,horizontal_expansion_ratio)
     else:
         task = 'text-guided'
         prompt = text_guided_prompt
@@ -338,10 +328,10 @@ def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
     if enable_control and task == 'text-guided':
         return predict_controlnet(input_image, input_control_image,
                                   control_type, prompt, ddim_steps, scale,
-                                  seed, negative_prompt)
+                                  seed, negative_prompt,controlnet_conditioning_scale)
     else:
         return predict(input_image, prompt, fitting_degree, ddim_steps, scale,
-                       seed, negative_prompt, task, None, None)
+                       seed, negative_prompt, task,None,None)
 
 
 def select_tab_text_guided():
@@ -351,7 +341,6 @@ def select_tab_text_guided():
 def select_tab_object_removal():
     return 'object-removal'
 
-
 def select_tab_image_outpainting():
     return 'image-outpainting'
 
@@ -371,16 +360,16 @@ def select_tab_shape_guided():
         "<a href='https://arxiv.org/abs/2312.03594/'>Paper</a> &ensp;"
         "<a href='https://github.com/open-mmlab/mmagic/tree/main/projects/powerpaint'>Code</a> </font></div>"  # noqa
     )
-
+    with gr.Row():
+        gr.Markdown(
+            "**Note:** Due to network-related factors, the page may experience occasional bugs! If the inpainting results deviate significantly from expectations, consider toggling between task options to refresh the content."  # noqa
+        )
     with gr.Row():
         with gr.Column():
             gr.Markdown('### Input image and draw mask')
             input_image = gr.Image(source='upload', tool='sketch', type='pil')
 
-            task = gr.Radio([
-                'text-guided', 'object-removal', 'shape-guided',
-                'image-outpainting'
-            ],
+            task = gr.Radio(['text-guided', 'object-removal', 'shape-guided', 'image-outpainting'],
                            show_label=False,
                            visible=False)
 
@@ -397,6 +386,13 @@ def select_tab_shape_guided():
                 enable_control = gr.Checkbox(
                     label='Enable controlnet',
                     info='Enable this if you want to use controlnet')
+                controlnet_conditioning_scale = gr.Slider(
+                    label='controlnet conditioning scale',
+                    minimum=0,
+                    maximum=1,
+                    step=0.05,
+                    value=0.5,
+                )
                 control_type = gr.Radio(['canny', 'pose', 'depth', 'hed'],
                                         label='Control type')
                 input_control_image = gr.Image(source='upload', type='pil')
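The slider added above follows the standard Gradio pattern: a `gr.Slider` placed in the layout is later appended to an event handler's `inputs` list (see the `run_button.click` hunk further down), and its current value arrives in the handler as a plain float. A minimal self-contained sketch of that wiring, with illustrative names:

```python
import gradio as gr


def run(strength: float) -> str:
    # In the demo, this float is forwarded to the pipeline call.
    return f'controlnet_conditioning_scale = {strength:.2f}'


with gr.Blocks() as demo:
    strength = gr.Slider(
        label='controlnet conditioning scale',
        minimum=0, maximum=1, step=0.05, value=0.5)
    out = gr.Textbox()
    gr.Button('Run').click(fn=run, inputs=[strength], outputs=[out])

demo.launch()
```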
@@ -408,7 +404,13 @@ def select_tab_shape_guided():
                 enable_object_removal = gr.Checkbox(
                     label='Enable object removal inpainting',
                     value=True,
+                    info='The recommended configuration for the Guidance Scale is 10 or higher. \
+                    If undesired objects appear in the masked area, \
+                    you can address this by specifically increasing the Guidance Scale.',
                     interactive=False)
+                removal_prompt = gr.Textbox(label='Prompt')
+                removal_negative_prompt = gr.Textbox(
+                    label='negative_prompt')
                 tab_object_removal.select(
                     fn=select_tab_object_removal, inputs=None, outputs=task)
 
@@ -417,6 +419,9 @@ def select_tab_shape_guided():
                 enable_object_removal = gr.Checkbox(
                     label='Enable image outpainting',
                     value=True,
+                    info='The recommended configuration for the Guidance Scale is 10 or higher. \
+                    If unwanted random objects appear in the extended image region, \
+                    you can enhance the cleanliness of the extension area by increasing the Guidance Scale.',
                     interactive=False)
                 outpaint_prompt = gr.Textbox(label='Outpainting_prompt')
                 outpaint_negative_prompt = gr.Textbox(
@@ -463,10 +468,7 @@ def select_tab_shape_guided():
                     label='Steps', minimum=1, maximum=50, value=45, step=1)
                 scale = gr.Slider(
                     label='Guidance Scale',
-                    info='For object removal, \
-                    it is recommended to set the value at 10 or above, \
-                    while for image outpainting, \
-                    it is advisable to set it at 18 or above.',
+                    info='For object removal and image outpainting, it is recommended to set the value at 10 or above.',  #noqa
                     minimum=0.1,
                     maximum=30.0,
                     value=7.5,
@@ -480,19 +482,21 @@ def select_tab_shape_guided():
                 )
         with gr.Column():
             gr.Markdown('### Inpainting result')
-            inpaint_result = gr.Image()
+            inpaint_result = gr.Gallery(
+                label='Generated images', show_label=False, columns=2)
             gr.Markdown('### Mask')
             gallery = gr.Gallery(
-                label='Generated images', show_label=False, columns=2)
+                label='Generated masks', show_label=False, columns=2)
 
     run_button.click(
         fn=infer,
         inputs=[
             input_image, text_guided_prompt, text_guided_negative_prompt,
             shape_guided_prompt, shape_guided_negative_prompt, fitting_degree,
             ddim_steps, scale, seed, task, enable_control, input_control_image,
-            control_type, vertical_expansion_ratio, horizontal_expansion_ratio,
-            outpaint_prompt, outpaint_negative_prompt
+            control_type,vertical_expansion_ratio,horizontal_expansion_ratio,
+            outpaint_prompt,outpaint_negative_prompt,controlnet_conditioning_scale,
+            removal_prompt,removal_negative_prompt
         ],
         outputs=[inpaint_result, gallery])
 
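The output change above is what drives the new return shapes in `predict` and `predict_controlnet`: a `gr.Image` output accepts a single image, while a `gr.Gallery` expects a list, so the handlers now return `[input, result]` pairs that render side by side with `columns=2`. A minimal sketch of that contract, with illustrative names:

```python
import gradio as gr
from PIL import Image


def run() -> list:
    original = Image.new('RGB', (64, 64), 'gray')
    result = Image.new('RGB', (64, 64), 'red')
    return [original, result]  # a list fills one gallery with two panes


with gr.Blocks() as demo:
    inpaint_result = gr.Gallery(
        label='Generated images', show_label=False, columns=2)
    gr.Button('Run').click(fn=run, inputs=None, outputs=[inpaint_result])

demo.launch()
```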
