11
11
StableDiffusionInpaintPipeline as Pipeline
12
12
from pipeline .pipeline_PowerPaint_ControlNet import \
13
13
StableDiffusionControlNetInpaintPipeline as controlnetPipeline
14
- from safetensors .torch import load_file
15
14
from transformers import DPTFeatureExtractor , DPTForDepthEstimation
16
15
from utils .utils import TokenizerWrapper , add_tokens
17
16
21
20
global pipe
22
21
pipe = Pipeline .from_pretrained (
23
22
'runwayml/stable-diffusion-inpainting' ,
24
- torch_dtype = weight_dtype ,
25
- safety_checker = None )
23
+ torch_dtype = weight_dtype )
26
24
pipe .tokenizer = TokenizerWrapper (
27
25
from_pretrained = 'runwayml/stable-diffusion-v1-5' ,
28
26
subfolder = 'tokenizer' ,
34
32
placeholder_tokens = ['P_ctxt' , 'P_shape' , 'P_obj' ],
35
33
initialize_tokens = ['a' , 'a' , 'a' ],
36
34
num_vectors_per_token = 10 )
37
- pipe .unet .load_state_dict (
38
- load_file (
39
- './models/unet/diffusion_pytorch_model.safetensors' , device = 'cuda' ),
40
- strict = False )
41
- pipe .text_encoder .load_state_dict (
42
- torch .load ('./models/text_encoder/pytorch_model.bin' ), strict = False )
35
+
36
+ from safetensors .torch import load_model
37
+ load_model (pipe .unet , "./models/unet/diffusion_pytorch_model.safetensors" )
38
+ pipe .text_encoder .load_state_dict (torch .load ("./models/text_encoder/pytorch_model.bin" ), strict = False )
43
39
pipe = pipe .to ('cuda' )
44
40
41
+
45
42
depth_estimator = DPTForDepthEstimation .from_pretrained (
46
43
'Intel/dpt-hybrid-midas' ).to ('cuda' )
47
44
feature_extractor = DPTFeatureExtractor .from_pretrained (
51
48
52
49
global current_control
53
50
current_control = 'canny'
54
- controlnet_conditioning_scale = 0.5
51
+ # controlnet_conditioning_scale = 0.8
55
52
56
53
57
54
def set_seed (seed ):
@@ -94,8 +91,8 @@ def add_task(prompt, negative_prompt, control_type):
94
91
elif control_type == 'shape-guided' :
95
92
promptA = prompt + ' P_shape'
96
93
promptB = prompt + ' P_ctxt'
97
- negative_promptA = negative_prompt + ' P_shape'
98
- negative_promptB = negative_prompt + ' P_ctxt'
94
+ negative_promptA = negative_prompt
95
+ negative_promptB = negative_prompt
99
96
elif control_type == 'image-outpainting' :
100
97
promptA = prompt + ' P_ctxt'
101
98
promptB = prompt + ' P_ctxt'
@@ -104,18 +101,18 @@ def add_task(prompt, negative_prompt, control_type):
104
101
else :
105
102
promptA = prompt + ' P_obj'
106
103
promptB = prompt + ' P_obj'
107
- negative_promptA = negative_prompt + ' P_obj'
108
- negative_promptB = negative_prompt + ' P_obj'
104
+ negative_promptA = negative_prompt
105
+ negative_promptB = negative_prompt
109
106
110
107
return promptA , promptB , negative_promptA , negative_promptB
111
108
112
109
110
+
113
111
def predict (input_image , prompt , fitting_degree , ddim_steps , scale , seed ,
114
- negative_prompt , task , vertical_expansion_ratio ,
115
- horizontal_expansion_ratio ):
112
+ negative_prompt , task ,vertical_expansion_ratio ,horizontal_expansion_ratio ):
116
113
size1 , size2 = input_image ['image' ].convert ('RGB' ).size
117
114
118
- if task != 'image-outpainting' :
115
+ if task != 'image-outpainting' :
119
116
if size1 < size2 :
120
117
input_image ['image' ] = input_image ['image' ].convert ('RGB' ).resize (
121
118
(640 , int (size2 / size1 * 640 )))
@@ -130,44 +127,34 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
130
127
input_image ['image' ] = input_image ['image' ].convert ('RGB' ).resize (
131
128
(int (size1 / size2 * 512 ), 512 ))
132
129
133
- if (vertical_expansion_ratio is not None ) and (horizontal_expansion_ratio
134
- is not None ): # noqa
135
- o_W , o_H = input_image ['image' ].convert ('RGB' ).size
136
- c_W = int (horizontal_expansion_ratio * o_W )
137
- c_H = int (vertical_expansion_ratio * o_H )
130
+ if vertical_expansion_ratio != None and horizontal_expansion_ratio != None :
131
+ o_W ,o_H = input_image ['image' ].convert ('RGB' ).size
132
+ c_W = int (horizontal_expansion_ratio * o_W )
133
+ c_H = int (vertical_expansion_ratio * o_H )
138
134
139
- expand_img = np .ones ((c_H , c_W , 3 ), dtype = np .uint8 ) * 127
135
+ expand_img = np .ones ((c_H , c_W ,3 ), dtype = np .uint8 )* 127
140
136
original_img = np .array (input_image ['image' ])
141
- expand_img [int ((c_H - o_H ) / 2.0 ):int ((c_H - o_H ) / 2.0 ) + o_H ,
142
- int ((c_W - o_W ) / 2.0 ):int ((c_W - o_W ) / 2.0 ) +
143
- o_W , :] = original_img
137
+ expand_img [int ((c_H - o_H )/ 2.0 ):int ((c_H - o_H )/ 2.0 )+ o_H ,int ((c_W - o_W )/ 2.0 ):int ((c_W - o_W )/ 2.0 )+ o_W ,:] = original_img
144
138
145
139
blurry_gap = 10
146
140
147
- expand_mask = np .ones ((c_H , c_W , 3 ), dtype = np .uint8 ) * 255
148
- if vertical_expansion_ratio == 1 and horizontal_expansion_ratio != 1 :
149
- expand_mask [int ((c_H - o_H ) / 2.0 ):int ((c_H - o_H ) / 2.0 ) + o_H ,
150
- int ((c_W - o_W ) / 2.0 ) +
151
- blurry_gap :int ((c_W - o_W ) / 2.0 ) + o_W -
152
- blurry_gap , :] = 0
153
- elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio != 1 :
154
- expand_mask [int ((c_H - o_H ) / 2.0 ) +
155
- blurry_gap :int ((c_H - o_H ) / 2.0 ) + o_H - blurry_gap ,
156
- int ((c_W - o_W ) / 2.0 ) +
157
- blurry_gap :int ((c_W - o_W ) / 2.0 ) + o_W -
158
- blurry_gap , :] = 0
159
- elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio == 1 :
160
- expand_mask [int ((c_H - o_H ) / 2.0 ) +
161
- blurry_gap :int ((c_H - o_H ) / 2.0 ) + o_H - blurry_gap ,
162
- int ((c_W - o_W ) /
163
- 2.0 ):int ((c_W - o_W ) / 2.0 ) + o_W , :] = 0
141
+ expand_mask = np .ones ((c_H , c_W ,3 ), dtype = np .uint8 )* 255
142
+ if vertical_expansion_ratio == 1 and horizontal_expansion_ratio != 1 :
143
+ expand_mask [int ((c_H - o_H )/ 2.0 ):int ((c_H - o_H )/ 2.0 )+ o_H ,int ((c_W - o_W )/ 2.0 )+ blurry_gap :int ((c_W - o_W )/ 2.0 )+ o_W - blurry_gap ,:] = 0 #noqa
144
+ elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio != 1 :
145
+ expand_mask [int ((c_H - o_H )/ 2.0 )+ blurry_gap :int ((c_H - o_H )/ 2.0 )+ o_H - blurry_gap ,int ((c_W - o_W )/ 2.0 )+ blurry_gap :int ((c_W - o_W )/ 2.0 )+ o_W - blurry_gap ,:] = 0 #noqa
146
+ elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio == 1 :
147
+ expand_mask [int ((c_H - o_H )/ 2.0 )+ blurry_gap :int ((c_H - o_H )/ 2.0 )+ o_H - blurry_gap ,int ((c_W - o_W )/ 2.0 ):int ((c_W - o_W )/ 2.0 )+ o_W ,:] = 0 #noqa
164
148
165
149
input_image ['image' ] = Image .fromarray (expand_img )
166
150
input_image ['mask' ] = Image .fromarray (expand_mask )
167
151
152
+
153
+
154
+
168
155
promptA , promptB , negative_promptA , negative_promptB = add_task (
169
156
prompt , negative_prompt , task )
170
- # print(promptA, promptB, negative_promptA, negative_promptB)
157
+ print (promptA , promptB , negative_promptA , negative_promptB )
171
158
img = np .array (input_image ['image' ].convert ('RGB' ))
172
159
173
160
W = int (np .shape (img )[0 ] - np .shape (img )[0 ] % 8 )
@@ -191,8 +178,8 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
191
178
num_inference_steps = ddim_steps ).images [0 ]
192
179
mask_np = np .array (input_image ['mask' ].convert ('RGB' ))
193
180
red = np .array (result ).astype ('float' ) * 1
194
- red [:, :, 0 ] = 0
195
- red [:, :, 2 ] = 180. 0
181
+ red [:, :, 0 ] = 180. 0
182
+ red [:, :, 2 ] = 0
196
183
red [:, :, 1 ] = 0
197
184
result_m = np .array (result )
198
185
result_m = Image .fromarray (
@@ -208,15 +195,17 @@ def predict(input_image, prompt, fitting_degree, ddim_steps, scale, seed,
208
195
209
196
dict_res = [input_image ['mask' ].convert ('RGB' ), result_m ]
210
197
211
- return result_paste , dict_res
198
+ dict_out = [input_image ['image' ].convert ('RGB' ), result_paste ]
199
+
200
+ return dict_out , dict_res
212
201
213
202
214
203
def predict_controlnet (input_image , input_control_image , control_type , prompt ,
215
- ddim_steps , scale , seed , negative_prompt ):
204
+ ddim_steps , scale , seed , negative_prompt , controlnet_conditioning_scale ):
216
205
promptA = prompt + ' P_obj'
217
206
promptB = prompt + ' P_obj'
218
- negative_promptA = negative_prompt + ' P_obj'
219
- negative_promptB = negative_prompt + ' P_obj'
207
+ negative_promptA = negative_prompt
208
+ negative_promptB = negative_prompt
220
209
size1 , size2 = input_image ['image' ].convert ('RGB' ).size
221
210
222
211
if size1 < size2 :
@@ -289,6 +278,7 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
289
278
width = H ,
290
279
height = W ,
291
280
guidance_scale = scale ,
281
+ controlnet_conditioning_scale = controlnet_conditioning_scale ,
292
282
num_inference_steps = ddim_steps ).images [0 ]
293
283
red = np .array (result ).astype ('float' ) * 1
294
284
red [:, :, 0 ] = 180.0
@@ -307,29 +297,29 @@ def predict_controlnet(input_image, input_control_image, control_type, prompt,
307
297
ours_np = np .asarray (result ) / 255.0
308
298
ours_np = ours_np * m_img + (1 - m_img ) * img_np
309
299
result_paste = Image .fromarray (np .uint8 (ours_np * 255 ))
310
- return result_paste , [controlnet_image , result_m ]
300
+ return [ input_image [ 'image' ]. convert ( 'RGB' ), result_paste ] , [controlnet_image , result_m ]
311
301
312
302
313
303
def infer (input_image , text_guided_prompt , text_guided_negative_prompt ,
314
304
shape_guided_prompt , shape_guided_negative_prompt , fitting_degree ,
315
305
ddim_steps , scale , seed , task , enable_control , input_control_image ,
316
- control_type , vertical_expansion_ratio , horizontal_expansion_ratio ,
317
- outpaint_prompt , outpaint_negative_prompt ):
306
+ control_type ,vertical_expansion_ratio ,horizontal_expansion_ratio ,outpaint_prompt ,
307
+ outpaint_negative_prompt ,controlnet_conditioning_scale ,removal_prompt ,
308
+ removal_negative_prompt ):
318
309
if task == 'text-guided' :
319
310
prompt = text_guided_prompt
320
311
negative_prompt = text_guided_negative_prompt
321
312
elif task == 'shape-guided' :
322
313
prompt = shape_guided_prompt
323
314
negative_prompt = shape_guided_negative_prompt
324
315
elif task == 'object-removal' :
325
- prompt = ''
326
- negative_prompt = ''
316
+ prompt = removal_prompt
317
+ negative_prompt = removal_negative_prompt
327
318
elif task == 'image-outpainting' :
328
319
prompt = outpaint_prompt
329
320
negative_prompt = outpaint_negative_prompt
330
321
return predict (input_image , prompt , fitting_degree , ddim_steps , scale ,
331
- seed , negative_prompt , task , vertical_expansion_ratio ,
332
- horizontal_expansion_ratio )
322
+ seed , negative_prompt , task ,vertical_expansion_ratio ,horizontal_expansion_ratio )
333
323
else :
334
324
task = 'text-guided'
335
325
prompt = text_guided_prompt
@@ -338,10 +328,10 @@ def infer(input_image, text_guided_prompt, text_guided_negative_prompt,
338
328
if enable_control and task == 'text-guided' :
339
329
return predict_controlnet (input_image , input_control_image ,
340
330
control_type , prompt , ddim_steps , scale ,
341
- seed , negative_prompt )
331
+ seed , negative_prompt , controlnet_conditioning_scale )
342
332
else :
343
333
return predict (input_image , prompt , fitting_degree , ddim_steps , scale ,
344
- seed , negative_prompt , task , None , None )
334
+ seed , negative_prompt , task ,None ,None )
345
335
346
336
347
337
def select_tab_text_guided ():
@@ -351,7 +341,6 @@ def select_tab_text_guided():
351
341
def select_tab_object_removal ():
352
342
return 'object-removal'
353
343
354
-
355
344
def select_tab_image_outpainting ():
356
345
return 'image-outpainting'
357
346
@@ -371,16 +360,16 @@ def select_tab_shape_guided():
371
360
"<a href='https://arxiv.org/abs/2312.03594/'>Paper</a>  "
372
361
"<a href='https://github.com/open-mmlab/mmagic/tree/main/projects/powerpaint'>Code</a> </font></div>" # noqa
373
362
)
374
-
363
+ with gr .Row ():
364
+ gr .Markdown (
365
+ "**Note:** Due to network-related factors, the page may experience occasional bugs! If the inpainting results deviate significantly from expectations, consider toggling between task options to refresh the content." # noqa
366
+ )
375
367
with gr .Row ():
376
368
with gr .Column ():
377
369
gr .Markdown ('### Input image and draw mask' )
378
370
input_image = gr .Image (source = 'upload' , tool = 'sketch' , type = 'pil' )
379
371
380
- task = gr .Radio ([
381
- 'text-guided' , 'object-removal' , 'shape-guided' ,
382
- 'image-outpainting'
383
- ],
372
+ task = gr .Radio (['text-guided' , 'object-removal' , 'shape-guided' , 'image-outpainting' ],
384
373
show_label = False ,
385
374
visible = False )
386
375
@@ -397,6 +386,13 @@ def select_tab_shape_guided():
397
386
enable_control = gr .Checkbox (
398
387
label = 'Enable controlnet' ,
399
388
info = 'Enable this if you want to use controlnet' )
389
+ controlnet_conditioning_scale = gr .Slider (
390
+ label = 'controlnet conditioning scale' ,
391
+ minimum = 0 ,
392
+ maximum = 1 ,
393
+ step = 0.05 ,
394
+ value = 0.5 ,
395
+ )
400
396
control_type = gr .Radio (['canny' , 'pose' , 'depth' , 'hed' ],
401
397
label = 'Control type' )
402
398
input_control_image = gr .Image (source = 'upload' , type = 'pil' )
@@ -408,7 +404,13 @@ def select_tab_shape_guided():
408
404
enable_object_removal = gr .Checkbox (
409
405
label = 'Enable object removal inpainting' ,
410
406
value = True ,
407
+ info = 'The recommended configuration for the Guidance Scale is 10 or higher. \
408
+ If undesired objects appear in the masked area, \
409
+ you can address this by specifically increasing the Guidance Scale.' ,
411
410
interactive = False )
411
+ removal_prompt = gr .Textbox (label = 'Prompt' )
412
+ removal_negative_prompt = gr .Textbox (
413
+ label = 'negative_prompt' )
412
414
tab_object_removal .select (
413
415
fn = select_tab_object_removal , inputs = None , outputs = task )
414
416
@@ -417,6 +419,9 @@ def select_tab_shape_guided():
417
419
enable_object_removal = gr .Checkbox (
418
420
label = 'Enable image outpainting' ,
419
421
value = True ,
422
+ info = 'The recommended configuration for the Guidance Scale is 10 or higher. \
423
+ If unwanted random objects appear in the extended image region, \
424
+ you can enhance the cleanliness of the extension area by increasing the Guidance Scale.' ,
420
425
interactive = False )
421
426
outpaint_prompt = gr .Textbox (label = 'Outpainting_prompt' )
422
427
outpaint_negative_prompt = gr .Textbox (
@@ -463,10 +468,7 @@ def select_tab_shape_guided():
463
468
label = 'Steps' , minimum = 1 , maximum = 50 , value = 45 , step = 1 )
464
469
scale = gr .Slider (
465
470
label = 'Guidance Scale' ,
466
- info = 'For object removal, \
467
- it is recommended to set the value at 10 or above, \
468
- while for image outpainting, \
469
- it is advisable to set it at 18 or above.' ,
471
+ info = 'For object removal and image outpainting, it is recommended to set the value at 10 or above.' , #noqa
470
472
minimum = 0.1 ,
471
473
maximum = 30.0 ,
472
474
value = 7.5 ,
@@ -480,19 +482,21 @@ def select_tab_shape_guided():
480
482
)
481
483
with gr .Column ():
482
484
gr .Markdown ('### Inpainting result' )
483
- inpaint_result = gr .Image ()
485
+ inpaint_result = gr .Gallery (
486
+ label = 'Generated images' , show_label = False , columns = 2 )
484
487
gr .Markdown ('### Mask' )
485
488
gallery = gr .Gallery (
486
- label = 'Generated images ' , show_label = False , columns = 2 )
489
+ label = 'Generated masks ' , show_label = False , columns = 2 )
487
490
488
491
run_button .click (
489
492
fn = infer ,
490
493
inputs = [
491
494
input_image , text_guided_prompt , text_guided_negative_prompt ,
492
495
shape_guided_prompt , shape_guided_negative_prompt , fitting_degree ,
493
496
ddim_steps , scale , seed , task , enable_control , input_control_image ,
494
- control_type , vertical_expansion_ratio , horizontal_expansion_ratio ,
495
- outpaint_prompt , outpaint_negative_prompt
497
+ control_type ,vertical_expansion_ratio ,horizontal_expansion_ratio ,
498
+ outpaint_prompt ,outpaint_negative_prompt ,controlnet_conditioning_scale ,
499
+ removal_prompt ,removal_negative_prompt
496
500
],
497
501
outputs = [inpaint_result , gallery ])
498
502
0 commit comments