Open_WebUI.py
print("importing dependiencies")
import gradio as gr
import argparse
import torch
import sys
import base64
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
def parse_args():
    parser = argparse.ArgumentParser(description='A simple Gradio implementation for summarization models')
    parser.add_argument('--model', default='pszemraj/led-base-book-summary',
                        help='Hugging Face directory of the model to use. format: (userName/modelName)')
    return parser.parse_args()
args = parse_args()
# not sure whether inference should be wrapped in torch.no_grad() or not
print("loading tokenizer")
tokenizer = AutoTokenizer.from_pretrained(args.model,cache_dir="modelsCache/"+args.model.split("/")[1])
print("loading model")
model = AutoModelForSeq2SeqLM.from_pretrained(args.model,cache_dir="modelsCache/"+args.model.split("/")[1])
print("building summarizer pipeline")
summarizer = pipeline(
    "summarization",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)
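# A quick sanity check of the pipeline could look like this (a sketch, not executed at import time):
#     summarizer("some long passage of text to condense...")[0]["summary_text"]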
# the generation arguments that may be exposed as controls for the loaded model
modelArgs = [
    "max_length",
    "min_length",
    "temperature",
    "num_beams",
    "top_k",
    "top_p",
    "typical_p",
    "num_return_sequences",
    "num_beam_groups",
    "diversity_penalty",
    "no_repeat_ngram_size",
    "encoder_no_repeat_ngram_size",
    "repetition_penalty",
    "length_penalty",
    "early_stopping",
    "renormalize_logits",
    "do_sample",
    "use_cache",
    "remove_invalid_values",
    "synced_gpus"
]
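# A possible alternative sketch (not what the code below does, and assuming transformers'
# GenerationConfig.to_dict() is available): the defaults could be collected in one step with
#     defaults = model.generation_config.to_dict()
#     argumentDict = {k: defaults[k] for k in modelArgs if k in defaults}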
# notes on how the default data type of a generation argument can be inspected:
#   model.generate.__wrapped__.__annotations__["early_stopping"].__args__[0]
#   model.generate.__wrapped__.__annotations__[modelArgs[i]].__args__[0]
#   model.generation_config.__dict__ / type(model.config.__dict__[key]) checks
webUiInteractions = []       # Gradio input components, one per exposed generation argument
defaultModelArgsValues = []  # default value for each exposed argument
validModelKwargs = []        # names of the arguments actually found on the loaded model
argumentDict = {}            # name -> value mapping passed on to generate()/the pipeline
# picks a Gradio component type for each argument so it can be controlled from the web UI
def chooseInterface(argType, i):
    # maps the Python type of argument i to a matching Gradio input component
    match argType.__name__:
        case "int":
            webUiInteractions.append(gr.Number(label=validModelKwargs[i], value=defaultModelArgsValues[i], precision=0))
        case "float":
            webUiInteractions.append(gr.Number(label=validModelKwargs[i], value=defaultModelArgsValues[i]))
        case "str":
            webUiInteractions.append(gr.Textbox(label=validModelKwargs[i], value=defaultModelArgsValues[i]))
        case "bool":
            webUiInteractions.append(gr.Checkbox(label=validModelKwargs[i], value=defaultModelArgsValues[i]))
        case _:
            print("arg not found")
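# For example, once validModelKwargs and defaultModelArgsValues are filled in below,
# chooseInterface(int, 0) would append a whole-number gr.Number control for the first
# discovered argument (a usage note, not executed here).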
# checks the Python minor version: on 3.10 argument types are read from the type
# annotations of model.generate, otherwise from generation_config
if sys.version_info[1] == 10:
    # need to implement the case where model.generate.__wrapped__.__annotations__[modelArgs[i]] doesn't exist
    for i in range(len(modelArgs)):
        try:
            defaultArg = model.config.__dict__[modelArgs[i]]
        except KeyError:
            defaultArg = None
        defaultModelArgsValues.append(defaultArg)
        validModelKwargs.append(modelArgs[i])
    defaultModelArgsValues[validModelKwargs.index("use_cache")] = True
    for i in range(len(validModelKwargs)):
        argType = model.generate.__wrapped__.__annotations__[validModelKwargs[i]].__args__[0]
        chooseInterface(argType, i)
else:
    for i in range(len(modelArgs)):
        try:
            defaultArg = model.config.__dict__[modelArgs[i]]
        except KeyError:
            try:
                defaultArg = model.generation_config.__dict__[modelArgs[i]]
            except KeyError:
                continue
            # dont ask
        defaultModelArgsValues.append(defaultArg)
        validModelKwargs.append(modelArgs[i])
    defaultModelArgsValues[validModelKwargs.index("use_cache")] = True
    for i in range(len(validModelKwargs)):
        argType = type(model.generation_config.__dict__[validModelKwargs[i]])
        chooseInterface(argType, i)
# long-input path: tokenizes up to 16384 tokens and sets a global attention mask before generating
def largeInputPipeline(batch, newArgs):
    print("using larger batch processing and attention masks")
    print("preparing input...")
    # with torch.no_grad():  # everything except result
    inputs_dict = tokenizer(batch, padding="max_length", max_length=16384, return_tensors="pt", truncation=True)
    # move tensors to the same device as the model (GPU if available, otherwise CPU)
    input_ids = inputs_dict.input_ids.to(model.device)
    attention_mask = inputs_dict.attention_mask.to(model.device)
    global_attention_mask = torch.zeros_like(attention_mask)
    # put global attention on the <s> token
    global_attention_mask[:, 0] = 1
    print("starting summarization...")
    predicted_abstract_ids = model.generate(input_ids, attention_mask=attention_mask, global_attention_mask=global_attention_mask, **newArgs)
    print("preparing output...")
    batchResult = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    return batchResult
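# A minimal sketch (commented out, an assumption rather than something this script does) of
# wrapping the generate() call above in torch.no_grad() to disable gradient tracking:
#     with torch.no_grad():
#         predicted_abstract_ids = model.generate(input_ids, attention_mask=attention_mask,
#                                                 global_attention_mask=global_attention_mask, **newArgs)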
# combines the argument names (validModelKwargs) and the default values from the loaded model into a new dict
for A, B in zip(validModelKwargs, defaultModelArgsValues):
    argumentDict[A] = B
# main handler function
def generateOutput(wall_of_text, txtFile, isLargeText, *newArgs):
    for i in range(len(validModelKwargs)):
        argumentDict[validModelKwargs[i]] = newArgs[i]
    if txtFile:
        # apparently the Gradio file system doesn't retain the original binary information and converts \n into \r\n
        # also Gradio forces temp files for "security", and using binary mode is not supposed to do that according to the docs
        # with default settings it actually makes 2 identical temp files, which is stupid. at least this method cuts it in half -_-
        text = txtFile.replace(b'\r\n', b'\n').decode()
        print("|using file|")
    else:
        text = wall_of_text
    if isLargeText:
        result = largeInputPipeline(text, argumentDict)
        print(result)
        result = result[0]
    else:
        print("starting summarization...")
        # with torch.no_grad():  # just the result line
        result = summarizer(text, **argumentDict)
        print(result)
        result = result[0]["summary_text"]
    return str(result), len(tokenizer.tokenize(str(text))) + 2, len(tokenizer.tokenize(str(result))) + 2
# Gradio UI wiring
inputTokens = gr.Number(label="input token count")
outputTokens = gr.Number(label="output token count")
file=gr.File(type="binary")
isBigFile=gr.Checkbox(label="check me if large text ~>10,000 words",value=False)
demo = gr.Interface(fn=generateOutput, inputs=[gr.Textbox(lines=30),file,isBigFile,*webUiInteractions], outputs=[gr.Textbox(),inputTokens,outputTokens])
demo.launch(inbrowser=True,inline=True)
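# Example invocation (using the default model from parse_args):
#     python Open_WebUI.py --model pszemraj/led-base-book-summary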