
Commit c80d903

fix
1 parent bf8a59d commit c80d903


7 files changed: +395 −348 lines changed


CaptchaCracker/__init__.py

Lines changed: 5 additions & 337 deletions
@@ -1,344 +1,12 @@
+
+from .core import CreateModel, ApplyModel
+
 ## Information
-__version__ = """0.0.4"""
+__version__ = """0.0.5"""
 __info__ = """
 - Author : Wooil Jeong
 
 - Github : https://github.com/WooilJeong/
 - Blog : https://wooiljeong.github.io\
 """
-__all__ = ["__version__", "__info__"]
-
-import os
-import glob
-import numpy as np
-
-from pathlib import Path
-from collections import Counter
-
-import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
-
-class CTCLayer(layers.Layer):
-    def __init__(self, name=None):
-        super().__init__(name=name)
-        self.loss_fn = keras.backend.ctc_batch_cost
-
-    def call(self, y_true, y_pred):
-        # Compute the training-time loss value and add it
-        # to the layer using `self.add_loss()`.
-        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
-        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
-        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
-
-        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
-        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
-
-        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
-        self.add_loss(loss)
-
-        # At test time, just return the computed predictions
-        return y_pred
-
-
-
-class CreateModel:
-
-    def __init__(self, train_img_path, img_width=200, img_height=50):
-        # Image size
-        self.img_width = img_width
-        self.img_height = img_height
-        # List of training image file paths
-        self.images = sorted(train_img_path)
-        # List of labels for the training images
-        self.labels = [img.split(os.path.sep)[-1].split(".png")[0] for img in self.images]
-        # Set of characters appearing in the labels
-        self.characters = set(char for label in self.labels for char in label)
-        # Maximum label length
-        self.max_length = max([len(label) for label in self.labels])
-
-        # Mapping characters to integers
-        self.char_to_num = layers.experimental.preprocessing.StringLookup(
-            vocabulary=sorted(self.characters), num_oov_indices=0, mask_token=None
-        )
-        # Mapping integers back to original characters
-        self.num_to_char = layers.experimental.preprocessing.StringLookup(
-            vocabulary=self.char_to_num.get_vocabulary(), mask_token=None, invert=True
-        )
-
-    def train_model(self, epochs=100):
-        # Batch size for training and validation
-        batch_size = 16
-        # Downsampling factor (Conv: 2, Pooling: 2)
-        downsample_factor = 4
-
-        # Splitting data into training and validation sets
-        x_train, x_valid, y_train, y_valid = self.split_data(np.array(self.images), np.array(self.labels))
-
-        train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
-        train_dataset = (
-            train_dataset.map(
-                self.encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
-            )
-            .batch(batch_size)
-            .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-        )
-
-        validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
-        validation_dataset = (
-            validation_dataset.map(
-                self.encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
-            )
-            .batch(batch_size)
-            .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-        )
-
-        # Get the model
-        model = self.build_model()
-
-
-        early_stopping_patience = 10
-        # Add early stopping
-        early_stopping = keras.callbacks.EarlyStopping(
-            monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
-        )
-
-        # Train the model
-        history = model.fit(
-            train_dataset,
-            validation_data=validation_dataset,
-            epochs=epochs,
-            callbacks=[early_stopping],
-        )
-
-        return model
-
-
-    def encode_single_sample(self, img_path, label):
-        # 1. Read image
-        img = tf.io.read_file(img_path)
-        # 2. Decode and convert to grayscale
-        img = tf.io.decode_png(img, channels=1)
-        # 3. Convert to float32 in [0, 1] range
-        img = tf.image.convert_image_dtype(img, tf.float32)
-        # 4. Resize to the desired size
-        img = tf.image.resize(img, [self.img_height, self.img_width])
-        # 5. Transpose the image because we want the time
-        # dimension to correspond to the width of the image.
-        img = tf.transpose(img, perm=[1, 0, 2])
-        # 6. Map the characters in label to numbers
-        label = self.char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
-        # 7. Return a dict as our model is expecting two inputs
-        return {"image": img, "label": label}
-
-    def build_model(self):
-        # Inputs to the model
-        input_img = layers.Input(
-            shape=(self.img_width, self.img_height, 1), name="image", dtype="float32"
-        )
-        labels = layers.Input(name="label", shape=(None,), dtype="float32")
-
-        # First conv block
-        x = layers.Conv2D(
-            32,
-            (3, 3),
-            activation="relu",
-            kernel_initializer="he_normal",
-            padding="same",
-            name="Conv1",
-        )(input_img)
-        x = layers.MaxPooling2D((2, 2), name="pool1")(x)
-
-        # Second conv block
-        x = layers.Conv2D(
-            64,
-            (3, 3),
-            activation="relu",
-            kernel_initializer="he_normal",
-            padding="same",
-            name="Conv2",
-        )(x)
-        x = layers.MaxPooling2D((2, 2), name="pool2")(x)
-
-        # We have used two max pool with pool size and strides 2.
-        # Hence, downsampled feature maps are 4x smaller. The number of
-        # filters in the last layer is 64. Reshape accordingly before
-        # passing the output to the RNN part of the model
-        new_shape = ((self.img_width // 4), (self.img_height // 4) * 64)
-        x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
-        x = layers.Dense(64, activation="relu", name="dense1")(x)
-        x = layers.Dropout(0.2)(x)
-
-        # RNNs
-        x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
-        x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)
-
-        # Output layer
-        x = layers.Dense(len(self.characters) + 1, activation="softmax", name="dense2")(x)
-
-        # Add CTC layer for calculating CTC loss at each step
-        output = CTCLayer(name="ctc_loss")(labels, x)
-
-        # Define the model
-        model = keras.models.Model(
-            inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
-        )
-        # Optimizer
-        opt = keras.optimizers.Adam()
-        # Compile the model and return
-        model.compile(optimizer=opt)
-        return model
-
-    def split_data(self, images, labels, train_size=0.9, shuffle=True):
-        # 1. Get the total size of the dataset
-        size = len(images)
-        # 2. Make an indices array and shuffle it, if required
-        indices = np.arange(size)
-        if shuffle:
-            np.random.shuffle(indices)
-        # 3. Get the size of training samples
-        train_samples = int(size * train_size)
-        # 4. Split data into training and validation sets
-        x_train, y_train = images[indices[:train_samples]], labels[indices[:train_samples]]
-        x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]]
-        return x_train, x_valid, y_train, y_valid
-
-
-class ApplyModel:
-
-    def __init__(self,
-                 weights_path,
-                 img_width=200,
-                 img_height=50,
-                 max_length=6,
-                 characters={'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}):
-
-        self.img_width = img_width
-        self.img_height = img_height
-        self.max_length = max_length
-        self.characters = characters
-
-        # Mapping characters to integers
-        self.char_to_num = layers.experimental.preprocessing.StringLookup(
-            vocabulary=sorted(self.characters), num_oov_indices=0, mask_token=None
-        )
-        # Mapping integers back to original characters
-        self.num_to_char = layers.experimental.preprocessing.StringLookup(
-            vocabulary=self.char_to_num.get_vocabulary(), mask_token=None, invert=True
-        )
-        # Model
-        self.model = self.build_model()
-        self.model.load_weights(weights_path)
-        self.prediction_model = keras.models.Model(
-            self.model.get_layer(name="image").input, self.model.get_layer(name="dense2").output
-        )
-
-    def predict(self, target_img_path):
-        target_img = self.encode_single_sample(target_img_path)['image']
-        target_img = tf.reshape(target_img, shape=[1, self.img_width, self.img_height, 1])
-        pred_val = self.prediction_model.predict(target_img)
-        pred = self.decode_batch_predictions(pred_val)[0]
-        return pred
-
-    def encode_single_sample(self, img_path):
-        # 1. Read image
-        img = tf.io.read_file(img_path)
-        # 2. Decode and convert to grayscale
-        img = tf.io.decode_png(img, channels=1)
-        # 3. Convert to float32 in [0, 1] range
-        img = tf.image.convert_image_dtype(img, tf.float32)
-        # 4. Resize to the desired size
-        img = tf.image.resize(img, [self.img_height, self.img_width])
-        # 5. Transpose the image because we want the time
-        # dimension to correspond to the width of the image.
-        img = tf.transpose(img, perm=[1, 0, 2])
-        # 6. Map the characters in label to numbers
-        # 7. Return a dict as our model is expecting two inputs
-        return {"image": img}
-
-    def build_model(self):
-        # Inputs to the model
-        input_img = layers.Input(
-            shape=(self.img_width, self.img_height, 1), name="image", dtype="float32"
-        )
-        labels = layers.Input(name="label", shape=(None,), dtype="float32")
-
-        # First conv block
-        x = layers.Conv2D(
-            32,
-            (3, 3),
-            activation="relu",
-            kernel_initializer="he_normal",
-            padding="same",
-            name="Conv1",
-        )(input_img)
-        x = layers.MaxPooling2D((2, 2), name="pool1")(x)
-
-        # Second conv block
-        x = layers.Conv2D(
-            64,
-            (3, 3),
-            activation="relu",
-            kernel_initializer="he_normal",
-            padding="same",
-            name="Conv2",
-        )(x)
-        x = layers.MaxPooling2D((2, 2), name="pool2")(x)
-
-        # We have used two max pool with pool size and strides 2.
-        # Hence, downsampled feature maps are 4x smaller. The number of
-        # filters in the last layer is 64. Reshape accordingly before
-        # passing the output to the RNN part of the model
-        new_shape = ((self.img_width // 4), (self.img_height // 4) * 64)
-        x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
-        x = layers.Dense(64, activation="relu", name="dense1")(x)
-        x = layers.Dropout(0.2)(x)
-
-        # RNNs
-        x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
-        x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)
-
-        # Output layer
-        x = layers.Dense(len(self.characters) + 1, activation="softmax", name="dense2")(x)
-
-        # Add CTC layer for calculating CTC loss at each step
-        output = CTCLayer(name="ctc_loss")(labels, x)
-
-        # Define the model
-        model = keras.models.Model(
-            inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
-        )
-        # Optimizer
-        opt = keras.optimizers.Adam()
-        # Compile the model and return
-        model.compile(optimizer=opt)
-        return model
-
-    def split_data(self, images, labels, train_size=0.9, shuffle=True):
-        # 1. Get the total size of the dataset
-        size = len(images)
-        # 2. Make an indices array and shuffle it, if required
-        indices = np.arange(size)
-        if shuffle:
-            np.random.shuffle(indices)
-        # 3. Get the size of training samples
-        train_samples = int(size * train_size)
-        # 4. Split data into training and validation sets
-        x_train, y_train = images[indices[:train_samples]], labels[indices[:train_samples]]
-        x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]]
-        return x_train, x_valid, y_train, y_valid
-
-
-    # A utility function to decode the output of the network
-    def decode_batch_predictions(self, pred):
-        input_len = np.ones(pred.shape[0]) * pred.shape[1]
-        # Use greedy search. For complex tasks, you can use beam search
-        results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
-            :, :self.max_length
-        ]
-        # Iterate over the results and get back the text
-        output_text = []
-        for res in results:
-            res = tf.strings.reduce_join(self.num_to_char(res+1)).numpy().decode("utf-8")
-            output_text.append(res)
-        return output_text
+__all__ = ["__version__", "__info__", "CreateModel", "ApplyModel"]
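
For context, here is a minimal usage sketch of the package as it stands after this commit. CreateModel, ApplyModel, and their signatures are taken from the code above (now moved into CaptchaCracker/core.py); the training glob pattern, the target image path, and the weights.h5 filename are hypothetical, and save_weights is the standard Keras Model method on the object returned by train_model:

    import glob
    from CaptchaCracker import CreateModel, ApplyModel

    # Train on captcha images whose filenames are their labels, e.g. "02468.png"
    # (hypothetical directory layout).
    train_img_paths = glob.glob("data/train/*.png")
    model = CreateModel(train_img_paths, img_width=200, img_height=50).train_model(epochs=100)
    model.save_weights("weights.h5")  # hypothetical filename

    # Restore the weights and read a single captcha (hypothetical target path).
    applied = ApplyModel("weights.h5", img_width=200, img_height=50, max_length=6,
                         characters={'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'})
    print(applied.predict("data/target.png"))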
