1
- from decimal import Decimal # Used to offer any user-defined precision.
1
+ from decimal import Decimal
2
2
3
3
class ArithmeticEncoding :
4
4
"""
@@ -20,6 +20,10 @@ def __init__(self, frequency_table, save_stages=False):
20
20
def get_probability_table (self , frequency_table ):
21
21
"""
22
22
Calculates the probability table out of the frequency table.
23
+
24
+ frequency_table: A table of the term frequencies.
25
+
26
+ Returns the probability table.
23
27
"""
24
28
total_frequency = sum (list (frequency_table .values ()))
25
29
@@ -32,6 +36,10 @@ def get_probability_table(self, frequency_table):
32
36
def get_encoded_value (self , last_stage_probs ):
33
37
"""
34
38
After encoding the entire message, this method returns the single value that represents the entire message.
39
+
40
+ last_stage_probs: A list of the probabilities in the last stage.
41
+
42
+ Returns the minimum and maximum probabilities in the last stage in addition to the value encoding the message.
35
43
"""
36
44
last_stage_probs = list (last_stage_probs .values ())
37
45
last_stage_values = []
@@ -41,13 +49,21 @@ def get_encoded_value(self, last_stage_probs):
41
49
42
50
last_stage_min = min (last_stage_values )
43
51
last_stage_max = max (last_stage_values )
52
+ encoded_value = (last_stage_min + last_stage_max )/ 2
44
53
45
- return ( last_stage_min + last_stage_max ) / 2
54
+ return last_stage_min , last_stage_max , encoded_value
46
55
47
56
def process_stage (self , probability_table , stage_min , stage_max ):
48
57
"""
49
58
Processing a stage in the encoding/decoding process.
59
+
60
+ probability_table: The probability table.
61
+ stage_min: The minimum probability of the current stage.
62
+ stage_max: The maximum probability of the current stage.
63
+
64
+ Returns the probabilities in the stage.
50
65
"""
66
+
51
67
stage_probs = {}
52
68
stage_domain = stage_max - stage_min
53
69
for term_idx in range (len (probability_table .items ())):
@@ -60,10 +76,14 @@ def process_stage(self, probability_table, stage_min, stage_max):
60
76
61
77
def encode (self , msg , probability_table ):
62
78
"""
63
- Encodes a message.
79
+ Encodes a message using arithmetic encoding.
80
+
81
+ msg: The message to be encoded.
82
+ probability_table: The probability table.
83
+
84
+ Returns the floating-point value representing the encoded message, the encoder, and the minimum and maximum values of the interval in which the floating-point value falls.
64
85
"""
65
86
66
- # Make sure
67
87
msg = list (msg )
68
88
69
89
encoder = []
@@ -86,13 +106,98 @@ def encode(self, msg, probability_table):
86
106
if self .save_stages :
87
107
encoder .append (last_stage_probs )
88
108
89
- encoded_msg = self .get_encoded_value (last_stage_probs )
109
+ interval_min_value , interval_max_value , encoded_msg = self .get_encoded_value (last_stage_probs )
110
+
111
+ return encoded_msg , encoder , interval_min_value , interval_max_value
112
+
113
def process_stage_binary(self, float_interval_min, float_interval_max, stage_min_bin, stage_max_bin):
    """
    Splits the current binary stage into its two sub-intervals.

    float_interval_min: The minimum floating-point value of the interval that holds the encoded value. Not used by this method.
    float_interval_max: The maximum floating-point value of the interval that holds the encoded value. Not used by this method.
    stage_min_bin: The binary string at the lower end of the current stage.
    stage_max_bin: The binary string at the upper end of the current stage.

    Returns a dictionary with 2 terms. Term 0 is [stage_min_bin + "0", stage_min_bin + "1"] and term 1 is [stage_min_bin + "1", stage_max_bin].
    """

    # Appending one more bit to the lower bound gives the new lower bound ("0") and the midpoint ("1").
    lower_bin = stage_min_bin + "0"
    middle_bin = stage_min_bin + "1"

    return {
        0: [lower_bin, middle_bin],
        1: [middle_bin, stage_max_bin],
    }
133
+
134
def encode_binary(self, float_interval_min, float_interval_max):
    """
    Searches for a binary code whose stage lies inside the floating-point interval that encodes the message.

    float_interval_min: The minimum floating-point value in the interval in which the floating-point value that encodes the message is located.
    float_interval_max: The maximum floating-point value in the interval in which the floating-point value that encodes the message is located.

    Returns the binary code (as a string) and the list of visited stages. The list is only filled when self.save_stages is set.
    """

    binary_encoder = []
    binary_code = None

    # The first stage splits the range between binary 0.0 and 1.0 at binary 0.1.
    stage_probs = {0: ["0.0", "0.1"], 1: ["0.1", "1.0"]}

    while binary_code is None:
        # Continue with term 0 when the interval maximum is below that term's upper bound, otherwise with term 1.
        selected = 0 if float_interval_max < bin2float(stage_probs[0][1]) else 1
        stage_min_bin = stage_probs[selected][0]
        stage_max_bin = stage_probs[selected][1]

        if self.save_stages:
            binary_encoder.append(stage_probs)

        stage_probs = self.process_stage_binary(float_interval_min,
                                                float_interval_max,
                                                stage_min_bin,
                                                stage_max_bin)

        # The code is found once one of the 2 terms starts at or after the interval minimum and ends before the interval maximum.
        for term in (0, 1):
            term_min_bin = stage_probs[term][0]
            term_max_bin = stage_probs[term][1]
            if (bin2float(term_min_bin) >= float_interval_min) and (bin2float(term_max_bin) < float_interval_max):
                binary_code = term_min_bin
                break

    if self.save_stages:
        binary_encoder.append(stage_probs)

    return binary_code, binary_encoder
92
191
93
192
def decode (self , encoded_msg , msg_length , probability_table ):
94
193
"""
95
- Decodes a message.
194
+ Decodes a message from a floating-point number.
195
+
196
+ encoded_msg: The floating-point value that encodes the message.
197
+ msg_length: Length of the message.
198
+ probability_table: The probability table.
199
+
200
+ Returns the decoded message.
96
201
"""
97
202
98
203
decoder = []
@@ -122,3 +227,80 @@ def decode(self, encoded_msg, msg_length, probability_table):
122
227
decoder .append (last_stage_probs )
123
228
124
229
return decoded_msg , decoder
230
+
231
def float2bin(float_num, num_bits=None):
    """
    Converts a non-negative number into its binary representation.

    float_num: The number to convert. Its str() text is parsed as a Decimal, so ints, floats, Decimals and numeric strings (including exponent notation such as "1.5E-7") are accepted.
    num_bits: The number of bits expected after the binary point. If None, bits are produced until the fraction is fully consumed. At least 1 bit is always produced.

    Returns the binary representation as a string in the form "<integer bits>.<fraction bits>".

    Raises ValueError if the number is negative or not finite, or if num_bits is None and the fraction has no finite binary representation (e.g. 0.1).
    """

    # Parsing the whole text as a Decimal (instead of splitting it at ".") keeps
    # exponent notation and inputs without a decimal point correct.
    value = Decimal(str(float_num))
    if not value.is_finite() or value < 0:
        raise ValueError("float2bin() expects a finite, non-negative number, got {}".format(float_num))

    integers = int(value)
    decimals = value - integers

    if num_bits is None:
        # A fraction terminates in binary only when its reduced denominator is a power of 2.
        # Without this check the loop below would never end for values such as 0.1.
        denominator = decimals.as_integer_ratio()[1]
        if denominator & (denominator - 1):
            raise ValueError("{} has no finite binary representation; pass num_bits to truncate it".format(float_num))

    bits = []
    while True:
        # Doubling the fraction moves the next binary digit into the integer position.
        mul = decimals * 2
        int_part = int(mul)
        bits.append(str(int_part))
        decimals = mul - int_part

        if num_bits is None:
            if decimals == 0:
                break
        elif len(bits) >= num_bits:
            break

    return bin(integers)[2:] + "." + "".join(bits)
275
+
276
def bin2float(bin_num):
    """
    Converts a binary number to a floating-point number.

    bin_num: The binary number as a string, e.g. "10.1". The part before the "." holds the integer bits and the part after it holds the fraction bits. Any character other than "0" is counted as a set bit.

    Returns the value as a Decimal.
    """

    if "." in bin_num:
        integers, decimals = bin_num.split(".")
    else:
        # No decimals in the binary number.
        integers = bin_num
        decimals = ""

    result = Decimal(0)

    # Working with integers. The rightmost bit is the least significant one, so
    # the bits are walked in reverse to pair each bit with its power of 2.
    for power, bit in enumerate(reversed(integers)):
        if bit == "0":
            continue
        result = result + Decimal(2 ** power)

    # Working with decimals. The bit at position idx after the point is worth 1 / 2**(idx + 1).
    for idx, bit in enumerate(decimals):
        if bit == "0":
            continue
        result = result + Decimal(1) / Decimal(2 ** (idx + 1))

    return result
0 commit comments