Support of binary encoding

ahmedfgad · web-flow · commit bb65962a872b · 2021-02-20T14:10:47.000-05:00
diff --git a/README.md b/README.md
@@ -2,6 +2,8 @@
 
 This project implements the lossless data compression technique called **arithmetic encoding (AE)**. The project is simple and has just some basic features.
 
+The project supports encoding the input as both a floating-point value and a binary code.
+
 The project has a main module called `pyae.py` which contains a class called `ArithmeticEncoding` to encode and decode messages.
 
 # Usage Steps
@@ -12,7 +14,8 @@ To use the project, follow these steps:
 2. Instantiate the `ArithmeticEncoding` Class
 3. Prepare a Message
 4. Encode the Message
-5. Decode the Message
+5. Get the Binary Code of the Encoded Message
+6. Decode the Message
 
 ## Import `pyae`
 
@@ -53,8 +56,17 @@ original_msg = "abc"
 Encode the message using the `encode()` method. It accepts the message to be encoded and the probability table. It returns the encoded message (a single floating-point value), the encoder stages, and the minimum and maximum values of the interval in which the encoded value falls.
 
 ```python
-encoded_msg, encoder = AE.encode(msg=original_msg, 
-                                 probability_table=AE.probability_table)
+encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, 
+                                                                          probability_table=AE.probability_table)
+```
+
+## Get the Binary Code of the Encoded Message
+
+Convert the interval returned from the `AE.encode()` method into a binary code using the `AE.encode_binary()` method. It accepts the minimum and maximum values of that interval.
+
+```python
+binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value,
+                                               float_interval_max=interval_max_value)
 ```
 
 ## Decode the Message
@@ -95,6 +107,7 @@ The [`example.py`](/example.py) script has an example that compresses the messag
 import pyae
 
 # Example for encoding a simple text message using the PyAE module.
+# This example returns the floating-point value in addition to its binary code that encodes the message. 
 
 frequency_table = {"a": 2,
                    "b": 7,
@@ -106,16 +119,22 @@ AE = pyae.ArithmeticEncoding(frequency_table=frequency_table,
 original_msg = "abc"
 print("Original Message: {msg}".format(msg=original_msg))
 
-encoded_msg, encoder = AE.encode(msg=original_msg, 
-                                 probability_table=AE.probability_table)
+# Encode the message
+encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, 
+                                                                          probability_table=AE.probability_table)
 print("Encoded Message: {msg}".format(msg=encoded_msg))
 
+# Get the binary code out of the floating-point value
+binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value,
+                                               float_interval_max=interval_max_value)
+print("The binary code is: {binary_code}".format(binary_code=binary_code))
+
+# Decode the message
 decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, 
                                  msg_length=len(original_msg),
                                  probability_table=AE.probability_table)
-print("Decoded Message: {msg}".format(msg=decoded_msg))
-
 decoded_msg = "".join(decoded_msg)
+print("Decoded Message: {msg}".format(msg=decoded_msg))
 print("Message Decoded Successfully? {result}".format(result=original_msg == decoded_msg))
 ```
 
@@ -124,6 +143,7 @@ The printed messages out of the code are:
 ```
 Original Message: abc
 Encoded Message: 0.1729999999999999989175325511
+The binary code is: 0.0010110
 Decoded Message: abc
 Message Decoded Successfully? True
 ```
@@ -161,6 +181,22 @@ print(encoder)
    Decimal('0.5599999999999999349409307570')]}]
 ```
 
+Here is the binary encoder:
+
+```python
+print(encoder_binary)
+```
+
+```python
+[{0: ['0.0', '0.1'], 1: ['0.1', '1.0']},
+ {0: ['0.00', '0.01'], 1: ['0.01', '0.1']},
+ {0: ['0.000', '0.001'], 1: ['0.001', '0.01']},
+ {0: ['0.0010', '0.0011'], 1: ['0.0011', '0.01']},
+ {0: ['0.00100', '0.00101'], 1: ['0.00101', '0.0011']},
+ {0: ['0.001010', '0.001011'], 1: ['0.001011', '0.0011']},
+ {0: ['0.0010110', '0.0010111'], 1: ['0.0010111', '0.0011']}]
+```
+
 ## Low Precision
 
 Assume the message to be encoded is `"abc"*20` (i.e. `abc` repeated 20 times) while using the default precision 28. The length of the message is 60.
@@ -184,16 +220,15 @@ AE = pyae.ArithmeticEncoding(frequency_table=frequency_table,
 original_msg = "abc"*20
 print("Original Message: {msg}".format(msg=original_msg))
 
-encoded_msg, encoder = AE.encode(msg=original_msg, 
-                                 probability_table=AE.probability_table)
+encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, 
+                                                                          probability_table=AE.probability_table)
 print("Encoded Message: {msg}".format(msg=encoded_msg))
 
 decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, 
                                  msg_length=len(original_msg),
                                  probability_table=AE.probability_table)
-print("Decoded Message: {msg}".format(msg=decoded_msg))
-
 decoded_msg = "".join(decoded_msg)
+print("Decoded Message: {msg}".format(msg=decoded_msg))
 print("Message Decoded Successfully? {result}".format(result=original_msg == decoded_msg))
 ```
 
@@ -232,16 +267,15 @@ AE = pyae.ArithmeticEncoding(frequency_table=frequency_table,
 original_msg = "abc"*20
 print("Original Message: {msg}".format(msg=original_msg))
 
-encoded_msg, encoder = AE.encode(msg=original_msg, 
-                                 probability_table=AE.probability_table)
+encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, 
+                                                                          probability_table=AE.probability_table)
 print("Encoded Message: {msg}".format(msg=encoded_msg))
 
 decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, 
                                  msg_length=len(original_msg),
                                  probability_table=AE.probability_table)
-print("Decoded Message: {msg}".format(msg=decoded_msg))
-
 decoded_msg = "".join(decoded_msg)
+print("Decoded Message: {msg}".format(msg=decoded_msg))
 print("Message Decoded Successfully? {result}".format(result=original_msg == decoded_msg))
 ```
 
diff --git a/example.py b/example.py
@@ -1,6 +1,8 @@
 import pyae
 
 # Example for encoding a simple text message using the PyAE module.
+# This example only returns the floating-point value that encodes the message. 
+# Check the example_binary.py to return the binary code of the floating-point value.
 
 frequency_table = {"a": 2,
                    "b": 7,
@@ -12,8 +14,8 @@
 original_msg = "abc"
 print("Original Message: {msg}".format(msg=original_msg))
 
-encoded_msg, encoder = AE.encode(msg=original_msg, 
-                                 probability_table=AE.probability_table)
+encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, 
+                                                                          probability_table=AE.probability_table)
 print("Encoded Message: {msg}".format(msg=encoded_msg))
 
 decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, 
diff --git a/example_binary.py b/example_binary.py
@@ -0,0 +1,33 @@
+import pyae
+
+# Example for encoding a simple text message using the PyAE module.
+# This example returns the floating-point value in addition to its binary code that encodes the message. 
+
+frequency_table = {"a": 2,
+                   "b": 7,
+                   "c": 1}
+
+AE = pyae.ArithmeticEncoding(frequency_table=frequency_table,
+                            save_stages=True)
+
+original_msg = "abc"
+print("Original Message: {msg}".format(msg=original_msg))
+
+# Encode the message
+encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, 
+                                                                          probability_table=AE.probability_table)
+print("Encoded Message: {msg}".format(msg=encoded_msg))
+
+# Get the binary code out of the floating-point value
+binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value,
+                                               float_interval_max=interval_max_value)
+print("The binary code is: {binary_code}".format(binary_code=binary_code))
+
+# Decode the message
+decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, 
+                                 msg_length=len(original_msg),
+                                 probability_table=AE.probability_table)
+decoded_msg = "".join(decoded_msg)
+print("Decoded Message: {msg}".format(msg=decoded_msg))
+
+print("Message Decoded Successfully? {result}".format(result=original_msg == decoded_msg))
diff --git a/example_image.py b/example_image.py
@@ -3,15 +3,19 @@
 import numpy
 import matplotlib.pyplot
 
+# Example for encoding an image using the PyAE module.
+# This example only returns the floating-point value that encodes the image. 
+# Check the example_image_binary.py to return the binary code of the floating-point value.
+
 # Change the precision to a bigger value
 from decimal import getcontext
-getcontext().prec = 10000
+getcontext().prec = 444
 
 # Read an image.
 im = scipy.misc.face(gray=True)
 
 # Just work on a small part to save time. The larger the image, the more time consumed.
-im = im[:50, :50]
+im = im[:15, :15]
 
 # Convert the image into a 1D vector.
 msg = im.flatten()
@@ -25,13 +29,13 @@
 AE = pyae.ArithmeticEncoding(frequency_table=frequency_table)
 
 # Encode the message
-encoded_msg, _ = AE.encode(msg=msg, 
-                        probability_table=AE.probability_table)
+encoded_msg, encoder, interval_min_value, interval_max_value = AE.encode(msg=msg, 
+                                                                         probability_table=AE.probability_table)
 
 # Decode the message
-decoded_msg, _ = AE.decode(encoded_msg=encoded_msg, 
-                        msg_length=len(msg),
-                        probability_table=AE.probability_table)
+decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, 
+                                 msg_length=len(msg),
+                                 probability_table=AE.probability_table)
 
 # Reshape the image to its original shape.
 decoded_msg = numpy.reshape(decoded_msg, im.shape)
diff --git a/example_image_binary.py b/example_image_binary.py
@@ -0,0 +1,56 @@
+import scipy.misc
+import pyae
+import numpy
+import matplotlib.pyplot
+
+# Example for encoding an image using the PyAE module.
+# This example returns the floating-point value in addition to its binary code that encodes the image. 
+
+# Change the precision to a bigger value
+from decimal import getcontext
+getcontext().prec = 444
+
+# Read an image.
+im = scipy.misc.face(gray=True)
+
+# Just work on a small part to save time. The larger the image, the more time consumed.
+im = im[:15, :15]
+
+# Convert the image into a 1D vector.
+msg = im.flatten()
+
+# Create the frequency table based on its histogram.
+hist, bin_edges = numpy.histogram(a=im,
+                                  bins=range(0, 257))
+frequency_table = {key: value for key, value in zip(bin_edges[0:256], hist)}
+
+# Create an instance of the ArithmeticEncoding class.
+AE = pyae.ArithmeticEncoding(frequency_table=frequency_table, save_stages=True)
+
+# Encode the message
+encoded_msg, encoder, interval_min_value, interval_max_value = AE.encode(msg=msg, 
+                                                                         probability_table=AE.probability_table)
+
+# Get the binary code that encodes the image
+binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value,
+                                               float_interval_max=interval_max_value)
+print("The binary code is: {binary_code}".format(binary_code=binary_code))
+
+# Decode the message
+decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, 
+                                 msg_length=len(msg),
+                                 probability_table=AE.probability_table)
+
+# Reshape the image to its original shape.
+decoded_msg = numpy.reshape(decoded_msg, im.shape)
+
+# Show the original and decoded images.
+fig, ax = matplotlib.pyplot.subplots(1, 2)
+ax[0].imshow(im, cmap="gray")
+ax[0].set_title("Original Image")
+ax[0].set_xticks([])
+ax[0].set_yticks([])
+ax[1].imshow(decoded_msg, cmap="gray")
+ax[1].set_title("Reconstructed Image")
+ax[1].set_xticks([])
+ax[1].set_yticks([])
diff --git a/pyae.py b/pyae.py

-Original file line number
+Diff line change
 -from decimal import Decimal # Used to offer any user-defined precision.
 +from decimal import Decimal
 class ArithmeticEncoding:
     """
     def get_probability_table(self, frequency_table):
         """
         Calculates the probability table out of the frequency table.
++
 +        frequency_table: A table of the term frequencies.
++
 +        Returns the probability table.
         """
         total_frequency = sum(list(frequency_table.values()))
     def get_encoded_value(self, last_stage_probs):
         """
         After encoding the entire message, this method returns the single value that represents the entire message.
++
 +        last_stage_probs: A list of the probabilities in the last stage.
++
 +        Returns the minimum and maximum probabilities in the last stage in addition to the value encoding the message.
         """
         last_stage_probs = list(last_stage_probs.values())
         last_stage_values = []
         last_stage_min = min(last_stage_values)
         last_stage_max = max(last_stage_values)
 +        encoded_value = (last_stage_min + last_stage_max)/2
 -        return (last_stage_min + last_stage_max)/2
 +        return last_stage_min, last_stage_max, encoded_value
     def process_stage(self, probability_table, stage_min, stage_max):
         """
         Processing a stage in the encoding/decoding process.
++
 +        probability_table: The probability table.
 +        stage_min: The minimum probability of the current stage.
 +        stage_max: The maximum probability of the current stage.
++
 +        Returns the probabilities in the stage.
         """
++
         stage_probs = {}
         stage_domain = stage_max - stage_min
         for term_idx in range(len(probability_table.items())):
     def encode(self, msg, probability_table):
         """
 -        Encodes a message.
 +        Encodes a message using arithmetic encoding.
++
 +        msg: The message to be encoded.
 +        probability_table: The probability table.
++
 +        Returns the floating-point value representing the encoded message, the encoder, and the minimum and maximum values of the interval in which the floating-point value falls.
         """
 -        # Make sure
         msg = list(msg)
         encoder = []
         if self.save_stages:
             encoder.append(last_stage_probs)
 -        encoded_msg = self.get_encoded_value(last_stage_probs)
 +        interval_min_value, interval_max_value, encoded_msg = self.get_encoded_value(last_stage_probs)
++
 +        return encoded_msg, encoder, interval_min_value, interval_max_value
++
 +    def process_stage_binary(self, float_interval_min, float_interval_max, stage_min_bin, stage_max_bin):
 +        """
 +        Processing a stage in the encoding/decoding process.
++
 +        float_interval_min: The minimum floating-point value in the interval in which the floating-point value that encodes the message is located.
 +        float_interval_max: The maximum floating-point value in the interval in which the floating-point value that encodes the message is located.
 +        stage_min_bin: The minimum binary number in the current stage.
 +        stage_max_bin: The maximum binary number in the current stage.
++
 +        Returns the probabilities of the terms in this stage. There are only 2 terms.
 +        """
++
 +        stage_mid_bin = stage_min_bin + "1"
 +        stage_min_bin = stage_min_bin + "0"
++
 +        stage_probs = {}
 +        stage_probs[0] = [stage_min_bin, stage_mid_bin]
 +        stage_probs[1] = [stage_mid_bin, stage_max_bin]
++
 +        return stage_probs
++
 +    def encode_binary(self, float_interval_min, float_interval_max):
 +        """
 +        Calculates the binary code that represents the floating-point value that encodes the message.
++
 +        float_interval_min: The minimum floating-point value in the interval in which the floating-point value that encodes the message is located.
 +        float_interval_max: The maximum floating-point value in the interval in which the floating-point value that encodes the message is located.
++
 +        Returns the binary code representing the encoded message.
 +        """
++
 +        binary_encoder = []
 +        binary_code = None
++
 +        stage_min_bin = "0.0"
 +        stage_max_bin = "1.0"
 -        return encoded_msg, encoder
 +        stage_probs = {}
 +        stage_probs[0] = [stage_min_bin, "0.1"]
 +        stage_probs[1] = ["0.1", stage_max_bin]
++
 +        while True:
 +            if float_interval_max < bin2float(stage_probs[0][1]):
 +                stage_min_bin = stage_probs[0][0]
 +                stage_max_bin = stage_probs[0][1]
 +            else:
 +                stage_min_bin = stage_probs[1][0]
 +                stage_max_bin = stage_probs[1][1]
++
 +            if self.save_stages:
 +                binary_encoder.append(stage_probs)
++
 +            stage_probs = self.process_stage_binary(float_interval_min,
 +                                                    float_interval_max,
 +                                                    stage_min_bin,
 +                                                    stage_max_bin)
++
 +            # print(stage_probs[0][0], bin2float(stage_probs[0][0]))
 +            # print(stage_probs[0][1], bin2float(stage_probs[0][1]))
 +            if (bin2float(stage_probs[0][0]) >= float_interval_min) and (bin2float(stage_probs[0][1]) < float_interval_max):
 +                # The binary code is found.
 +                # print(stage_probs[0][0], bin2float(stage_probs[0][0]))
 +                # print(stage_probs[0][1], bin2float(stage_probs[0][1]))
 +                # print("The binary code is : ", stage_probs[0][0])
 +                binary_code = stage_probs[0][0]
 +                break
 +            elif (bin2float(stage_probs[1][0]) >= float_interval_min) and (bin2float(stage_probs[1][1]) < float_interval_max):
 +                # The binary code is found.
 +                # print(stage_probs[1][0], bin2float(stage_probs[1][0]))
 +                # print(stage_probs[1][1], bin2float(stage_probs[1][1]))
 +                # print("The binary code is : ", stage_probs[1][0])
 +                binary_code = stage_probs[1][0]
 +                break
++
 +        if self.save_stages:
 +            binary_encoder.append(stage_probs)
++
 +        return binary_code, binary_encoder
     def decode(self, encoded_msg, msg_length, probability_table):
         """
 -        Decodes a message.
 +        Decodes a message from a floating-point number.
++
 +        encoded_msg: The floating-point value that encodes the message.
 +        msg_length: Length of the message.
 +        probability_table: The probability table.
++
 +        Returns the decoded message.
         """
         decoder = []
             decoder.append(last_stage_probs)
         return decoded_msg, decoder
++
 +def float2bin(float_num, num_bits=None):
 +    """
 +    Converts a floating-point number into binary.
++
 +    float_num: The floating-point number.
 +    num_bits: The number of bits expected in the result. If None, then the number of bits depends on the number.
++
 +    Returns the binary representation of the number.
 +    """
++
 +    float_num = str(float_num)
 +    if float_num.find(".") == -1:
 +        # No decimals in the floating-point number.
 +        integers = float_num
 +        decimals = ""
 +    else:
 +        integers, decimals = float_num.split(".")
 +    decimals = "0." + decimals
 +    decimals = Decimal(decimals)
 +    integers = int(integers)
++
 +    result = ""
 +    num_used_bits = 0
 +    while True:
 +        mul = decimals * 2
 +        int_part = int(mul)
 +        result = result + str(int_part)
 +        num_used_bits = num_used_bits + 1
++
 +        decimals = mul - int(mul)
 +        if type(num_bits) is type(None):
 +            if decimals == 0:
 +                break
 +        elif num_used_bits >= num_bits:
 +            break
 +    if type(num_bits) is type(None):
 +        pass
 +    elif len(result) < num_bits:
 +        num_remaining_bits = num_bits - len(result)
 +        result = result + "0"*num_remaining_bits
++
 +    integers_bin = bin(integers)[2:]
 +    result = str(integers_bin) + "." + str(result)
 +    return result
++
 +def bin2float(bin_num):
 +    """
 +    Converts a binary number to a floating-point number.
++
 +    bin_num: The binary number as a string.
++
 +    Returns the floating-point representation.
 +    """
++
 +    if bin_num.find(".") == -1:
 +        # No decimals in the binary number.
 +        integers = bin_num
 +        decimals = ""
 +    else:
 +        integers, decimals = bin_num.split(".")
 +    result = Decimal(0.0)
++
 +    # Working with integers.
 +    for idx, bit in enumerate(integers):
 +        if bit == "0":
 +            continue
 +        mul = 2**idx
 +        result = result + Decimal(mul)
++
 +    # Working with decimals.
 +    for idx, bit in enumerate(decimals):
 +        if bit == "0":
 +            continue
 +        mul = Decimal(1.0)/Decimal((2**(idx+1)))
 +        result = result + mul
 +    return result