MatrixAI
diff --git a/‎src/Generator.ts
+117-33 b/‎src/Generator.ts
+117-33
diff --git a/‎src/Parser.ts
+92 b/‎src/Parser.ts
+92
@@ -5,42 +5,67 @@ import * as errors from './errors';
 import * as utils from './utils';
 
 /**
- * The TAR headers follow this structure:
- * Start    Size    Description
- * ------------------------------
- * 0        100     File name (first 100 bytes)
- * 100      8       File mode (null-padded octal)
- * 108      8       Owner user id (null-padded octal)
- * 116      8       Owner group id (null-padded octal)
- * 124      12      File size in bytes (null-padded octal, 0 for directories)
- * 136      12      Mtime (null-padded octal)
- * 148      8       Checksum (fill with ASCII spaces for computation)
- * 156      1       Type flag ('0' for file, '5' for directory)
- * 157      100     Link name (null-terminated ASCII/UTF-8)
- * 257      6       'ustar\0' (magic string)
- * 263      2       '00' (ustar version)
- * 265      32      Owner user name (null-terminated ASCII/UTF-8)
- * 297      32      Owner group name (null-terminated ASCII/UTF-8)
- * 329      8       Device major (unset in this implementation)
- * 337      8       Device minor (unset in this implementation)
- * 345      155     File name (last 155 bytes, total 255 bytes, null-padded)
- * 500      12      '\0' (unused)
+ * The Generator can be used to generate blocks for a tar archive. The generator
+ * can create three kinds of headers: FILE, DIRECTORY, and EXTENDED. The file and
+ * directory headers are the usual ones, while the extended header is able to
+ * store additional metadata that does not fit in the standard header.
+ * 
+ * This class can also be used to generate data chunks padded to 512 bytes. Note
+ * that the chunk size shouldn't exceed 512 bytes.
+ * 
+ * Note that the generator maintains an internal state, so operations like
+ * generating headers, data chunks, or end chunks must be invoked while the
+ * generator is in the matching state, otherwise an error will be thrown.
+ * 
+ * For reference, this is the structure of a tar header.
+ * 
+ * | Start  | Size | Description                                               |
+ * |--------|------|-----------------------------------------------------------|
+ * | 0      | 100  | File name (first 100 bytes)                               |
+ * | 100    | 8    | File mode (null-padded octal)                             |
+ * | 108    | 8    | Owner user ID (null-padded octal)                         |
+ * | 116    | 8    | Owner group ID (null-padded octal)                        |
+ * | 124    | 12   | File size in bytes (null-padded octal, 0 for directories) |
+ * | 136    | 12   | Mtime (null-padded octal)                                 |
+ * | 148    | 8    | Checksum (fill with ASCII spaces for computation)         |
+ * | 156    | 1    | Type flag ('0' for file, '5' for directory)               |
+ * | 157    | 100  | Link name (null-terminated ASCII/UTF-8)                   |
+ * | 257    | 6    | 'ustar\0' (magic string)                                  |
+ * | 263    | 2    | '00' (ustar version)                                      |
+ * | 265    | 32   | Owner user name (null-terminated ASCII/UTF-8)             |
+ * | 297    | 32   | Owner group name (null-terminated ASCII/UTF-8)            |
+ * | 329    | 8    | Device major (unset in this implementation)               |
+ * | 337    | 8    | Device minor (unset in this implementation)               |
+ * | 345    | 155  | File name (last 155 bytes, total 255 bytes, null-padded)  |
+ * | 500    | 12   | '\0' (unused)                                             |
  *
- * Note that all numbers are in stringified octal format.
+ * Note that all numbers are in stringified octal format, as opposed to the
+ * numbers used in the extended header, which are all in stringified decimal.
  *
  * The following data will be left blank (null):
  *  - Link name
- *  - Owner user name
- *  - Owner group name
  *  - Device major
  *  - Device minor
  *
- *  This is because this implementation does not interact with linked files.
- *  Owner user name and group name cannot be extracted via regular stat-ing,
- *  so it is left blank. In virtual situations, this field won't be useful
- *  anyways. The device major and minor are specific to linux kernel, which
- *  is not relevant to this virtual tar implementation. This is the reason
- *  these fields have been left blank.
+ * This is because this implementation does not interact with linked files.
+ * The device major and minor are specific to the Linux kernel, which is not
+ * relevant to this virtual tar implementation. This is the reason these fields
+ * have been left blank.
+ * 
+ * The data for extended headers is formatted slightly differently, with the
+ * general format following this structure.
+ *  <size> <key>=<value>\n
+ * 
+ * Here, the <size> stands for the byte length of the entire line (including the
+ * size number itself, the space, the equals, and the \n). Unlike in regular
+ * strings, the end marker for a key-value pair is the \n (newline) character.
+ * Moreover, unlike the USTAR header, the numbers are written in stringified
+ * decimal format.
+ * 
+ * The key can be any supported metadata key, and the value is binary data
+ * storing the actual value. These are the currently supported keys for
+ * the extended metadata:
+ *  - path (corresponding to file path if it is longer than 255 characters)
  */
 class Generator {
   protected state: GeneratorState = GeneratorState.HEADER;
@@ -85,6 +110,7 @@ class Generator {
       filePath = filePath.endsWith('/') ? filePath : filePath + '/';
     }
 
+    // Write the relevant sections in the header with the provided data
     utils.writeUstarMagic(header);
     utils.writeFileType(header, type);
     utils.writeFilePath(header, filePath);
@@ -103,10 +129,27 @@ class Generator {
     return header;
   }
 
+  /**
+   * Generates a file header based on the file path and the stat. Note that the
+   * stat must provide a size for the file, but all other fields are optional.
+   * If the file path is longer than 255 characters, then an error will be
+   * thrown. In that case, an extended header storing the path needs to be
+   * generated first, after which the file path can be set to an empty string.
+   * 
+   * The content of the file must follow this header in separate chunks.
+   * 
+   * @param filePath the path of the file relative to the tar root
+   * @param stat the stats of the file
+   * @returns one 512-byte chunk corresponding to the header
+   * 
+   * @see {@link generateExtended} for generating headers with extended metadata
+   * @see {@link generateDirectory} for generating directory headers instead
+   * @see {@link generateData} for generating data chunks
+   */
   generateFile(filePath: string, stat: FileStat): Uint8Array {
     if (this.state === GeneratorState.HEADER) {
       // Make sure the size is valid
-      if (stat.size == null) {
+      if (stat.size == null || stat.size < 0) {
         throw new errors.ErrorVirtualTarGeneratorInvalidStat(
           'Files must have valid file sizes',
         );
@@ -130,6 +173,19 @@ class Generator {
     );
   }
 
+  /**
+   * Generates a directory header based on the file path and the stat. Note that
+   * the size is ignored and set to 0 for directories. If the file path is longer
+   * than 255 characters, then an error will be thrown. In that case, an extended
+   * header storing the path needs to be generated first, after which the file
+   * path can be set to an empty string.
+   * 
+   * @param filePath the path of the directory relative to the tar root
+   * @param stat the stats of the directory
+   * @returns one 512-byte chunk corresponding to the header
+   * 
+   * @see {@link generateExtended} for generating headers with extended metadata
+   * @see {@link generateFile} for generating file headers instead
+   */
   generateDirectory(filePath: string, stat?: FileStat): Uint8Array {
     if (this.state === GeneratorState.HEADER) {
       // The size is zero for directories. Override this value in the stat if
@@ -147,6 +203,14 @@ class Generator {
     );
   }
 
+  /**
+   * Generates an extended metadata header based on the total size of the data
+   * following the header. If there is no need for extended metadata, then avoid
+   * using this, as it would just waste space.
+   * 
+   * @param size the size of the binary data block containing the metadata
+   * @returns one 512-byte chunk corresponding to the header
+   */
   generateExtended(size: number): Uint8Array {
     if (this.state === GeneratorState.HEADER) {
       this.state = GeneratorState.DATA;
@@ -160,6 +224,22 @@ class Generator {
     );
   }
 
+  /**
+   * Generates a data block. The input must be 512 bytes in size or smaller.
+   * Only the final chunk of the data may be smaller than 512 bytes. For example,
+   * if the file size is 1023 bytes, then you need to provide a 512-byte chunk
+   * first, then provide the remaining 511-byte chunk later. You cannot chunk it
+   * up by sending over the first 100 bytes, then sending over the next 512.
+   * 
+   * This method is used to generate blocks for both a file and the extended
+   * header.
+   * 
+   * @param data a block of binary data (512 bytes at most)
+   * @returns one 512-byte padded chunk corresponding to the data block
+   * 
+   * @see {@link generateExtended} for generating headers with extended metadata
+   * @see {@link generateFile} for generating file headers preceding data block
+   */
   generateData(data: Uint8Array): Uint8Array {
     if (this.state === GeneratorState.DATA) {
       if (data.byteLength > constants.BLOCK_SIZE) {
@@ -198,9 +278,13 @@ class Generator {
     );
   }
 
-  // Creates a single null block. A null block is a block filled with all zeros.
-  // This is needed to end the archive, as two of these blocks mark the end of
-  // archive.
+  /**
+   * Generates a null chunk. Two invocations are needed to create a valid
+   * archive end marker. After two invocations, the generator state will be
+   * set to ENDED and no further data can be fed through the generator.
+   * 
+   * @returns one 512-byte null chunk
+   */
   generateEnd(): Uint8Array {
     switch (this.state) {
       case GeneratorState.HEADER:
 
@@ -4,6 +4,64 @@ import * as constants from './constants';
 import * as errors from './errors';
 import * as utils from './utils';
 
+/**
+ * The Parser is used to parse blocks from a tar archive. Each written chunk can
+ * return either a token or undefined. Undefined will only be returned when
+ * parsing the first null chunk which signifies that the archive has ended. The
+ * tokens can be a header token corresponding to a file, a directory, or an
+ * extended header; a data token returning the data; or an end token signifying
+ * the end of the archive.
+ * 
+ * For reference, this is the structure of a tar header.
+ * 
+ * | Start  | Size | Description                                               |
+ * |--------|------|-----------------------------------------------------------|
+ * | 0      | 100  | File name (first 100 bytes)                               |
+ * | 100    | 8    | File mode (null-padded octal)                             |
+ * | 108    | 8    | Owner user ID (null-padded octal)                         |
+ * | 116    | 8    | Owner group ID (null-padded octal)                        |
+ * | 124    | 12   | File size in bytes (null-padded octal, 0 for directories) |
+ * | 136    | 12   | Mtime (null-padded octal)                                 |
+ * | 148    | 8    | Checksum (fill with ASCII spaces for computation)         |
+ * | 156    | 1    | Type flag ('0' for file, '5' for directory)               |
+ * | 157    | 100  | Link name (null-terminated ASCII/UTF-8)                   |
+ * | 257    | 6    | 'ustar\0' (magic string)                                  |
+ * | 263    | 2    | '00' (ustar version)                                      |
+ * | 265    | 32   | Owner user name (null-terminated ASCII/UTF-8)             |
+ * | 297    | 32   | Owner group name (null-terminated ASCII/UTF-8)            |
+ * | 329    | 8    | Device major (unset in this implementation)               |
+ * | 337    | 8    | Device minor (unset in this implementation)               |
+ * | 345    | 155  | File name (last 155 bytes, total 255 bytes, null-padded)  |
+ * | 500    | 12   | '\0' (unused)                                             |
+ *
+ * Note that all numbers are in stringified octal format, as opposed to the
+ * numbers used in the extended header, which are all in stringified decimal.
+ *
+ * The following data will be left blank (null):
+ *  - Link name
+ *  - Device major
+ *  - Device minor
+ *
+ * This is because this implementation does not interact with linked files.
+ * The device major and minor are specific to the Linux kernel, which is not
+ * relevant to this virtual tar implementation. This is the reason these fields
+ * have been left blank.
+ * 
+ * The data for extended headers is formatted slightly differently, with the
+ * general format following this structure.
+ *  <size> <key>=<value>\n
+ * 
+ * Here, the <size> stands for the byte length of the entire line (including the
+ * size number itself, the space, the equals, and the \n). Unlike in regular
+ * strings, the end marker for a key-value pair is the \n (newline) character.
+ * Moreover, unlike the USTAR header, the numbers are written in stringified
+ * decimal format.
+ * 
+ * The key can be any supported metadata key, and the value is binary data
+ * storing the actual value. These are the currently supported keys for
+ * the extended metadata:
+ *  - path (corresponding to file path if it is longer than 255 characters)
+ */
 class Parser {
   protected state: ParserState = ParserState.HEADER;
   protected remainingBytes = 0;
@@ -67,6 +125,40 @@ class Parser {
     }
   }
 
+  /**
+   * Each chunk in a tar archive is exactly 512 bytes long. This chunk needs to
+   * be written to the parser, which will return a single token. This token can
+   * be one of a header token, a data token, an end token, or undefined. The
+   * undefined token is only returned when the chunk does not correspond to an
+   * actual token. For example, the first null chunk in the archive end marker
+   * will return an undefined. The second null chunk will return an end token.
+   * 
+   * The header token can return different types of headers. The three supported
+   * headers are FILE, DIRECTORY, and EXTENDED. Note that the file stat is
+   * returned with each header. It might contain default values if it was not
+   * set in the header. The default value for strings is '', for numbers is 0,
+   * and for dates is Date(0), which is 00:00:00 UTC on 1 January 1970.
+   * 
+   * Note that extended headers will not be automatically parsed. If some
+   * metadata was put into the extended header instead, then it will need to be
+   * parsed separately to get the information out, and the metadata field in the
+   * header will contain the default value for its type.
+   * 
+   * A data token is pretty simple, containing the bytes of the file. Note that
+   * this is not aligned to the 512-byte boundary. For example, if a file has
+   * 513 bytes of data, then the first chunk will return the 512 bytes of data,
+   * and the next data chunk will return 1 byte, removing the padding. The data
+   * token also has another field, `end`. This is a boolean which is true when
+   * the last chunk of data is being sent. The expected token after an ended
+   * data token is a header or an end token.
+   * 
+   * The end token signifies that the archive has ended. This sets the internal
+   * state to ENDED, and no further data can be written to it and attempts to
+   * write any additional data will throw an error.
+   * 
+   * @param data a single 512-byte chunk from the tar file
+   * @returns a parsed token, or undefined if no tokens can be returned
+   */
   write(data: Uint8Array): TokenHeader | TokenData | TokenEnd | undefined {
     if (data.byteLength !== constants.BLOCK_SIZE) {
       throw new errors.ErrorVirtualTarParserBlockSize(