Update requirements

cseas · cseas · commit 9400d947429c · 2018-11-24T18:00:57.000+05:30
diff --git a/README.md b/README.md
@@ -1,6 +1,28 @@
 # ocr-table
 This project aims to extract tables from scanned image PDFs using Optical Character Recognition.
 
+# Install Requirements
+
+1. Tesseract OCR
+	```sh
+	sudo apt-get install tesseract-ocr
+	```
+
+2. ImageMagick
+	```sh
+	sudo apt-get install imagemagick
+	```
+
+3. PDF Utilities
+	```sh
+	sudo apt-get install poppler-utils
+	```
+
+4. Python packages
+	```sh
+	sudo pip install -r requirements.txt
+	```
+
 # Usage
 
 1. Clear the [pdf/](pdf) folder and copy all your pdf files to be scanned in it.
diff --git a/extract_text.sh b/extract_text.sh
@@ -21,7 +21,7 @@ for FILEPATH in $BPATH*.pdf; do
     OUTFILE=$OPATH$(basename $FILEPATH).txt
     touch "$OUTFILE"    # The text file will be created regardless of whether
                         #  text is successfully extracted.
-    # First attempt ot use pdftotext to extract embedded text.
+    # First attempt to use pdftotext to extract embedded text.
     echo -n "Attempting pdftotext extraction..."
     pdftotext "$FILEPATH" "$OUTFILE"
     FILESIZE=$(wc -w < "$OUTFILE")