diff --git a/AFINN-111.json b/AFINN-111.json index 6b0e10c..fbb5f2e 100644 --- a/AFINN-111.json +++ b/AFINN-111.json @@ -506,7 +506,7 @@ "courageous": 2, "courteous": 2, "courtesy": 2, - "cover-up": -3, + "cover up": -3, "coward": -2, "cowardly": -2, "coziness": 2, @@ -761,8 +761,9 @@ "dodging": -2, "dodgy": -2, "does not work": -3, + "doesn't work": -3, "dolorous": -2, - "dont like": -2, + "don't like": -2, "doom": -2, "doomed": -2, "doubt": -1, @@ -1487,7 +1488,7 @@ "lackadaisical": -2, "lagged": -2, "lack": -2, - "made-up": -1, + "made up": -1, "madly": -3, "madness": -3, "mandatory": -1, @@ -1587,7 +1588,7 @@ "merry": 3, "naive": -2, "natural": 1, - "naïve": -2, + "naïve": -2, "needy": -2, "negative": -2, "negativity": -2, @@ -1634,7 +1635,7 @@ "offline": -1, "oks": 2, "ominous": 3, - "once-in-a-lifetime": 3, + "once in a lifetime": 3, "opportunities": 2, "opportunity": 2, "oppressed": -2, @@ -1987,8 +1988,8 @@ "sedition": -2, "seditious": -2, "seduced": -1, - "self-confident": 2, - "self-deluded": -2, + "self confident": 2, + "self deluded": -2, "selfish": -3, "selfishness": -3, "sentence": -2, @@ -2014,8 +2015,8 @@ "shocking": -2, "shocks": -2, "shoot": -1, - "short-sighted": -2, - "short-sightedness": -2, + "short sighted": -2, + "short sightedness": -2, "shortage": -2, "shortages": -2, "shrew": -4, @@ -2074,7 +2075,7 @@ "solving": 1, "somber": -2, "some kind": 0, - "son-of-a-bitch": -5, + "son of a bitch": -5, "soothe": 3, "soothed": 3, "soothing": 3, @@ -2477,4 +2478,4 @@ "zealot": -2, "zealots": -2, "zealous": 2 -} \ No newline at end of file +} diff --git a/README.md b/README.md index 4c47af3..fd42093 100644 --- a/README.md +++ b/README.md @@ -10,18 +10,43 @@ ## Usage ```javascript -var sentimentAnalysis = require('sentiment-analysis'); - -sentimentAnalysis('Dinosaurs are awesome!'); // +0.4 -sentimentAnalysis('Everything is stupid'); // -0.2 -sentimentAnalysis('Windows is very unstable'); // -0.2 -sentimentAnalysis('London is gloomy today because of all 
the smog'); // -0.4 -sentimentAnalysis('I am so grateful for all the presents, thank you!'); // +0.5 -sentimentAnalysis('Really enjoying the warm weather'); // +0.3 -sentimentAnalysis('It was a catastrophic disaster'); // -0.6 +var sentimentAnalysisLib = require('sentiment-analysis'); +var sentimentAnalysis = new sentimentAnalysisLib(); + +sentimentAnalysis.analyseSentence('Dinosaurs are awesome!'); // +0.4 +sentimentAnalysis.analyseSentence('Everything is stupid'); // -0.2 +sentimentAnalysis.analyseSentence('Windows is very unstable'); // -0.2 +sentimentAnalysis.analyseSentence('London is gloomy today because of all the smog'); // -0.4 +sentimentAnalysis.analyseSentence('I am so grateful for all the presents, thank you!'); // +0.5 +sentimentAnalysis.analyseSentence('Really enjoying the warm weather'); // +0.3 +sentimentAnalysis.analyseSentence('It was a catastrophic disaster'); // -0.6 ``` sentiment-analysis will return a score between -1 and +1, where negative numbers represent a negative overall sentiment. +### Options + +*Custom file* +```javascript +var sentimentAnalysisLib = require('sentiment-analysis'); +var sentimentAnalysis = new sentimentAnalysisLib({ + customWordsFile : 'myCustomAfinn.json' +}); + +sentimentAnalysis.analyseSentence('Dinosaurs are awesome!'); +``` + +*Custom words* +```javascript +var sentimentAnalysisLib = require('sentiment-analysis'); +var sentimentAnalysis = new sentimentAnalysisLib({ + customWords : { + dinosaurs : -4 + } +}); + +sentimentAnalysis.analyseSentence('Dinosaurs are awesome!'); // 0 +``` + ## Testing `npm test` @@ -32,7 +57,7 @@ See unit test, integration testing results on [Travis CI] See the `gulpfile.js` for documentation of build process. 
## License -MIT © [Alicia Sykes](http://aliciasykes.com) +MIT © [Alicia Sykes](http://aliciasykes.com), [Maurizio Carboni](https://www.linkedin.com/in/mauriziocarboni) [AFINN-111]: -[Travis CI]: \ No newline at end of file +[Travis CI]: diff --git a/gulpfile.js b/gulpfile.js index edd62d7..a9ab871 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -18,7 +18,8 @@ var watch = require('gulp-watch'); require('coffee-script/register'); var footerTxt = "\/* (C) Alicia Sykes 2015 " + - "*\\\r\n\\* MIT License. Read full license at: https:\/\/goo.gl\/IL4lQJ *\/"; + "*\\\r\n|* (C) Maurizio Carboni 2016 " + + "*|\r\n\\* MIT License. Read full license at: https:\/\/goo.gl\/IL4lQJ *\/"; /* Delete the files currently in finished directory */ gulp.task('clean', function () { @@ -37,7 +38,7 @@ gulp.task('build', ['clean'], function(){ }); /* Run unit tests and generate coverage report */ -gulp.task('test', function (cb) { +gulp.task('test', ['build'], function (cb) { gulp.src(['./index.js']) .pipe(istanbul()) .pipe(istanbul.hookRequire()) @@ -62,4 +63,4 @@ gulp.task('test-after-build',['build'],function(){ }); /* Defualt gulp task, deletes old files, compiles source files and runs tests */ -gulp.task('default', ['test-after-build', 'watch']); \ No newline at end of file +gulp.task('default', ['test-after-build', 'watch']); diff --git a/index.coffee b/index.coffee index 4dc542a..7b5a9a0 100644 --- a/index.coffee +++ b/index.coffee @@ -1,58 +1,86 @@ +sentimentAnalysisPath = __dirname -afinnWordList = require __dirname + '/AFINN-111.json' # Get the AFINN-111 list - -# Returns a boolean true if given word is found in word list -doesWordExist = (word)-> - if word of afinnWordList then true else false - -# Returns an integer value + or - sentiment score for given word -getScoreOfWord = (word)-> - if afinnWordList[word] then afinnWordList[word] else 0 - -# Formats sentence and returns a lowercase a-z array of words -getWordsInSentence = (sentence)-> - sentence = if sentence? 
then sentence else '' # Double check is defined - sentence = if typeof sentence == 'string' then sentence else '' - sentence = sentence.toLowerCase() - sentence = sentence.replace(/(?:https?|ftp):\/\/[\n\S]+/g, '') # Remove URLs - sentence = sentence.replace(/[^\w\s]/gi, '') # Remove special characters - sentence = sentence.split(' ') # Split into an array - sentence = sentence.filter((n) -> n != '') # Remove blanks - sentence = removeDuplicates(sentence) - -# Remove Duplicates -removeDuplicates = (arr) -> - if arr.length == 0 - return [] - res = {} - res[arr[key]] = arr[key] for key in [0..arr.length-1] - value for key, value of res - -# Ensure score is in a valid range between -1 to +1 -scaleScore = (score)-> - score = if score > 10 then 10 else score - score = if score < -10 then -10 else score - score/10 - -# Returns an overall sentiment score for sentence -analyseSentence = (sentence) -> - score = 0 - wordsArr = getWordsInSentence(sentence) - for word in wordsArr - if doesWordExist(word) - score += getScoreOfWord(word) - scaleScore(score) - -module.exports = analyseSentence # Export main method as module - - -# If we're developing/ testing then export the private methods too -if process.env.NODE_ENV == 'test' - module.exports = - main: analyseSentence - _private: - scaleScore: scaleScore - doesWordExist: doesWordExist - getScoreOfWord: getScoreOfWord - removeDuplicates: removeDuplicates - getWordsInSentence: getWordsInSentence +module.exports = class Analizer + constructor: ( opts ) -> + # Include inside the class the wordlist + @afinnWordList = require(sentimentAnalysisPath + '/AFINN-111.json') + if opts + # If using a custom file, load it + if opts.customWordsFile + opts.customWords = require(opts.customWordsFile) + # if using custom words + # ( in case of custom file, this words are loaded from the file ) + if opts.customWords + # Overwrite existing words + for i,word of opts.customWords + @afinnWordList[i] = word + @afinnPhrases = [] + @afinnPhrasesCamel = 
[] + for word,value of @afinnWordList + wordA = @constructor.transformPlainApostrophe(word) + if ( wordA != word ) + @afinnWordList[wordA] = value + delete @afinnWordList[word] + word = wordA + if @constructor.isPhrase(word) + compressedWord = @constructor.compressPhrase(word) + @afinnPhrases.push word + @afinnPhrasesCamel.push compressedWord + @afinnWordList[compressedWord] = value + + # Returns an overall sentiment score for sentence + analyseSentence: ( sentence ) -> + score = 0 + wordsArr = @getWordsInSentence(sentence) + for word in wordsArr + if @doesWordExist(word) + score += @getScoreOfWord(word) + @constructor.scaleScore(score) + + # Returns a boolean true if given word is found in word list + doesWordExist: ( word ) -> + word of @afinnWordList + + # Returns an integer value + or - sentiment score for given word + getScoreOfWord: ( word ) -> + @afinnWordList[word] || 0 + + + # Formats sentence and returns a lowercase a-z array of words + getWordsInSentence: ( sentence ) -> + sentence = sentence || '' # Double check is defined + sentence = if typeof sentence == 'string'then sentence.toLowerCase() else '' + sentence = sentence.replace(/(?:https?|ftp):\/\/[\n\S]+/g, '') # Remove URLs + sentence = sentence.replace(/[\n\r\t]/gi, ' ') # Transform \n and \t + sentence = @constructor.transformPlainApostrophe(sentence) # Transform ' + sentence = sentence.replace(/[^\w\s']/gi, '') # Remove special characters + # Replace phrases with the camelized version + for word,i in @afinnPhrases + sentence = sentence.replace( new RegExp(word,'g'), @afinnPhrasesCamel[i]) + sentence = sentence.split(' ') # Split into an array + sentence = sentence.filter((n) -> n != '') # Remove blanks + @constructor.removeDuplicates(sentence) + + # Remove Duplicates + @removeDuplicates: ( arr ) -> + if arr.length == 0 + return [] + res = {} + res[arr[key]] = arr[key] for key in [0..arr.length-1] + value for key, value of res + + # Ensure score is in a valid range between -1 to +1 + @scaleScore: 
( score ) -> + score = if score > 10 then 10 else score + score = if score < -10 then -10 else score + score/10 + + @isPhrase: ( word ) -> + word.indexOf(' ') != -1 + + @compressPhrase: ( phrase ) -> + (w[0].toUpperCase() + w[1..-1].toLowerCase() for w in phrase.split /\s+/) + .join '' + + @transformPlainApostrophe: ( phrase ) -> + phrase.replace(/[`"]/gi, '\'') diff --git a/index.js b/index.js index fd0d23b..527adc5 100644 --- a/index.js +++ b/index.js @@ -1,88 +1,135 @@ (function() { - var afinnWordList, analyseSentence, doesWordExist, getScoreOfWord, getWordsInSentence, removeDuplicates, scaleScore; + var Analizer, sentimentAnalysisPath; - afinnWordList = require(__dirname + '/AFINN-111.json'); + sentimentAnalysisPath = __dirname; - doesWordExist = function(word) { - if (word in afinnWordList) { - return true; - } else { - return false; + module.exports = Analizer = (function() { + function Analizer(opts) { + var compressedWord, i, ref, ref1, value, word, wordA; + this.afinnWordList = require(sentimentAnalysisPath + '/AFINN-111.json'); + if (opts) { + if (opts.customWordsFile) { + opts.customWords = require(opts.customWordsFile); + } + if (opts.customWords) { + ref = opts.customWords; + for (i in ref) { + word = ref[i]; + this.afinnWordList[i] = word; + } + } + } + this.afinnPhrases = []; + this.afinnPhrasesCamel = []; + ref1 = this.afinnWordList; + for (word in ref1) { + value = ref1[word]; + wordA = this.constructor.transformPlainApostrophe(word); + if (wordA !== word) { + this.afinnWordList[wordA] = value; + delete this.afinnWordList[word]; + word = wordA; + } + if (this.constructor.isPhrase(word)) { + compressedWord = this.constructor.compressPhrase(word); + this.afinnPhrases.push(word); + this.afinnPhrasesCamel.push(compressedWord); + this.afinnWordList[compressedWord] = value; + } + } } - }; - getScoreOfWord = function(word) { - if (afinnWordList[word]) { - return afinnWordList[word]; - } else { - return 0; - } - }; + Analizer.prototype.analyseSentence = 
function(sentence) { + var j, len, score, word, wordsArr; + score = 0; + wordsArr = this.getWordsInSentence(sentence); + for (j = 0, len = wordsArr.length; j < len; j++) { + word = wordsArr[j]; + if (this.doesWordExist(word)) { + score += this.getScoreOfWord(word); + } + } + return this.constructor.scaleScore(score); + }; - getWordsInSentence = function(sentence) { - sentence = sentence != null ? sentence : ''; - sentence = typeof sentence === 'string' ? sentence : ''; - sentence = sentence.toLowerCase(); - sentence = sentence.replace(/(?:https?|ftp):\/\/[\n\S]+/g, ''); - sentence = sentence.replace(/[^\w\s]/gi, ''); - sentence = sentence.split(' '); - sentence = sentence.filter(function(n) { - return n !== ''; - }); - return sentence = removeDuplicates(sentence); - }; + Analizer.prototype.doesWordExist = function(word) { + return word in this.afinnWordList; + }; - removeDuplicates = function(arr) { - var i, key, ref, res, results, value; - if (arr.length === 0) { - return []; - } - res = {}; - for (key = i = 0, ref = arr.length - 1; 0 <= ref ? i <= ref : i >= ref; key = 0 <= ref ? ++i : --i) { - res[arr[key]] = arr[key]; - } - results = []; - for (key in res) { - value = res[key]; - results.push(value); - } - return results; - }; + Analizer.prototype.getScoreOfWord = function(word) { + return this.afinnWordList[word] || 0; + }; - scaleScore = function(score) { - score = score > 10 ? 10 : score; - score = score < -10 ? -10 : score; - return score / 10; - }; + Analizer.prototype.getWordsInSentence = function(sentence) { + var i, j, len, ref, word; + sentence = sentence || ''; + sentence = typeof sentence === 'string' ? 
sentence.toLowerCase() : ''; + sentence = sentence.replace(/(?:https?|ftp):\/\/[\n\S]+/g, ''); + sentence = sentence.replace(/[\n\r\t]/gi, ' '); + sentence = this.constructor.transformPlainApostrophe(sentence); + sentence = sentence.replace(/[^\w\s']/gi, ''); + ref = this.afinnPhrases; + for (i = j = 0, len = ref.length; j < len; i = ++j) { + word = ref[i]; + sentence = sentence.replace(new RegExp(word, 'g'), this.afinnPhrasesCamel[i]); + } + sentence = sentence.split(' '); + sentence = sentence.filter(function(n) { + return n !== ''; + }); + return this.constructor.removeDuplicates(sentence); + }; - analyseSentence = function(sentence) { - var i, len, score, word, wordsArr; - score = 0; - wordsArr = getWordsInSentence(sentence); - for (i = 0, len = wordsArr.length; i < len; i++) { - word = wordsArr[i]; - if (doesWordExist(word)) { - score += getScoreOfWord(word); + Analizer.removeDuplicates = function(arr) { + var j, key, ref, res, results, value; + if (arr.length === 0) { + return []; } - } - return scaleScore(score); - }; + res = {}; + for (key = j = 0, ref = arr.length - 1; 0 <= ref ? j <= ref : j >= ref; key = 0 <= ref ? ++j : --j) { + res[arr[key]] = arr[key]; + } + results = []; + for (key in res) { + value = res[key]; + results.push(value); + } + return results; + }; - module.exports = analyseSentence; + Analizer.scaleScore = function(score) { + score = score > 10 ? 10 : score; + score = score < -10 ? 
-10 : score; + return score / 10; + }; - if (process.env.NODE_ENV === 'test') { - module.exports = { - main: analyseSentence, - _private: { - scaleScore: scaleScore, - doesWordExist: doesWordExist, - getScoreOfWord: getScoreOfWord, - removeDuplicates: removeDuplicates, - getWordsInSentence: getWordsInSentence - } + Analizer.isPhrase = function(word) { + return word.indexOf(' ') !== -1; }; - } + + Analizer.compressPhrase = function(phrase) { + var w; + return ((function() { + var j, len, ref, results; + ref = phrase.split(/\s+/); + results = []; + for (j = 0, len = ref.length; j < len; j++) { + w = ref[j]; + results.push(w[0].toUpperCase() + w.slice(1).toLowerCase()); + } + return results; + })()).join(''); + }; + + Analizer.transformPlainApostrophe = function(phrase) { + return phrase.replace(/[`"]/gi, '\''); + }; + + return Analizer; + + })(); }).call(this); -/* (C) Alicia Sykes 2015 *\ +/* (C) Alicia Sykes 2015 *\ +|* (C) Maurizio Carboni 2016 *| \* MIT License. Read full license at: https://goo.gl/IL4lQJ */ \ No newline at end of file diff --git a/package.json b/package.json index bc94d07..8824517 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sentiment-analysis", - "version": "0.1.1", + "version": "0.2.1", "description": "Sentiment analysis module using AFINN-111", "main": "index.js", "scripts": { diff --git a/test/main.test.coffee b/test/main.test.coffee index 8fa92e0..5200822 100644 --- a/test/main.test.coffee +++ b/test/main.test.coffee @@ -1,38 +1,38 @@ expect = require('chai').expect -process.env.NODE_ENV = 'test' +sentimentAnalysisLib = require('../index') -sentimentAnalysis = require('../index').main +sentimentAnalysis = new sentimentAnalysisLib describe 'Check the modules basic functionality', ()-> it 'should return an integer', () -> - expect(sentimentAnalysis('lorem ipsum dolor seit amet')) + expect(sentimentAnalysis.analyseSentence('lorem ipsum dolor seit amet')) .to.be.a('number') - expect(sentimentAnalysis('foo 
bar')).to.not.be.undefined; + expect(sentimentAnalysis.analyseSentence('foo bar')).to.not.be.undefined; it 'Should return the correct sentiment value for negative sentences', () -> - expect(sentimentAnalysis('I hate everything, everything is stupid')).equal(-0.5) - expect(sentimentAnalysis('London is gloomy today because of all the smog')).equal(-0.4) - expect(sentimentAnalysis('He was captured and put into slavery')).equal(-0.3) - expect(sentimentAnalysis('Windows is very unstable')).equal(-0.2) - expect(sentimentAnalysis('The slug was tired, he felt slugish')).equal(-0.2) + expect(sentimentAnalysis.analyseSentence('I hate everything, everything is stupid')).equal(-0.5) + expect(sentimentAnalysis.analyseSentence('London is gloomy today because of all the smog')).equal(-0.4) + expect(sentimentAnalysis.analyseSentence('He was captured and put into slavery')).equal(-0.3) + expect(sentimentAnalysis.analyseSentence('Windows is very unstable')).equal(-0.2) + expect(sentimentAnalysis.analyseSentence('The slug was tired, he felt slugish')).equal(-0.2) it 'Should return the correct sentiment value for positive sentences', () -> - expect(sentimentAnalysis('Today is a wonderful amazing awesome day')).equal(1) - expect(sentimentAnalysis('I am so grateful for all the presents, thank you!')).equal(0.5) + expect(sentimentAnalysis.analyseSentence('Today is a wonderful amazing awesome day')).equal(1) + expect(sentimentAnalysis.analyseSentence('I am so grateful for all the presents, thank you!')).equal(0.5) + + it 'Should return the correct sentiment value for sentences that contains phrase-words', () -> + expect(sentimentAnalysis.analyseSentence('This is some cool stuff')).equal(0.3) + expect(sentimentAnalysis.analyseSentence('This does not work')).equal(-0.3) + expect(sentimentAnalysis.analyseSentence('I don\'t like you')).equal(-0.2) it 'Should not return a score greater than 1 of smaller than -1', () -> - expect(sentimentAnalysis('happy happy amazing awesome cool')) + 
expect(sentimentAnalysis.analyseSentence('happy happy amazing awesome cool')) .to.be.above(-1.1).to.be.below(1.1) - expect(sentimentAnalysis('crap crap crap crap')) + expect(sentimentAnalysis.analyseSentence('crap crap crap crap')) .to.be.above(-1.1).to.be.below(1.1) it 'Should be able to cope with weird inputs and never crash', ()-> - - - - - diff --git a/test/utils.test.coffee b/test/utils.test.coffee index 8c31087..3ae755b 100644 --- a/test/utils.test.coffee +++ b/test/utils.test.coffee @@ -2,140 +2,173 @@ expect = require('chai').expect process.env.NODE_ENV = 'test' -sentimentAnalysis = require('../index')._private +sentimentAnalysisLib = require('../index') + +sentimentAnalysis = new sentimentAnalysisLib({ + customWords : { + 'special`word' : 0 + } +}) describe 'doesWordExist will return boolean weather word exists', ()-> - doesWordExist = sentimentAnalysis.doesWordExist it 'should return a boolean value', ()-> - expect(doesWordExist('coffee')).to.be.a('boolean') - expect(doesWordExist('mocha')).to.be.a('boolean') - expect(doesWordExist('java')).to.be.a('boolean') + expect(sentimentAnalysis.doesWordExist('coffee')).to.be.a('boolean') + expect(sentimentAnalysis.doesWordExist('mocha')).to.be.a('boolean') + expect(sentimentAnalysis.doesWordExist('java')).to.be.a('boolean') it 'should return true for words that exist', () -> - expect(doesWordExist('woo')).to.be.true - expect(doesWordExist('alive')).to.be.true - expect(doesWordExist('awesome')).to.be.true - expect(doesWordExist('anger')).to.be.true - expect(doesWordExist('bright')).to.be.true - expect(doesWordExist('love')).to.be.true - expect(doesWordExist('easy')).to.be.true - expect(doesWordExist('drunk')).to.be.true - expect(doesWordExist('dumb')).to.be.true - expect(doesWordExist('hacked')).to.be.true - expect(doesWordExist('important')).to.be.true - expect(doesWordExist('hug')).to.be.true - expect(doesWordExist('itchy')).to.be.true - expect(doesWordExist('laugh')).to.be.true - 
expect(doesWordExist('stupid')).to.be.true - expect(doesWordExist('bomb')).to.be.true + expect(sentimentAnalysis.doesWordExist('woo')).to.be.true + expect(sentimentAnalysis.doesWordExist('alive')).to.be.true + expect(sentimentAnalysis.doesWordExist('awesome')).to.be.true + expect(sentimentAnalysis.doesWordExist('anger')).to.be.true + expect(sentimentAnalysis.doesWordExist('bright')).to.be.true + expect(sentimentAnalysis.doesWordExist('love')).to.be.true + expect(sentimentAnalysis.doesWordExist('easy')).to.be.true + expect(sentimentAnalysis.doesWordExist('drunk')).to.be.true + expect(sentimentAnalysis.doesWordExist('dumb')).to.be.true + expect(sentimentAnalysis.doesWordExist('hacked')).to.be.true + expect(sentimentAnalysis.doesWordExist('important')).to.be.true + expect(sentimentAnalysis.doesWordExist('hug')).to.be.true + expect(sentimentAnalysis.doesWordExist('itchy')).to.be.true + expect(sentimentAnalysis.doesWordExist('laugh')).to.be.true + expect(sentimentAnalysis.doesWordExist('stupid')).to.be.true + expect(sentimentAnalysis.doesWordExist('bomb')).to.be.true it 'should return false for words that do not exist', () -> - expect(doesWordExist('hello')).to.be.false - expect(doesWordExist('world')).to.be.false - expect(doesWordExist('everything')).to.be.false - expect(doesWordExist('is')).to.be.false - expect(doesWordExist('stupidness')).to.be.false - expect(doesWordExist('acid')).to.be.false - expect(doesWordExist('dinosaurs')).to.be.false - expect(doesWordExist('laptop')).to.be.false - expect(doesWordExist('pepsi')).to.be.false - expect(doesWordExist('lorem')).to.be.false - expect(doesWordExist('ipsum')).to.be.false - expect(doesWordExist('squashed')).to.be.false - expect(doesWordExist('watson')).to.be.false - expect(doesWordExist('brain')).to.be.false + expect(sentimentAnalysis.doesWordExist('hello')).to.be.false + expect(sentimentAnalysis.doesWordExist('world')).to.be.false + expect(sentimentAnalysis.doesWordExist('everything')).to.be.false + 
expect(sentimentAnalysis.doesWordExist('is')).to.be.false + expect(sentimentAnalysis.doesWordExist('stupidness')).to.be.false + expect(sentimentAnalysis.doesWordExist('acid')).to.be.false + expect(sentimentAnalysis.doesWordExist('dinosaurs')).to.be.false + expect(sentimentAnalysis.doesWordExist('laptop')).to.be.false + expect(sentimentAnalysis.doesWordExist('pepsi')).to.be.false + expect(sentimentAnalysis.doesWordExist('lorem')).to.be.false + expect(sentimentAnalysis.doesWordExist('ipsum')).to.be.false + expect(sentimentAnalysis.doesWordExist('squashed')).to.be.false + expect(sentimentAnalysis.doesWordExist('watson')).to.be.false + expect(sentimentAnalysis.doesWordExist('brain')).to.be.false it 'should not throw an error with funny values', ()-> - expect(doesWordExist(1)).to.be.a('boolean') - expect(doesWordExist([])).to.be.a('boolean') - expect(doesWordExist(true)).to.be.a('boolean') - expect(doesWordExist(undefined)).to.be.a('boolean') - expect(doesWordExist(1)).to.be.false - expect(doesWordExist([])).to.be.false - expect(doesWordExist(undefined)).to.be.false + expect(sentimentAnalysis.doesWordExist(1)).to.be.a('boolean') + expect(sentimentAnalysis.doesWordExist([])).to.be.a('boolean') + expect(sentimentAnalysis.doesWordExist(true)).to.be.a('boolean') + expect(sentimentAnalysis.doesWordExist(undefined)).to.be.a('boolean') + expect(sentimentAnalysis.doesWordExist(1)).to.be.false + expect(sentimentAnalysis.doesWordExist([])).to.be.false + expect(sentimentAnalysis.doesWordExist(undefined)).to.be.false + +describe 'special words should be converted on a common format', () -> + it 'words that contains a ` should become a \'', () -> + expect(sentimentAnalysis.doesWordExist('special`word')).to.be.false + expect(sentimentAnalysis.doesWordExist('special\'word')).to.be.true + it 'words that contains a space should be camelized', () -> + expect(sentimentAnalysis.doesWordExist('CoolStuff')).to.be.true + expect(sentimentAnalysis.doesWordExist('CashingIn')).to.be.true + 
expect(sentimentAnalysis.doesWordExist('Don\'tLike')).to.be.true -describe 'getScoreOfWord method return a sentiment score for that word', ()-> - getScoreOfWord = sentimentAnalysis.getScoreOfWord + +describe 'sentimentAnalysis.getScoreOfWord method return a sentiment score for that word', ()-> + # sentimentAnalysis.getScoreOfWord = sentimentAnalysis.sentimentAnalysis.getScoreOfWord it 'should return an integer', ()-> - expect(getScoreOfWord('amazing')).to.be.a('number') - expect(getScoreOfWord('warm')).to.be.a('number') - expect(getScoreOfWord('yummy')).to.be.a('number') + expect(sentimentAnalysis.getScoreOfWord('amazing')).to.be.a('number') + expect(sentimentAnalysis.getScoreOfWord('warm')).to.be.a('number') + expect(sentimentAnalysis.getScoreOfWord('yummy')).to.be.a('number') it 'should be in a range of -5 to + 5', () -> - expect(getScoreOfWord('nice')).to.be.above(-5).to.be.below(5) - expect(getScoreOfWord('good')).to.be.below(5).to.be.below(5) - expect(getScoreOfWord('great')).to.be.above(-5).to.be.below(5) - expect(getScoreOfWord('awesome')).to.be.above(-5).to.be.below(5) + expect(sentimentAnalysis.getScoreOfWord('nice')).to.be.above(-5).to.be.below(5) + expect(sentimentAnalysis.getScoreOfWord('good')).to.be.below(5).to.be.below(5) + expect(sentimentAnalysis.getScoreOfWord('great')).to.be.above(-5).to.be.below(5) + expect(sentimentAnalysis.getScoreOfWord('awesome')).to.be.above(-5).to.be.below(5) it 'should return 0 if word doesn\'t exist, rather than crashing', ()-> - expect(getScoreOfWord('batman')).equal(0) - expect(getScoreOfWord('superman')).equal(0) - expect(getScoreOfWord('spiderman')).equal(0) - expect(getScoreOfWord('pepperpig')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('batman')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('superman')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('spiderman')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('pepperpig')).equal(0) it 'should return 0 if passed multiple words at a time that 
don\'t exist', ()-> - expect(getScoreOfWord('type error')).equal(0) - expect(getScoreOfWord('everything is stupid')).equal(0) - expect(getScoreOfWord('dinosaurs are awesome')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('type error')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('everything is stupid')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('dinosaurs are awesome')).equal(0) it 'should return actual positive score for positive words that exist', ()-> - expect(getScoreOfWord('united')).equal(1) - expect(getScoreOfWord('unstoppable')).equal(2) - expect(getScoreOfWord('excited')).equal(3) - expect(getScoreOfWord('win')).equal(4) - expect(getScoreOfWord('outstanding')).equal(5) + expect(sentimentAnalysis.getScoreOfWord('united')).equal(1) + expect(sentimentAnalysis.getScoreOfWord('unstoppable')).equal(2) + expect(sentimentAnalysis.getScoreOfWord('excited')).equal(3) + expect(sentimentAnalysis.getScoreOfWord('win')).equal(4) + expect(sentimentAnalysis.getScoreOfWord('outstanding')).equal(5) it 'should return actual negative score for negative words that exist', ()-> - expect(getScoreOfWord('fight')).equal(-1) - expect(getScoreOfWord('fails')).equal(-2) - expect(getScoreOfWord('evil')).equal(-3) - expect(getScoreOfWord('fraud')).equal(-4) - expect(getScoreOfWord('twat')).equal(-5) + expect(sentimentAnalysis.getScoreOfWord('fight')).equal(-1) + expect(sentimentAnalysis.getScoreOfWord('fails')).equal(-2) + expect(sentimentAnalysis.getScoreOfWord('evil')).equal(-3) + expect(sentimentAnalysis.getScoreOfWord('fraud')).equal(-4) + expect(sentimentAnalysis.getScoreOfWord('twat')).equal(-5) it 'should return 0 for neutral words that exist', ()-> - expect(getScoreOfWord('some kind')).equal(0) + expect(sentimentAnalysis.getScoreOfWord('some kind')).equal(0) # There is only 1 neutral result in the AFINN word list! 
describe 'getWordsInSentence will transform a sentence into a clean array', ()-> - getWordsInSentence = sentimentAnalysis.getWordsInSentence it 'Should correctly turn a sentence into an array', ()-> - expect(getWordsInSentence('hello world')).eql(['hello', 'world']) - expect(getWordsInSentence('this is a longer sentence')) + expect(sentimentAnalysis.getWordsInSentence('hello world')).eql(['hello', 'world']) + expect(sentimentAnalysis.getWordsInSentence('this is a longer sentence')) .eql(['this', 'is', 'a', 'longer', 'sentence']) it 'Should normalise case', ()-> - expect(getWordsInSentence('HeLlO wOrLd')).eql(['hello', 'world']) - expect(getWordsInSentence('JAVASCRIPT')).eql(['javascript']) + expect(sentimentAnalysis.getWordsInSentence('HeLlO wOrLd')).eql(['hello', 'world']) + expect(sentimentAnalysis.getWordsInSentence('JAVASCRIPT')).eql(['javascript']) it 'Should remove dupplicates', ()-> - expect(getWordsInSentence('foo foo bar foo')) + expect(sentimentAnalysis.getWordsInSentence('foo foo bar foo')) .eql(['foo', 'bar']) - expect(getWordsInSentence('foo foo BAR Foo bAr foO bar foo')) + expect(sentimentAnalysis.getWordsInSentence('foo foo BAR Foo bAr foO bar foo')) .eql(['foo', 'bar', ]) it 'Should remove blanks', ()-> - expect(getWordsInSentence('space blank ')) + expect(sentimentAnalysis.getWordsInSentence('space blank ')) .eql(['space', 'blank']) + it 'Should transform new lines and tabs', ()-> + expect(sentimentAnalysis.getWordsInSentence("space\nblank\tother")) + .eql(['space', 'blank', 'other']) + it 'Should remove special characters', ()-> - expect(getWordsInSentence('foo ! ^&*^&^%^%&^^&%%^bar$$%^')) + expect(sentimentAnalysis.getWordsInSentence('foo ! 
^&*^&^%^%&^^&%%^bar$$%^')) .eql(['foo', 'bar']) -describe 'removeDupplicates should remove dupplicates from an array', () -> - removeDupplicates = sentimentAnalysis.removeDuplicates + it 'Should transform apostrophes to a plain apostrophe', ()-> + expect(sentimentAnalysis.getWordsInSentence('don\'t like don`t like don"t like')) + .eql(['Don\'tLike']) + + it 'Should handle phrase words', ()-> + expect(sentimentAnalysis.getWordsInSentence('cool stuff cashing in')) + .eql(['CoolStuff', 'CashingIn']) + +describe 'removeDuplicates should remove dupplicates from an array', () -> + removeDuplicates = sentimentAnalysis.constructor.removeDuplicates it 'should remove duplicates', () -> - expect(removeDupplicates(['hello', 'world', 'hello', 'hello'])) + expect(removeDuplicates(['hello', 'world', 'hello', 'hello'])) .eql(['hello', 'world']) +describe 'transformPlainApostrophe should transform strange apostrophes to plain one', () -> + transformPlainApostrophe = sentimentAnalysis.constructor.transformPlainApostrophe + + it 'should trasform to plain', () -> + expect(transformPlainApostrophe('don\'t like don`t like don"t like')) + .eql('don\'t like don\'t like don\'t like') + describe 'scaleScore should ensure the score is within the valid range', () -> - scaleScore = sentimentAnalysis.scaleScore + scaleScore = sentimentAnalysis.constructor.scaleScore it 'should not be below -1', () -> expect(scaleScore(-1.2)).to.be.above(-1.01) @@ -163,3 +196,31 @@ describe 'scaleScore should ensure the score is within the valid range', () -> expect(scaleScore(3.1415926535897932)).to.be.within(-1,+1); expect(scaleScore(-273.15)).to.be.within(-1,+1); +describe 'isPhrase should detect words composed by multiple words', () -> + isPhrase = sentimentAnalysis.constructor.isPhrase + + it 'should not be a phrase for single words', () -> + expect(isPhrase('nice')).to.be.false + expect(isPhrase('good')).to.be.false + expect(isPhrase('great')).to.be.false + expect(isPhrase('awesome')).to.be.false + + it 
'should be a phrase for multiple words', () -> + expect(isPhrase('cashing in')).to.be.true + expect(isPhrase('cool stuff')).to.be.true + expect(isPhrase('does not work')).to.be.true + expect(isPhrase('dont like')).to.be.true + +describe 'compressPhrase should camelize phrases', () -> + compressPhrase = sentimentAnalysis.constructor.compressPhrase + + it 'should camelize', () -> + expect(compressPhrase('cashing in')).to.be.equal('CashingIn') + expect(compressPhrase('cool stuff')).to.be.equal('CoolStuff') + expect(compressPhrase('does not work')).to.be.equal('DoesNotWork') + expect(compressPhrase('dont like')).to.be.equal('DontLike') + + it 'afinnWordList should contain camelized version of phrases', () -> + expect(sentimentAnalysis.afinnWordList.CashingIn).to.be.a('number') + expect(sentimentAnalysis.afinnWordList.CoolStuff).to.be.a('number') + expect(sentimentAnalysis.afinnWordList.DoesNotWork).to.be.a('number')