-
Notifications
You must be signed in to change notification settings - Fork 28
User aggregators #95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
nmtiwari
wants to merge
31
commits into
master
Choose a base branch
from
user_Aggregators
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
User aggregators #95
Changes from all commits
Commits
Show all changes
31 commits
Select commit
Hold shift + click to select a range
917e118
Merge branch 'master' into boa_evaluator
a6e1c3d
Initial commit for User Defined Aggregation support
a31e918
Fixing bug : Only one user defined aggregator runs in the presence of…
e4b0f05
Removing unnecessary prints
ba6b2ff
Fixing a bug: Filter non aggregator functions from list
1829489
Merge branch 'boa_evaluator' into user_Aggregators
de27178
Fixing a test case as code generation has changed.
a7fd7b6
Updating latest code generation string template.
81e8f37
Fixing bug in UserDefinedCode generating process. Fixing fullyqualifi…
2972425
adding naive bayes example using user defined aggregation
b161ab2
Allowing creation of arrays of nested and complex types
81af781
Adding capability to convert a tuple into array if possible. If tuple…
3295b15
code for matrix transpose, inverse, summation and substraction suppor…
a05a385
Adding machine learning examples codes in test directory
ff5b37b
Adding matrix operations
6b6aa9f
Fixing bug in getCol method in matrix operations
0da11ef
linear regression optimized and unoptimized code
737060d
adding neural network without back propagation
db0a04f
Changes in MatrixOperations and Adding Print facility for debugging H…
98eb3ac
removing merge conflicts
fb23150
adding back propagation in neural
491adfc
adding pca
72711ce
adding optimized pca
edf12ff
adding new machine learning algorithms
656775d
Adding changes to support options as user defined aggregations
e691a5b
Changes to support serialization of ml model in Boa
390fc86
Storing the class as part of model
6ecf209
Adding serialization support for the model using simple json
fec8ee8
adding support for loading ml model
739eb3c
Allowing options in user defined aggregator class
459000f
adding training model usage
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
# The input record this program runs over. | ||
p: Project = input; | ||
# Feature vector: a, b, c are filled with the AST node, method and class counts; d holds the label produced by scale(). | ||
type fv = {a:int, b:int, c:int, d:int}; | ||
# One float statistic per feature (a, b, c). | ||
type stats = {a_stat:float, b_stat:float, c_stat:float}; | ||
# Per-class summary: feature means (avg) and feature standard deviations (dev). | ||
type complete_stat = {avg: stats, dev: stats}; | ||
# A value sent to the aggregator: one fv for the training side and one for the testing side. | ||
type Data = {training: fv, testing: fv}; | ||
# Threshold compared against rand() when choosing which Data value to emit. | ||
splitRatio : float = 0.67; | ||
|
||
# User-defined aggregator implementing Gaussian naive Bayes. | ||
# Every incoming Data value contributes its `training` fv to the training set | ||
# and its `testing` fv to the test set. Field d is the class label; a, b and c | ||
# are the features. Returns the percentage of test records whose predicted | ||
# label equals their d field. | ||
naive := function(vals : array of Data) : float { | ||
    train : array of fv; | ||
    test : array of fv; | ||
|
||
    separated : map[int] of array of fv; # training records grouped by class label | ||
    summaries : map[int] of complete_stat; # per-class feature mean and deviation | ||
|
||
    # separate the training and testing datasets | ||
    foreach(i:int; def(vals[i])) { | ||
        if(def(train)) { | ||
            train = train + {vals[i].training}; | ||
        } else { | ||
            train = {vals[i].training}; | ||
        } | ||
        if(def(test)) { | ||
            test = test + {vals[i].testing}; | ||
        } else { | ||
            test = {vals[i].testing}; | ||
        } | ||
|
||
    } | ||
|
||
|
||
    # classify training datasets | ||
    foreach(i:int; def(train[i])) { | ||
        temp : array of fv = {train[i]}; | ||
        if(!haskey(separated, train[i].d)) { | ||
            separated[train[i].d] = temp; | ||
        } else { | ||
            separated[train[i].d] = separated[train[i].d] + temp; | ||
        } | ||
    } | ||
|
||
    # all the classes | ||
    classes : array of int = keys(separated); | ||
|
||
    # summarize data from training dataset | ||
    foreach(i:int; def(classes[i])) { | ||
        members := separated[classes[i]]; | ||
        count := len(members); | ||
|
||
        # calculate mean | ||
        feature_mean : stats = {0.0, 0.0, 0.0}; | ||
        foreach(j:int; def(members[j])) { | ||
            feature_mean.a_stat = feature_mean.a_stat + members[j].a; | ||
            feature_mean.b_stat = feature_mean.b_stat + members[j].b; | ||
            feature_mean.c_stat = feature_mean.c_stat + members[j].c; | ||
        } | ||
        feature_mean.a_stat = feature_mean.a_stat / count; | ||
        feature_mean.b_stat = feature_mean.b_stat / count; | ||
        feature_mean.c_stat = feature_mean.c_stat / count; | ||
|
||
|
||
        # calculate sd: accumulate the SQUARED deviations from the mean, | ||
        # then take the square root of their average | ||
        feature_sd : stats = {0.0, 0.0, 0.0}; | ||
        foreach(j:int; def(members[j])) { | ||
            feature_sd.a_stat = feature_sd.a_stat + pow((1.0 * members[j].a) - feature_mean.a_stat, 2); | ||
            feature_sd.b_stat = feature_sd.b_stat + pow((1.0 * members[j].b) - feature_mean.b_stat, 2); | ||
            feature_sd.c_stat = feature_sd.c_stat + pow((1.0 * members[j].c) - feature_mean.c_stat, 2); | ||
        } | ||
        feature_sd.a_stat = sqrt(feature_sd.a_stat / count); | ||
        feature_sd.b_stat = sqrt(feature_sd.b_stat / count); | ||
        feature_sd.c_stat = sqrt(feature_sd.c_stat / count); | ||
|
||
        # summarized a class | ||
        summaries[classes[i]] = {feature_mean, feature_sd}; | ||
    } | ||
|
||
|
||
    predictions: array of int; | ||
    predictions = new(predictions, len(test), -1); | ||
|
||
    # predict for each test data | ||
    foreach(i:int; def(test[i])) { | ||
        probabilities : map[int] of float; | ||
        foreach(j: int; def(classes[j])) { | ||
            mean := summaries[classes[j]].avg; | ||
            deviation := summaries[classes[j]].dev; | ||
            # product of the per-feature Gaussian densities; each feature is | ||
            # normalised by its OWN standard deviation | ||
            probabilities[classes[j]] = 1.0; | ||
            probabilities[classes[j]] = probabilities[classes[j]] * (1.0 / (sqrt(2 * 3.14) * deviation.a_stat)) * (exp(-1 * ((pow((1.0 * test[i].a) - mean.a_stat, 2))/(2 * pow(deviation.a_stat, 2))))); | ||
            probabilities[classes[j]] = probabilities[classes[j]] * (1.0 / (sqrt(2 * 3.14) * deviation.b_stat)) * (exp(-1 * ((pow((1.0 * test[i].b) - mean.b_stat, 2))/(2 * pow(deviation.b_stat, 2))))); | ||
            probabilities[classes[j]] = probabilities[classes[j]] * (1.0 / (sqrt(2 * 3.14) * deviation.c_stat)) * (exp(-1 * ((pow((1.0 * test[i].c) - mean.c_stat, 2))/(2 * pow(deviation.c_stat, 2))))); | ||
        } | ||
|
||
        # pick the class with the highest probability; -1 means "none chosen yet" | ||
        bestProb : float = 0.0; | ||
        bestLab : int = -1; | ||
        foreach(j: int; def(classes[j])) { | ||
            if ((bestLab == -1) || (bestProb < probabilities[classes[j]])) { | ||
                bestProb = probabilities[classes[j]]; | ||
                bestLab = classes[j]; | ||
            } | ||
        } | ||
        predictions[i] = bestLab; | ||
    } | ||
|
||
    # accuracy: share of test records whose prediction matches the label, in percent | ||
    correct : float = 0.0; | ||
    foreach(i:int; def(test[i])) { | ||
        if(predictions[i] == test[i].d) { | ||
            correct = correct + 1.0; | ||
        } | ||
    } | ||
    return correct/len(test) * 100; | ||
}; | ||
|
||
# Maps the three size counts to a label in 0..3: one point for each count | ||
# that exceeds its threshold (ast > 1000, method > 500, class > 50). | ||
scale := function(ast: int, method: int, class: int) : int { | ||
    score : int = 0; | ||
    if (ast > 1000) | ||
        score++; | ||
    if (method > 500) | ||
        score++; | ||
    if (class > 50) | ||
        score++; | ||
    return score; | ||
}; | ||
|
||
|
||
# Output variable backed by the user-defined aggregator function `naive`. | ||
naive_bayes : output naive of Data; | ||
|
||
# count ast nodes | ||
|
||
astCount := 0; | ||
classCount := 0; | ||
methodCount := 0; | ||
visit(p, visitor { | ||
    # only look at the latest snapshot | ||
    before n: CodeRepository -> { | ||
        snapshot := getsnapshot(n); | ||
        foreach (i: int; def(snapshot[i])) | ||
            visit(snapshot[i]); | ||
        stop; | ||
    } | ||
    before node: Declaration -> { | ||
        if (node.kind == TypeKind.CLASS) { | ||
            classCount++; | ||
            # one count per declared method; len() avoids using a Method | ||
            # value as the (boolean) foreach condition | ||
            methodCount = methodCount + len(node.methods); | ||
        } | ||
    } | ||
    # by default, count all visited nodes | ||
    before _ -> astCount++; | ||
    # these nodes are not part of the AST, so do nothing when visiting | ||
    before Project, ChangedFile -> ; | ||
}); | ||
|
||
|
||
|
||
# Build the two possible Data layouts for this project: the real feature | ||
# vector on the training side (data1) or on the testing side (data2), with an | ||
# all-zero placeholder on the other side. | ||
dummy : fv = {0, 0, 0, 0}; | ||
nondummy : fv = {astCount, methodCount, classCount, scale(astCount, methodCount, classCount)}; | ||
data1: Data = {nondummy, dummy}; | ||
data2: Data = {dummy, nondummy}; | ||
|
||
# Emit three values, each time drawing a fresh random number to pick the layout. | ||
# NOTE(review): with splitRatio = 0.67 this sends the real vector to the | ||
# training side only about a third of the time - confirm that is intended. | ||
for (k := 0; k < 3; k++) { | ||
    if (rand() > splitRatio) | ||
        naive_bayes << data1; | ||
    else | ||
        naive_bayes << data2; | ||
} |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package boa; | ||
|
||
/** | ||
 * Contract for enum types that expose an underlying value. | ||
 */ | ||
public interface BoaEnumInterface { | ||
	/** @return the value carried by this enum constant */ | ||
	Object getValue(); | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package boa; | ||
|
||
import java.io.IOException; | ||
import java.util.Collection; | ||
|
||
|
||
/** | ||
 * Contract implemented by tuple types: field access, string rendering, | ||
 * serialization and array conversion. | ||
 */ | ||
public interface BoaTup { | ||
	/** @return the tuple's values as strings */ | ||
	String[] getValues(); | ||
|
||
	/** | ||
	 * @param o the object to serialize | ||
	 * @return the serialized bytes | ||
	 * @throws IOException if serialization fails | ||
	 */ | ||
	byte[] serialize(Object o) throws IOException; | ||
|
||
	/** | ||
	 * @param f presumably the name of the field to read - TODO confirm against implementations | ||
	 * @return the value associated with {@code f} | ||
	 */ | ||
	Object getValue(String f); | ||
|
||
	/** @return a string rendering of this tuple */ | ||
	String toString(); | ||
|
||
	/** | ||
	 * @param type an array whose element type selects the result type | ||
	 * @return the tuple's contents as an array of {@code T} | ||
	 */ | ||
	<T> T[] asArray(T[] type); | ||
|
||
	/** @return the names of the tuple's fields */ | ||
	String[] getFieldNames(); | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package boa.aggregators; | ||
|
||
|
||
import boa.compiler.UserDefinedAggregators; | ||
import boa.datagen.util.FileIO; | ||
import com.google.gson.Gson; | ||
|
||
import java.io.*; | ||
|
||
/** | ||
 * Base class for user-defined aggregators. Adds the ability to persist an | ||
 * object (e.g. a trained model) as JSON on the local file system. | ||
 */ | ||
@AggregatorSpec(name = "UserDefinedAgg", formalParameters = { "any", "any" }, type = "UserDefined", canCombine = false) | ||
public abstract class UserDefinedAggregator extends Aggregator { | ||
|
||
	/** | ||
	 * Serializes {@code object} to JSON and writes it to | ||
	 * {@code <name>/<name>.model}, where {@code <name>} is | ||
	 * {@link UserDefinedAggregators#getFileName()}. The directory is created | ||
	 * if it does not exist; if it cannot be created, a message is printed to | ||
	 * standard error and nothing is written. | ||
	 * | ||
	 * @param object the object to persist | ||
	 */ | ||
	public void store(Object object) { | ||
		final String name = UserDefinedAggregators.getFileName(); | ||
		final File output = new File(name).getAbsoluteFile(); | ||
		// mkdirs() also returns false when the directory already exists, | ||
		// so only give up when there is still no directory afterwards | ||
		if (!output.mkdirs() && !output.isDirectory()) { | ||
			System.err.println("Unable to create model directory: " + output); | ||
			return; | ||
		} | ||
		writeAsJSON(object, new File(output, name + ".model").getPath()); | ||
	} | ||
|
||
	/** | ||
	 * Converts {@code object} to JSON with Gson and writes it to {@code path}. | ||
	 * | ||
	 * @param object the object to serialize | ||
	 * @param path destination file path | ||
	 */ | ||
	private void writeAsJSON(Object object, String path) { | ||
		final Gson writer = new Gson(); | ||
		FileIO.writeFileContents(new File(path), writer.toJson(object)); | ||
	} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move this to 'test/known-good/'? We don't have an examples directory, and if you are going to put code examples in there, you might as well use them as test cases.