Skip to content

Commit d5069f3

Browse files
committed
Added danielmq
1 parent 0b5424e commit d5069f3

14 files changed

+107973
-0
lines changed

.DS_Store

0 Bytes
Binary file not shown.

danielmq/NeedlemanWunsch.py

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
'''
2+
NeedlemanWunsch.py
3+
This file implements the Needleman-Wunsch sequence alignment algorithm. The code
4+
is not mine; the credit goes entirely to:
5+
https://wilkelab.org/classes/SDS348/2018_spring/labs/lab13-solution.html
6+
'''
7+
8+
# Use these values to calculate scores
9+
gap_penalty = -1
10+
match_award = 1
11+
mismatch_penalty = -2
12+
13+
# A function for making a matrix of zeroes
14+
15+
16+
def zeros(rows, cols):
    """Return a ``rows`` x ``cols`` matrix (list of lists) filled with zeros.

    Every row is a separate list object, so writing to one cell never
    changes any other row.
    """
    # Build each row independently; ``[[0] * cols] * rows`` would alias rows.
    return [[0] * cols for _ in range(rows)]
29+
30+
# A function for determining the score between any two bases in alignment
31+
32+
33+
def match_score(alpha, beta):
    """Score one aligned pair of symbols.

    Equal symbols earn ``match_award``. Otherwise, a pair in which either
    symbol is the gap character ``'-'`` costs ``gap_penalty``, and any other
    pair costs ``mismatch_penalty``.
    """
    if alpha == beta:
        return match_award

    # The symbols differ: a gap on either side takes priority over a mismatch.
    involves_gap = alpha == '-' or beta == '-'
    return gap_penalty if involves_gap else mismatch_penalty
40+
41+
# The function that actually fills out a matrix of scores
42+
43+
44+
def needleman_wunsch(seq1, seq2):
    """Globally align two sequences with the Needleman-Wunsch algorithm.

    The score matrix has one row per symbol of ``seq2`` and one column per
    symbol of ``seq1``, plus a leading row and column that represent
    aligning against nothing but gaps.

    Args:
        seq1: First sequence; its symbols index the matrix columns.
        seq2: Second sequence; its symbols index the matrix rows.

    Returns:
        A tuple ``(align1, align2, similarity)``. ``align1`` and ``align2``
        are the two sequences with ``'-'`` inserted where gaps were chosen.
        ``similarity`` is the sum of the score-matrix cells visited during
        the traceback while both indices are still inside the matrix (cells
        walked along the top or left edge afterwards are not added).
    """
    n = len(seq1)
    m = len(seq2)

    # (m + 1) x (n + 1) table: rows follow seq2, columns follow seq1.
    score = zeros(m + 1, n + 1)

    # First column and first row: a prefix aligned entirely against gaps.
    for i in range(m + 1):
        score[i][0] = gap_penalty * i
    for j in range(n + 1):
        score[0][j] = gap_penalty * j

    # Each remaining cell is the best of the diagonal (match/mismatch),
    # the cell above (gap in seq1) and the cell to the left (gap in seq2).
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            diagonal = score[i - 1][j - 1] + \
                match_score(seq1[j - 1], seq2[i - 1])
            from_above = score[i - 1][j] + gap_penalty
            from_left = score[i][j - 1] + gap_penalty
            score[i][j] = max(diagonal, from_above, from_left)

    # Traceback from the bottom-right cell. Pieces are collected in lists
    # (in reverse order) and joined once at the end.
    pieces1 = []
    pieces2 = []
    i = m
    j = n
    similarity = 0

    while i > 0 and j > 0:  # stop on reaching the top or the left edge
        current = score[i][j]
        similarity += current

        # Work out which neighbour produced the current score, preferring
        # the diagonal, then the left cell, then the cell above.
        if current == score[i - 1][j - 1] + \
                match_score(seq1[j - 1], seq2[i - 1]):
            pieces1.append(seq1[j - 1])
            pieces2.append(seq2[i - 1])
            i -= 1
            j -= 1
        elif current == score[i][j - 1] + gap_penalty:
            # Came from the left: consume a symbol of seq1 against a gap.
            pieces1.append(seq1[j - 1])
            pieces2.append('-')
            j -= 1
        else:
            # Came from above: consume a symbol of seq2 against a gap.
            # The cell was filled as the max of exactly these three
            # candidates, so this is the only remaining source; using
            # ``else`` also guarantees the loop always makes progress.
            pieces1.append('-')
            pieces2.append(seq2[i - 1])
            i -= 1

    # Finish along whichever edge was reached, up to the top-left cell.
    while j > 0:
        pieces1.append(seq1[j - 1])
        pieces2.append('-')
        j -= 1
    while i > 0:
        pieces1.append('-')
        pieces2.append(seq2[i - 1])
        i -= 1

    # The traceback ran from the end to the start, so reverse the result.
    align1 = ''.join(pieces1)[::-1]
    align2 = ''.join(pieces2)[::-1]

    return (align1, align2, similarity)

danielmq/README.md

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
2+
# OpenDNA
3+
@author: Daniel Martin Marin Quiroz
4+
@version: Mar 12, 2020
5+
@class: CS41
6+
7+
### Description
8+
9+
Hi there! This project is an attempt to build an interface that lets a user interact with a database of human profiles, each of which consists of the person's name, her/his age and a part of her/his DNA sequence.
10+
11+
The user enters this information and the program compares his/her DNA against the current database of registered user profiles. After the algorithm computes the results, the user is shown the profiles of the people whose DNA is the most similar to his/hers. Also, by using the Needleman-Wunsch sequence alignment algorithm, the user is shown the best possible alignment of two DNA sequences.
12+
13+
The results are ranked by the similarity of the DNA sequence of each profile to the DNA of the user, and the alignments are highlighted by the program, indicating matches, mismatches and gaps in each alignment. Finally, the user is able to save her/his information in the database so that future users are able to compare their DNA sequences with hers/his.
14+
15+
### How to run
16+
17+
This project was designed to run both locally and using the website template posted on Piazza. To run locally, simply type `python openDNA.py`. This will create a randomly generated user profile, compare his/her DNA against a randomly generated user database of (by default) 100 users and display the matchings, rankings and results.
18+
19+
The program can be run with the following command-line arguments: `python openDNA.py --randomDatabase` and `python openDNA.py --localDatabase`.
20+
Both options let the user enter his/her information (name, age and DNA sequence). The first compares his/her DNA with a randomly generated database and the latter compares it with an actual file in the `data/` folder named `userDatabase.json`, and it lets the user save his/her profile in the database if he/she wishes to do so.
21+
22+
To run on the web, run `python app.py`. This will create a local server that can be accessed from a web browser. On this website, the user is able to enter his/her name, age and DNA sequence. The server will receive the request and confirm that the data entered is in the correct format. If so, the user will be able to see the rankings, best matchings and similarities of the profiles in the database at `data/userDatabase.json`, and her/his information will be added to (or updated in) that database.
23+
24+
### Code design
25+
26+
Basically all the relevant code is in the `openDNA.py` file. My approach was to use Python's classes to create a `Person` class that holds the user's information and a `DNAComparison` class that is instantiated with two `Person` objects and holds data such as the best alignment of the two people's sequences, percent similarity and their relative Needleman-Wunsch similarity scores.
27+
28+
I also created functions that are in charge of loading from/saving to the user database, creating random DNA sequences, creating random user profiles, verifying the validity of the information entered by the user, processing the online requests, running the comparison of one profile against the whole database of users, formatting the results and interacting with the user when run locally.
29+
30+
### Credits
31+
I have used the implementation of the Needleman-Wunsch algorithm coded by the Wilke Computational Evolutionary Biology Lab, from the class of Spring 2018 available at [this](https://wilkelab.org/classes/SDS348/2018_spring/labs/lab13-solution.html) link.
32+
33+
### Publishing
34+
Feel free to publish/use anything from this project!
35+
36+
### Video
37+
You can find the walkthrough video here: https://drive.google.com/open?id=1ukpae3Zmj4ylQvm3LzWjqdfVz7DG1zLc
38+

danielmq/app.py

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""
2+
A lightweight Flask app that renders a form for a user to fill out and then
3+
processes the form data with a student-specified function.
4+
5+
Authors
6+
-------
7+
TODO :)
8+
"""
9+
from flask import Flask, render_template, request
10+
from werkzeug.utils import secure_filename
11+
from utils import FormInputs
12+
import utils
13+
import process
14+
import os
15+
16+
app = Flask(__name__)
17+
app.config['UPLOAD_FOLDER'] = 'uploads/'
18+
19+
"""
20+
Use these global variables to configure the application parameters.
21+
"""
22+
APP_TITLE = 'OpenDNA' # Appears at the top of the page.
23+
AUTHORS = 'Daniel Marin' # Appears underneath the title
24+
FORM_DESCRIPTION = """Welcome to OpenDNA. This program lets you compare your DNA with other users in the database. We'll tell you whose DNA is more similar to yours and add you to our database!!""" # Appears before the form, to explain the form
25+
26+
"""
27+
Use this variable to design the form that you'd like to present the user with.
28+
This dictionary should be of the form:
29+
{
30+
'field_name': ('Field Label', field_type)
31+
}
32+
33+
The field type is one of the following options:
34+
FormInputs.STRING -- A string input area.
35+
FormInputs.TEXTAREA -- A textarea for large string inputs.
36+
FormInputs.NUMERIC -- A numeric input area.
37+
FormInputs.FILE -- A file upload input.
38+
or an iterable of valid inputs that will be provided to the user to choose
39+
between.
40+
41+
For example, if you were predicting housing prices, this form might look like:
42+
{
43+
'age': ('Age', FormInputs.NUMERIC),
44+
'living_area': ('Size (in square feet)', FormInputs.NUMERIC)
45+
}
46+
47+
If you'd like to provide a set of valid inputs, you can do that as you'd expect:
48+
{
49+
'location': ('Location', ('Palo Alto', 'Stanford', 'Redwood City'))
50+
}
51+
"""
52+
FORM_SPECIFICATION = {
53+
'name': ('Enter your name: ', FormInputs.STRING),
54+
'age': ('Age', FormInputs.NUMERIC),
55+
'dna': ('DNA', FormInputs.STRING),
56+
'save': ('Save into database?', ('Yes', 'No'))
57+
}
58+
59+
60+
"""
61+
--------------------------------------------------------------------------------
62+
You don't need to modify anything below this line, although you're
63+
welcome (and encouraged) to take a look!
64+
--------------------------------------------------------------------------------
65+
"""
66+
@app.context_processor
def inject_globals():
    """Return the ``title`` and ``authors`` values for the template context.

    Registered through ``app.context_processor``; the values come from the
    module-level ``APP_TITLE`` and ``AUTHORS`` settings.
    """
    return dict(title=APP_TITLE, authors=AUTHORS)
72+
73+
@app.route('/', methods=['GET', 'POST'])
def main():
    """Render the form page and, when data was submitted, its result.

    Submitted form values are used as the defaults for re-rendering the
    form. For every field declared as ``FormInputs.FILE`` in
    ``FORM_SPECIFICATION``, an uploaded file is saved under the configured
    upload folder and the field's value is replaced by the saved path. If
    any form value is truthy, the values are passed as keyword arguments to
    ``process.process`` and its return value is shown as ``msg``.

    Returns:
        The rendered ``index.html`` template.
    """
    form_defaults = dict(request.form)

    # Handle file save
    file_fields = [name for name, spec in FORM_SPECIFICATION.items()
                   if spec[1] is FormInputs.FILE]
    for field_name in file_fields:
        if field_name in request.files \
                and (upload := request.files[field_name]).filename:
            filename = secure_filename(upload.filename)
            if not filename:
                # secure_filename() can strip a name down to an empty
                # string; saving would then target the folder itself.
                continue

            upload_dir = app.config['UPLOAD_FOLDER']
            # Nothing visible here creates the folder, so make sure it
            # exists before writing into it.
            os.makedirs(upload_dir, exist_ok=True)

            # Save the file
            save_path = os.path.join(upload_dir, filename)
            upload.save(save_path)

            # Update the form values
            form_defaults[field_name] = save_path

    msg = None
    if any(form_defaults.values()):
        msg = process.process(**form_defaults)

    # NOTE(review): kept at function level (prints None on a plain GET);
    # the scraped source lost its indentation, so confirm this placement.
    print(msg)

    form = utils.Form(FORM_SPECIFICATION, defaults=form_defaults)

    return render_template('index.html',
                           form=form,
                           desc=FORM_DESCRIPTION,
                           msg=msg)
103+
104+
if __name__ == '__main__':
105+
app.run()

danielmq/data/myDNA.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ATGGTGCTCTCTGGGGAAGACAAAAGCAACATCAAGGCTGCCTGGGGGAAGATTGGTGGCCATGGTGCTGAATATGGAGCTGAAGCCCTGGAAAGGATGTTTGCTAGCTTCCCCACCACCAAGACCTACTTTCCTCACTTTGATGTAAGCCACGGCTCTCATTCTAAATGGTGCTGAGCAAAGTT

0 commit comments

Comments
 (0)