Skip to content

Commit 468e500

Browse files
committed
hw3 is done except command line running options.
1 parent 444c9fe commit 468e500

7 files changed

+312
-0
lines changed

hw3/Makefile

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Build every translation unit of the homework into a single executable
# named `out`. `all` produces no file called "all", so it is declared phony.
.PHONY: all
all:
	g++ -o out readFile.cpp writeToFile.cpp needleman_wunsch.cpp smith_waterman.cpp main.cpp

hw3/cs481_hw3.pdf

75.1 KB
Binary file not shown.

hw3/main.cpp

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#include <iostream>
2+
#include <string>
3+
#include <bits/stdc++.h>
4+
#include "unistd.h"
5+
6+
using namespace std;
7+
8+
vector<string> readFile(std::string fileName);
9+
int smith_waterman(string sequence1Name, string sequence1, string sequence2Name, string sequence2,
10+
int gapopen, int gapext, int matchScore, int mismatchPenalty);
11+
int needleman_wunsch(string sequence1Name, string sequence1, string sequence2Name, string sequence2,
12+
int gapopen, int gapext, int matchScore, int mismatchPenalty);
13+
14+
/**
 * Program entry point.
 *
 * Reads the file named by argv[1] with readFile() and passes the first four
 * returned strings (name 1, sequence 1, name 2, sequence 2) to both
 * needleman_wunsch() and smith_waterman() using a fixed scoring scheme.
 *
 * @return 0 when both alignments report success, -1 on a usage error, when
 *         the input does not provide two sequences, or when an alignment
 *         reports failure.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: ./out <textFileName patternFileName>\n");
        return -1;
    }

    const std::vector<std::string> sequences = readFile(argv[1]);

    // readFile() returns an empty vector when the file cannot be opened, and
    // fewer than four entries when it holds fewer than two records. Indexing
    // without this check would read past the end of the vector.
    if (sequences.size() < 4) {
        fprintf(stderr, "error: expected two sequences in %s\n", argv[1]);
        return -1;
    }

    // Scoring scheme: the gap values are added to the score, so they are negative.
    const int gapopen = -5;
    const int gapext = -2;
    const int matchScore = 2;
    const int mismatchPenalty = -3;

    // Both alignments are always attempted, even if the first one fails.
    const int globalStatus = needleman_wunsch(sequences[0], sequences[1], sequences[2], sequences[3],
                                              gapopen, gapext, matchScore, mismatchPenalty);

    const int localStatus = smith_waterman(sequences[0], sequences[1], sequences[2], sequences[3],
                                           gapopen, gapext, matchScore, mismatchPenalty);

    return (globalStatus == 0 && localStatus == 0) ? 0 : -1;
}

hw3/needleman_wunsch.cpp

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#include <iostream>
2+
#include <string>
3+
4+
using namespace std;
5+
int writeToFile(string sequence1Name, string sequence1, string sequence2Name, string sequence2, int score, string fileName);
6+
7+
int needleman_wunsch(string sequence1Name, string sequence1, string sequence2Name, string sequence2,
8+
int gapopen, int gapext, int matchScore, int mismatchPenalty) {
9+
10+
int text1Length = sequence1.size();
11+
int text2Length = sequence2.size();
12+
13+
int table[text1Length + 1][text2Length + 1];
14+
char directionTable[text1Length + 1][text2Length + 1];
15+
16+
// Basic values in the tables.
17+
table[0][0] = 0;
18+
table[1][0] = gapopen + gapext;
19+
table[0][1] = gapopen + gapext;
20+
directionTable[1][0] = 'u';
21+
directionTable[0][1] = 'l';
22+
23+
// Fill in the 0th columns
24+
for(int i = 2; i < text1Length + 1; i++){
25+
table[i][0] = table[i-1][0] + gapext;
26+
directionTable[i][0] = 'u';
27+
}
28+
29+
// Fill in the 0th rows
30+
for(int j = 2; j < text2Length + 1; j++){
31+
table[0][j] = table[0][j - 1] + gapext;
32+
directionTable[0][j] = 'l';
33+
}
34+
35+
// Fill in the tables
36+
for(int i = 1; i < text1Length + 1; i++){
37+
for(int j = 1; j < text2Length + 1; j++){
38+
int diagonalExtra = sequence1.at(i - 1) == sequence2.at(j - 1) ? matchScore : mismatchPenalty;
39+
int leftExtra = directionTable[i][j-1] == 'l' ? (gapext) : (gapext + gapopen);
40+
int upExtra = directionTable[i-1][j] == 'u' ? (gapext) : (gapext + gapopen);
41+
42+
if(table[i-1][j-1] + diagonalExtra >= table[i][j-1] + leftExtra && table[i-1][j-1] + diagonalExtra >= table[i-1][j] + upExtra){
43+
table[i][j] = table[i-1][j-1] + diagonalExtra;
44+
directionTable[i][j] = 'd';
45+
} else if(table[i][j-1] + leftExtra >= table[i-1][j-1] + diagonalExtra && table[i][j-1] + leftExtra >= table[i-1][j] + upExtra){
46+
table[i][j] = table[i][j-1] + leftExtra;
47+
directionTable[i][j] = 'l';
48+
} else{
49+
table[i][j] = table[i-1][j] + upExtra;
50+
directionTable[i][j] = 'u';
51+
}
52+
}
53+
}
54+
55+
// Backtracking
56+
string matchedSequence1;
57+
string matchedSequence2;
58+
int i = text1Length, j = text2Length;
59+
while(i >= 1 || j >= 1){
60+
if(directionTable[i][j] == 'd'){
61+
matchedSequence1.insert (0, 1, sequence1.at(i - 1));
62+
matchedSequence2.insert (0, 1, sequence2.at(j - 1));
63+
i--; j--;
64+
} else if(directionTable[i][j] == 'l'){
65+
matchedSequence1.insert (0, 1, '-');
66+
matchedSequence2.insert (0, 1, sequence2.at(j - 1));
67+
j--;
68+
} else if(directionTable[i][j] == 'u'){
69+
matchedSequence1.insert (0, 1, sequence1.at(i - 1));
70+
matchedSequence2.insert (0, 1, '-');
71+
i--;
72+
} else{
73+
cout << "Error while backtracking." << endl;
74+
return -1;
75+
}
76+
}
77+
78+
/*
79+
// Print tables to console
80+
for(int i = 0; i < text1Length + 1; i++){
81+
for(int j = 0; j < text2Length + 1; j++)
82+
printf("%*c",4,directionTable[i][j]);
83+
printf("\n");
84+
}
85+
printf("\n");
86+
for(int i = 0; i < text1Length + 1; i++){
87+
for(int j = 0; j < text2Length + 1; j++)
88+
printf("%*d",4,table[i][j]);
89+
printf("\n");
90+
}
91+
*/
92+
93+
// Print results to file
94+
if(gapopen == 0)
95+
writeToFile(sequence1Name, matchedSequence1, sequence2Name, matchedSequence2, table[text1Length][text2Length], "global-naiveGap.aln");
96+
else
97+
writeToFile(sequence1Name, matchedSequence1, sequence2Name, matchedSequence2, table[text1Length][text2Length], "global-affineGap.aln");
98+
99+
return 0;
100+
}

hw3/readFile.cpp

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#include <iostream>
2+
#include <fstream>
3+
#include <string>
4+
#include <bits/stdc++.h>
5+
6+
//https://stackoverflow.com/questions/35251635/fasta-reader-written-in-c
7+
/**
 * Reads a FASTA-style file into a flat list of strings.
 *
 * Every line starting with '>' begins a record; the text after '>' is the
 * record name. All following non-empty lines up to the next header are
 * concatenated into that record's sequence. A trailing carriage return is
 * removed from every line, so files with CRLF and LF line endings give the
 * same result. Lines before the first header are ignored.
 *
 * @param fileName path of the file to read
 * @return {name1, sequence1, name2, sequence2, ...}; a record without
 *         sequence lines contributes an empty sequence string. The vector is
 *         empty when the file cannot be opened (a message is printed to
 *         std::cerr in that case).
 */
std::vector<std::string> readFile(std::string fileName) {
    std::vector<std::string> sequences;

    std::ifstream input(fileName);
    if (!input.good()) {
        std::cerr << "Error opening: " << fileName << " . You have failed." << std::endl;
        return sequences;
    }

    std::string line;
    std::string DNA_sequence;
    bool insideRecord = false;  // true once the first header has been seen

    // Loop on getline() itself so the final line is processed even without a
    // terminating newline.
    while (std::getline(input, line)) {
        // Drop the '\r' that getline() leaves behind on CRLF files. It is
        // only removed when actually present, so names and sequences from LF
        // files keep their last character.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();

        // Blank lines carry no data; skipping them also protects line[0] below.
        if (line.empty())
            continue;

        if (line[0] == '>') {
            // Flush the previous record's sequence (possibly empty) so names
            // and sequences always stay paired in the output.
            if (insideRecord)
                sequences.push_back(DNA_sequence);
            sequences.push_back(line.substr(1));
            DNA_sequence.clear();
            insideRecord = true;
        } else if (insideRecord) {
            DNA_sequence += line;
        }
    }

    // Flush the final record.
    if (insideRecord)
        sequences.push_back(DNA_sequence);

    return sequences;
}

hw3/smith_waterman.cpp

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#include <iostream>
2+
#include <string>
3+
4+
using namespace std;
5+
int writeToFile(string sequence1Name, string sequence1, string sequence2Name, string sequence2, int score, string fileName);
6+
7+
int smith_waterman(string sequence1Name, string sequence1, string sequence2Name, string sequence2,
8+
int gapopen, int gapext, int matchScore, int mismatchPenalty) {
9+
10+
int text1Length = sequence1.size();
11+
int text2Length = sequence2.size();
12+
13+
int table[text1Length + 1][text2Length + 1]; // table of scores
14+
char directionTable[text1Length + 1][text2Length + 1]; // table of directions
15+
16+
table[0][0] = 0;
17+
18+
// Fill in the 0th columns
19+
for(int i = 1; i < text1Length + 1; i++){
20+
table[i][0] = 0;
21+
directionTable[i][0] = 'u';
22+
}
23+
24+
// Fill in the 0th rows
25+
for(int j = 1; j < text2Length + 1; j++){
26+
table[0][j] = 0;
27+
directionTable[0][j] = 'l';
28+
}
29+
30+
int iMaxValue = 0;
31+
int jMaxValue = 0;
32+
int maxValue = 0;
33+
34+
// Fill in the tables
35+
for(int i = 1; i < text1Length + 1; i++){
36+
for(int j = 1; j < text2Length + 1; j++){
37+
38+
int diagonalExtra = sequence1.at(i - 1) == sequence2.at(j - 1) ? matchScore : mismatchPenalty;
39+
int leftExtra = directionTable[i][j-1] == 'l' ? (gapext) : (gapext + gapopen);
40+
int upExtra = directionTable[i-1][j] == 'u' ? (gapext) : (gapext + gapopen);
41+
42+
if(0 >= table[i-1][j-1] + diagonalExtra && 0 >= table[i][j-1] + leftExtra && 0 >= table[i-1][j] + upExtra){
43+
table[i][j] = 0;
44+
directionTable[i][j] = '0';
45+
}
46+
else if(table[i-1][j-1] + diagonalExtra >= table[i][j-1] + leftExtra && table[i-1][j-1] + diagonalExtra >= table[i-1][j] + upExtra){
47+
table[i][j] = table[i-1][j-1] + diagonalExtra;
48+
directionTable[i][j] = 'd';
49+
} else if(table[i][j-1] + leftExtra >= table[i-1][j-1] + diagonalExtra && table[i][j-1] + leftExtra >= table[i-1][j] + upExtra){
50+
table[i][j] = table[i][j-1] + leftExtra;
51+
directionTable[i][j] = 'l';
52+
} else{
53+
table[i][j] = table[i-1][j] + upExtra;
54+
directionTable[i][j] = 'u';
55+
}
56+
57+
58+
if(table[i][j] > maxValue){
59+
iMaxValue = i;
60+
jMaxValue = j;
61+
maxValue = table[i][j];
62+
}
63+
}
64+
}
65+
66+
// Backtracking
67+
string matchedSequence1;
68+
string matchedSequence2;
69+
int i = iMaxValue, j = jMaxValue;
70+
71+
while(table[i][j] != 0){
72+
if(directionTable[i][j] == 'd'){
73+
matchedSequence1.insert (0, 1, sequence1.at(i - 1));
74+
matchedSequence2.insert (0, 1, sequence2.at(j - 1));
75+
i--; j--;
76+
} else if(directionTable[i][j] == 'l'){
77+
matchedSequence1.insert (0, 1, '-');
78+
matchedSequence2.insert (0, 1, sequence2.at(j - 1));
79+
j--;
80+
} else if(directionTable[i][j] == 'u'){
81+
matchedSequence1.insert (0, 1, sequence1.at(i - 1));
82+
matchedSequence2.insert (0, 1, '-');
83+
i--;
84+
} else{
85+
cout << "Error while backtracking." << endl;
86+
return -1;
87+
}
88+
}
89+
90+
/*
91+
// Print tables to console
92+
for(int i = 0; i < text1Length + 1; i++){
93+
for(int j = 0; j < text2Length + 1; j++)
94+
printf("%*c",4,directionTable[i][j]);
95+
printf("\n");
96+
}
97+
printf("\n");
98+
for(int i = 0; i < text1Length + 1; i++){
99+
for(int j = 0; j < text2Length + 1; j++)
100+
printf("%*d",4,table[i][j]);
101+
printf("\n");
102+
}
103+
*/
104+
105+
// Print results to file
106+
if(gapopen == 0)
107+
writeToFile(sequence1Name, matchedSequence1, sequence2Name, matchedSequence2, maxValue, "local-naiveGap.aln");
108+
else
109+
writeToFile(sequence1Name, matchedSequence1, sequence2Name, matchedSequence2, maxValue, "local-affineGap.aln");
110+
111+
return 0;
112+
}

hw3/writeToFile.cpp

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#include <iomanip>
2+
#include <fstream>
3+
#include <string>
4+
#include <math.h>
5+
using namespace std;
6+
7+
//https://stackoverflow.com/questions/35251635/fasta-reader-written-in-c
8+
/**
 * Writes an alignment report to `fileName`, replacing any existing file.
 *
 * Layout: a "Score = <score>" line, then for every 60-character slice of the
 * longer sequence a blank line followed by one line per sequence. Each
 * sequence line starts with the name left-aligned in a 25-character column
 * (longer names are not truncated) followed by that slice; a sequence that is
 * already exhausted contributes only its name column.
 *
 * @param sequence1Name label of the first sequence
 * @param sequence1     first (aligned) sequence
 * @param sequence2Name label of the second sequence
 * @param sequence2     second (aligned) sequence
 * @param score         alignment score printed on the first line
 * @param fileName      path of the output file
 * @return 0 on success, -1 if the file cannot be opened or written.
 */
int writeToFile(std::string sequence1Name, std::string sequence1, std::string sequence2Name, std::string sequence2, int score, std::string fileName) {
    const std::size_t lineWidth = 60;  // sequence characters per output line
    const int nameWidth = 25;          // width of the name column

    std::ofstream myfile(fileName);
    if (!myfile.is_open())
        return -1;  // report the failure instead of silently writing nothing

    myfile << "Score = " << score << '\n';
    myfile << std::left;

    const std::size_t longest = sequence1.size() > sequence2.size() ? sequence1.size() : sequence2.size();

    // One pass per slice; stepping by lineWidth visits ceil(longest / lineWidth) slices.
    for (std::size_t offset = 0; offset < longest; offset += lineWidth) {
        myfile << '\n';
        myfile << std::setw(nameWidth) << sequence1Name
               << (offset < sequence1.size() ? sequence1.substr(offset, lineWidth) : std::string()) << '\n';
        myfile << std::setw(nameWidth) << sequence2Name
               << (offset < sequence2.size() ? sequence2.substr(offset, lineWidth) : std::string()) << '\n';
    }

    // close() flushes; a failed flush or write leaves the stream in a failed state.
    myfile.close();
    return myfile ? 0 : -1;
}

0 commit comments

Comments
 (0)