Skip to content

Commit e78d519

Browse files
author
Guanqiao Feng
committed
add scripts and results
1 parent bd601ea commit e78d519

6 files changed

+41082
-0
lines changed

clean_fasta_0.pl

+88
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/perl
2+
3+
## clean rat fasta ##
#
# Reads Rattus_norvegicus_UniProt_MoTrPAC_2017-10-20.fasta and writes
# Rattus_norvegicus.fasta with the following transformations:
#   * each header is reduced to ">NAME", where NAME is the run of
#     non-whitespace characters following the second "|" of the header;
#   * a record is kept only if its header line contains "Rattus norvegicus";
#   * only the first record seen for a given NAME is considered (a NAME is
#     marked as seen even when that first record is rejected by the species
#     filter, so later records with the same NAME are skipped as well);
#   * the sequence lines of a kept record are joined onto a single line.
#
# Dies if either file cannot be opened or closed.
#
# The bare block keeps strict/warnings and all variables local to this
# section so the rest of the script is unaffected.
{
    use strict;
    use warnings;

    my $in_path  = 'Rattus_norvegicus_UniProt_MoTrPAC_2017-10-20.fasta';
    my $out_path = 'Rattus_norvegicus.fasta';

    open my $in_fh, '<', $in_path
        or die "Cannot open '$in_path' for reading: $!";
    open my $out_fh, '>', $out_path
        or die "Cannot open '$out_path' for writing: $!";

    my %seen;                # NAMEs already encountered (kept or rejected)
    my $keeping      = 0;    # true while inside a record being copied
    my $wrote_record = 0;    # true once at least one header has been written

    while (my $line = <$in_fh>) {
        chomp(my $seq = $line);

        if ($line =~ /^>[^\|]+\|[^\|]+\|(\S+)/) {
            my $name = $1;
            $keeping = 0;

            if (!exists $seen{$name}) {
                $seen{$name} = 0;

                if ($seq =~ m/Rattus norvegicus/) {
                    # Terminate the previous record's sequence line.  This is
                    # decided by what has been written so far, not by the
                    # input line number, so the output never starts with a
                    # blank line.
                    print {$out_fh} "\n" if $wrote_record;
                    print {$out_fh} ">$name\n";
                    $keeping      = 1;
                    $wrote_record = 1;
                }
            }
        }
        elsif ($line =~ /^>/) {
            # A header that does not have the expected three "|"-separated
            # fields still starts a new record; skip that record instead of
            # appending its header and sequence to the previous one.
            $keeping = 0;
        }
        elsif ($keeping) {
            print {$out_fh} $seq;
        }
    }

    # Finish the last sequence line; leave the file empty if nothing was kept.
    print {$out_fh} "\n" if $wrote_record;

    close $in_fh  or die "Cannot close '$in_path': $!";
    close $out_fh or die "Cannot close '$out_path': $!";
}
54+
55+
## clean mouse fasta ##
#
# Reads M_musculus_Uniprot_SPROT_2017-04-12.fasta and writes
# Mus_musculus.fasta with the following transformations:
#   * each header is truncated at the first whitespace character
#     (">" plus the following non-whitespace run is kept);
#   * for every other non-blank line, the first run of non-whitespace
#     characters is appended to the current sequence, so each sequence
#     ends up on a single line.
#
# Every input line is also echoed to STDOUT.
# Dies if either file cannot be opened or closed.
#
# The bare block keeps strict/warnings and all variables local to this
# section so the rest of the script is unaffected.
{
    use strict;
    use warnings;

    my $in_path  = 'M_musculus_Uniprot_SPROT_2017-04-12.fasta';
    my $out_path = 'Mus_musculus.fasta';

    open my $in_fh, '<', $in_path
        or die "Cannot open '$in_path' for reading: $!";
    open my $out_fh, '>', $out_path
        or die "Cannot open '$out_path' for writing: $!";

    my $wrote_any = 0;    # true once anything has been written to the output

    while (my $line = <$in_fh>) {
        chomp(my $seq = $line);

        # NOTE(review): this echoes the whole input to STDOUT; it looks like
        # leftover debugging output -- confirm nothing consumes it before
        # removing.
        print "$seq\n";

        if ($line =~ /^(>\S+)/) {
            # Terminate the previous sequence line.  This is decided by what
            # has been written so far, not by the input line number, so
            # blank lines before the first header no longer produce a blank
            # first line in the output.
            print {$out_fh} "\n" if $wrote_any;
            print {$out_fh} "$1\n";
            $wrote_any = 1;
        }
        elsif ($line =~ /(\S+)/) {
            print {$out_fh} $1;
            $wrote_any = 1;
        }
    }

    # Finish the last sequence line; leave the file empty for empty input.
    print {$out_fh} "\n" if $wrote_any;

    close $in_fh  or die "Cannot close '$in_path': $!";
    close $out_fh or die "Cannot close '$out_path': $!";
}

0 commit comments

Comments
 (0)