Skip to content

Fix gff2db segfault, and keep entryname in lookup when multithreaded #635

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit. Hold Shift + click to select a range.
60e336d
Update combine pval agg-mode 3
RuoshiZhang May 11, 2020
0d05801
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Jul 27, 2020
f9382b8
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Jul 28, 2020
6c2f09e
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Aug 4, 2020
aa1ed39
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Sep 15, 2020
0c6b539
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Sep 28, 2020
db1da70
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Feb 23, 2021
71d985a
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Mar 22, 2021
54432f7
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Apr 13, 2021
51819ec
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Apr 27, 2021
fccd47e
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang May 20, 2021
aea233a
Improve gff2db to parse multiple files, and try multi-threading
RuoshiZhang Jul 2, 2021
3bc6df0
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Jul 2, 2021
25250cb
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Jul 5, 2021
1b34fd2
Merge branch 'master' of github.com:soedinglab/MMseqs2
RuoshiZhang Jul 6, 2021
5e9ee74
createRenumberedDB can also keep entry name
RuoshiZhang Oct 21, 2021
fcae85a
Fix seg fault when memorymapped handles files that end exactly at the…
RuoshiZhang Jan 11, 2022
edd3a76
Merge branch 'master' into gff
milot-mirdita Nov 16, 2022
777c203
Update regression to fix oversubscription error in mpi test
milot-mirdita Nov 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/commons/DBWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ void DBWriter::writeThreadBuffer(unsigned int idx, size_t dataSize) {
}
}

void DBWriter::createRenumberedDB(const std::string& dataFile, const std::string& indexFile, const std::string& origData, const std::string& origIndex, int sortMode) {
void DBWriter::createRenumberedDB(const std::string& dataFile, const std::string& indexFile, const std::string& origData, const std::string& origIndex, bool keepEntryName, int sortMode) {
DBReader<unsigned int>* lookupReader = NULL;
FILE *sLookup = NULL;
if (origData.empty() == false && origIndex.empty() == false) {
Expand Down Expand Up @@ -711,7 +711,9 @@ void DBWriter::createRenumberedDB(const std::string& dataFile, const std::string
size_t lookupId = lookupReader->getLookupIdByKey(idx->id);
DBReader<unsigned int>::LookupEntry copy = lookup[lookupId];
copy.id = i;
copy.entryName = SSTR(idx->id);
if(!keepEntryName){
copy.entryName = SSTR(idx->id);
}
lookupReader->lookupEntryToBuffer(strBuffer, copy);
written = fwrite(strBuffer.c_str(), sizeof(char), strBuffer.size(), sLookup);
if (written != (int) strBuffer.size()) {
Expand Down
2 changes: 1 addition & 1 deletion src/commons/DBWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class DBWriter : public MemoryTracker {
template <typename T>
static void writeIndexEntryToFile(FILE *outFile, char *buff1, T &index);

static void createRenumberedDB(const std::string& dataFile, const std::string& indexFile, const std::string& origData, const std::string& origIndex, int sortMode = DBReader<unsigned int>::SORT_BY_ID_OFFSET);
static void createRenumberedDB(const std::string& dataFile, const std::string& indexFile, const std::string& origData, const std::string& origIndex, bool keepEntryName = false, int sortMode = DBReader<unsigned int>::SORT_BY_ID_OFFSET);

bool isClosed(){
return closed;
Expand Down
6 changes: 4 additions & 2 deletions src/util/convertalignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,9 @@ std::map<unsigned int, unsigned int> readKeyToSet(const std::string& file) {

MemoryMapped lookup(file, MemoryMapped::WholeFile, MemoryMapped::SequentialScan);
char* data = (char *) lookup.getData();
char* end = data + lookup.mappedSize();
const char* entry[255];
while (*data != '\0') {
while (data < end && *data != '\0') {
const size_t columns = Util::getWordsOfLine(data, entry, 255);
if (columns < 3) {
Debug(Debug::WARNING) << "Not enough columns in lookup file " << file << "\n";
Expand All @@ -125,8 +126,9 @@ std::map<unsigned int, std::string> readSetToSource(const std::string& file) {

MemoryMapped source(file, MemoryMapped::WholeFile, MemoryMapped::SequentialScan);
char* data = (char *) source.getData();
char* end = data + source.mappedSize();
const char* entry[255];
while (*data != '\0') {
while (data < end && *data != '\0') {
const size_t columns = Util::getWordsOfLine(data, entry, 255);
if (columns < 2) {
Debug(Debug::WARNING) << "Not enough columns in lookup file " << file << "\n";
Expand Down
7 changes: 4 additions & 3 deletions src/util/gff2db.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ int gff2db(int argc, const char **argv, const Command &command) {
headerWriter.open();
std::string outLookup = outDb + ".lookup";
std::string outLookupIndex = outDb + ".lookup.index";
DBWriter lookupWriter(outLookup.c_str(), outLookupIndex.c_str(), par.threads, 0, Parameters::DBTYPE_OMIT_FILE);
DBWriter lookupWriter(outLookup.c_str(), outLookupIndex.c_str(), par.threads, par.compressed, Parameters::DBTYPE_OMIT_FILE);
lookupWriter.open();

FILE *source = FileUtil::openAndDelete((outDb + ".source").c_str(), "w");
Expand Down Expand Up @@ -84,8 +84,9 @@ int gff2db(int argc, const char **argv, const Command &command) {
EXIT(EXIT_FAILURE);
}
char *data = (char *) file.getData();
char* end = data + file.mappedSize();
size_t idx = 0;
while (*data != '\0') {
while (data < end && *data != '\0') {
// line is a comment or empty
if (*data == '#' || *data == '\n') {
data = Util::skipLine(data);
Expand Down Expand Up @@ -198,7 +199,7 @@ int gff2db(int argc, const char **argv, const Command &command) {

#pragma omp task
{
DBWriter::createRenumberedDB(outDb, outDbIndex, outDb, outDbIndex);
DBWriter::createRenumberedDB(outDb, outDbIndex, outDb, outDbIndex, true);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion util/regression
Submodule regression updated 1 files
+1 −1 run_regression.sh