Tokenizer: provide TokenList in constructor #7468

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open · wants to merge 2 commits into base: main
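For orientation, a minimal before/after sketch of the call-site pattern this PR introduces, lifted from the checkClang() hunk below (`file`, `mSettings`, and `mErrorLogger` are the surrounding members): the Tokenizer no longer creates its own TokenList; callers build one and move it into the constructor.

```cpp
// Before: the Tokenizer constructed its TokenList internally.
Tokenizer tokenizer(mSettings, mErrorLogger);
tokenizer.list.appendFileIfNew(file.spath());

// After: the caller prepares the TokenList and hands it over by value.
TokenList tokenlist{&mSettings};
tokenlist.appendFileIfNew(file.spath());
Tokenizer tokenizer(std::move(tokenlist), mSettings, mErrorLogger);
```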
125 changes: 67 additions & 58 deletions lib/cppcheck.cpp
@@ -715,8 +715,9 @@ unsigned int CppCheck::checkClang(const FileWithDetails &file)
}

try {
Tokenizer tokenizer(mSettings, mErrorLogger);
tokenizer.list.appendFileIfNew(file.spath());
TokenList tokenlist{&mSettings};
tokenlist.appendFileIfNew(file.spath());
Tokenizer tokenizer(std::move(tokenlist), mSettings, mErrorLogger);
std::istringstream ast(output2);
clangimport::parseClangAstDump(tokenizer, ast);
ValueFlow::setValues(tokenizer.list,
@@ -902,18 +903,17 @@ unsigned int CppCheck::checkFile(const FileWithDetails& file, const std::string

if (mUnusedFunctionsCheck && (mSettings.useSingleJob() || analyzerInformation)) {
std::size_t hash = 0;
// this is not a real source file - we just want to tokenize it. treat it as C anyways as the language needs to be determined.
Tokenizer tokenizer(mSettings, mErrorLogger);
TokenList tokenlist{&mSettings};
// enforce the language since markup files are special and do not adhere to the enforced language
tokenizer.list.setLang(Standards::Language::C, true);
tokenlist.setLang(Standards::Language::C, true);
if (fileStream) {
std::vector<std::string> files;
simplecpp::TokenList tokens(*fileStream, files, file.spath());
if (analyzerInformation) {
const Preprocessor preprocessor(mSettings, mErrorLogger);
hash = calculateHash(preprocessor, tokens, mSettings, mSuppressions);
}
tokenizer.list.createTokens(std::move(tokens));
tokenlist.createTokens(std::move(tokens));
}
else {
std::vector<std::string> files;
@@ -922,8 +922,10 @@ unsigned int CppCheck::checkFile(const FileWithDetails& file, const std::string
const Preprocessor preprocessor(mSettings, mErrorLogger);
hash = calculateHash(preprocessor, tokens, mSettings, mSuppressions);
}
tokenizer.list.createTokens(std::move(tokens));
tokenlist.createTokens(std::move(tokens));
}
// this is not a real source file - we just want to tokenize it. treat it as C anyways as the language needs to be determined.
Tokenizer tokenizer(std::move(tokenlist), mSettings, mErrorLogger);
mUnusedFunctionsCheck->parseTokens(tokenizer, mSettings);

if (analyzerInformation) {
Expand Down Expand Up @@ -1123,75 +1125,82 @@ unsigned int CppCheck::checkFile(const FileWithDetails& file, const std::string
continue;
}

Tokenizer tokenizer(mSettings, mErrorLogger);
if (mSettings.showtime != SHOWTIME_MODES::SHOWTIME_NONE)
tokenizer.setTimerResults(&s_timerResults);
tokenizer.setDirectives(directives); // TODO: how to avoid repeated copies?

try {
TokenList tokenlist{&mSettings};

// Create tokens, skip rest of iteration if failed
Timer::run("Tokenizer::createTokens", mSettings.showtime, &s_timerResults, [&]() {
simplecpp::TokenList tokensP = preprocessor.preprocess(tokens1, mCurrentConfig, files, true);
tokenizer.list.createTokens(std::move(tokensP));
tokenlist.createTokens(std::move(tokensP));
});
hasValidConfig = true;

// locations macros
mLogger->setLocationMacros(tokenizer.tokens(), files);
Tokenizer tokenizer(std::move(tokenlist), mSettings, mErrorLogger);
try {
if (mSettings.showtime != SHOWTIME_MODES::SHOWTIME_NONE)
tokenizer.setTimerResults(&s_timerResults);
tokenizer.setDirectives(directives); // TODO: how to avoid repeated copies?

// If only errors are printed, print filename after the check
if (!mSettings.quiet && (!mCurrentConfig.empty() || checkCount > 1)) {
std::string fixedpath = Path::toNativeSeparators(file.spath());
mErrorLogger.reportOut("Checking " + fixedpath + ": " + mCurrentConfig + "...", Color::FgGreen);
}
// locations macros
mLogger->setLocationMacros(tokenizer.tokens(), files);

if (!tokenizer.tokens())
continue;
// If only errors are printed, print filename after the check
if (!mSettings.quiet && (!mCurrentConfig.empty() || checkCount > 1)) {
std::string fixedpath = Path::toNativeSeparators(file.spath());
mErrorLogger.reportOut("Checking " + fixedpath + ": " + mCurrentConfig + "...", Color::FgGreen);
}

// skip rest of iteration if just checking configuration
if (mSettings.checkConfiguration)
continue;
if (!tokenizer.tokens())
continue;

// skip rest of iteration if just checking configuration
if (mSettings.checkConfiguration)
continue;

#ifdef HAVE_RULES
// Execute rules for "raw" code
executeRules("raw", tokenizer.list);
// Execute rules for "raw" code
executeRules("raw", tokenizer.list);
#endif

// Simplify tokens into normal form, skip rest of iteration if failed
if (!tokenizer.simplifyTokens1(mCurrentConfig))
continue;
// Simplify tokens into normal form, skip rest of iteration if failed
if (!tokenizer.simplifyTokens1(mCurrentConfig))
continue;

// dump xml if --dump
if ((mSettings.dump || !mSettings.addons.empty()) && fdump.is_open()) {
fdump << "<dump cfg=\"" << ErrorLogger::toxml(mCurrentConfig) << "\">" << std::endl;
fdump << " <standards>" << std::endl;
fdump << " <c version=\"" << mSettings.standards.getC() << "\"/>" << std::endl;
fdump << " <cpp version=\"" << mSettings.standards.getCPP() << "\"/>" << std::endl;
fdump << " </standards>" << std::endl;
fdump << getLibraryDumpData();
preprocessor.dump(fdump);
tokenizer.dump(fdump);
fdump << "</dump>" << std::endl;
}
// dump xml if --dump
if ((mSettings.dump || !mSettings.addons.empty()) && fdump.is_open()) {
fdump << "<dump cfg=\"" << ErrorLogger::toxml(mCurrentConfig) << "\">" << std::endl;
fdump << " <standards>" << std::endl;
fdump << " <c version=\"" << mSettings.standards.getC() << "\"/>" << std::endl;
fdump << " <cpp version=\"" << mSettings.standards.getCPP() << "\"/>" << std::endl;
fdump << " </standards>" << std::endl;
fdump << getLibraryDumpData();
preprocessor.dump(fdump);
tokenizer.dump(fdump);
fdump << "</dump>" << std::endl;
}

if (mSettings.inlineSuppressions) {
// Need to call this even if the hash will skip this configuration
mSuppressions.nomsg.markUnmatchedInlineSuppressionsAsChecked(tokenizer);
}
if (mSettings.inlineSuppressions) {
// Need to call this even if the hash will skip this configuration
mSuppressions.nomsg.markUnmatchedInlineSuppressionsAsChecked(tokenizer);
}

// Skip if we already met the same simplified token list
if (mSettings.force || mSettings.maxConfigs > 1) {
const std::size_t hash = tokenizer.list.calculateHash();
if (hashes.find(hash) != hashes.end()) {
if (mSettings.debugwarnings)
purgedConfigurationMessage(file.spath(), mCurrentConfig);
continue;
// Skip if we already met the same simplified token list
if (mSettings.force || mSettings.maxConfigs > 1) {
const std::size_t hash = tokenizer.list.calculateHash();
if (hashes.find(hash) != hashes.end()) {
if (mSettings.debugwarnings)
purgedConfigurationMessage(file.spath(), mCurrentConfig);
continue;
}
hashes.insert(hash);
}
hashes.insert(hash);
}

// Check normal tokens
checkNormalTokens(tokenizer, analyzerInformation.get());
// Check normal tokens
checkNormalTokens(tokenizer, analyzerInformation.get());
} catch (const InternalError &e) {
ErrorMessage errmsg = ErrorMessage::fromInternalError(e, &tokenizer.list, file.spath());
mErrorLogger.reportErr(errmsg);
}
Comment on lines +1200 to +1203 (Collaborator, Author):

The multiple layers of exception handling seem excessive. I think it is possible to clean this up a bit, but that requires test coverage first.
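For readers following along, a rough, heavily condensed outline of the nesting this comment refers to (illustrative only, not part of the diff; the real control flow is in checkFile()):

```cpp
try {                                        // per-configuration block
    TokenList tokenlist{&mSettings};
    // ... preprocess and tokenlist.createTokens() ...
    Tokenizer tokenizer(std::move(tokenlist), mSettings, mErrorLogger);
    try {                                    // inner layer added by this PR
        // ... simplifyTokens1(), checkNormalTokens() ...
    } catch (const InternalError &e) {
        // tokenizer.list is still in scope, so it can be passed for location info
    }
} catch (const simplecpp::Output &o) {
    // #error etc. during preprocessing
}
// Further out there is another InternalError handler, which now receives
// nullptr because the tokenizer is no longer in scope (see the next comment).
```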

} catch (const simplecpp::Output &o) {
// #error etc during preprocessing
configurationError.push_back((mCurrentConfig.empty() ? "\'\'" : mCurrentConfig) + " : [" + o.location.file() + ':' + std::to_string(o.location.line) + "] " + o.msg);
@@ -1221,7 +1230,7 @@ unsigned int CppCheck::checkFile(const FileWithDetails& file, const std::string
mLogger->setAnalyzerInfo(nullptr);
return mLogger->exitcode();
} catch (const InternalError &e) {
ErrorMessage errmsg = ErrorMessage::fromInternalError(e, &tokenizer.list, file.spath());
ErrorMessage errmsg = ErrorMessage::fromInternalError(e, nullptr, file.spath());
Comment on lines -1224 to +1233 (Collaborator, Author):

Since the TokenList itself might throw InternalError, it seems strange to pass that object into the exception handling. It is also no longer available here after this change.

mErrorLogger.reportErr(errmsg);
}
}
7 changes: 4 additions & 3 deletions lib/tokenize.cpp
@@ -115,8 +115,8 @@ static bool isClassStructUnionEnumStart(const Token * tok)

//---------------------------------------------------------------------------

Tokenizer::Tokenizer(const Settings &settings, ErrorLogger &errorLogger) :
list(&settings),
Tokenizer::Tokenizer(TokenList tokenList, const Settings &settings, ErrorLogger &errorLogger) :
list(std::move(tokenList)),
mSettings(settings),
mErrorLogger(errorLogger),
mTemplateSimplifier(new TemplateSimplifier(*this))
@@ -10969,7 +10969,8 @@ bool Tokenizer::isPacked(const Token * bodyStart) const

void Tokenizer::getErrorMessages(ErrorLogger& errorLogger, const Settings& settings)
{
Tokenizer tokenizer(settings, errorLogger);
TokenList tokenlist{&settings};
Tokenizer tokenizer(std::move(tokenlist), settings, errorLogger);
tokenizer.invalidConstFunctionTypeError(nullptr);
// checkLibraryNoReturn
tokenizer.unhandled_macro_class_x_y(nullptr, "", "", "", "");
2 changes: 1 addition & 1 deletion lib/tokenize.h
@@ -54,7 +54,7 @@ class CPPCHECKLIB Tokenizer {
friend class TestTokenizer;

public:
explicit Tokenizer(const Settings & settings, ErrorLogger &errorLogger);
Tokenizer(TokenList tokenList, const Settings & settings, ErrorLogger &errorLogger);
~Tokenizer();

void setTimerResults(TimerResults *tr) {
2 changes: 2 additions & 0 deletions lib/tokenlist.h
@@ -57,6 +57,8 @@ class CPPCHECKLIB TokenList {
TokenList(const TokenList &) = delete;
TokenList &operator=(const TokenList &) = delete;

TokenList(TokenList&& other) NOEXCEPT = default;

/** @return the source file path. e.g. "file.cpp" */
const std::string& getSourceFilePath() const;

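A short note on why the defaulted move constructor is needed (my reading, not stated in the diff): TokenList's copy operations are deleted just above, and the new Tokenizer constructor takes its TokenList parameter by value, so both the call sites and the member initialization rely on move construction. A sketch under those assumptions (`settings` and `errorLogger` are placeholders):

```cpp
TokenList tokenlist{&settings};
tokenlist.setLang(Standards::Language::CPP, true);
Tokenizer tokenizer(std::move(tokenlist), settings, errorLogger); // moved, never copied
// 'tokenlist' is in a moved-from state here and should not be used further.
```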
10 changes: 5 additions & 5 deletions test/helpers.h
@@ -45,19 +45,19 @@ namespace tinyxml2 {
class SimpleTokenizer : public Tokenizer {
public:
explicit SimpleTokenizer(ErrorLogger& errorlogger, bool cpp = true)
: Tokenizer{s_settings, errorlogger}
: Tokenizer{TokenList{&s_settings}, s_settings, errorlogger}
{
list.setLang(cpp ? Standards::Language::CPP : Standards::Language::C, true);
}

SimpleTokenizer(const Settings& settings, ErrorLogger& errorlogger, bool cpp = true)
: Tokenizer{settings, errorlogger}
: Tokenizer{TokenList{&settings}, settings, errorlogger}
{
list.setLang(cpp ? Standards::Language::CPP : Standards::Language::C, true);
}

SimpleTokenizer(const Settings& settings, ErrorLogger& errorlogger, const std::string& filename)
: Tokenizer{settings, errorlogger}
: Tokenizer{TokenList{&settings}, settings, errorlogger}
{
list.setLang(Path::identify(filename, false));
list.appendFileIfNew(filename);
@@ -238,14 +238,14 @@ class SimpleTokenizer2 : public Tokenizer {
public:
template<size_t size>
SimpleTokenizer2(const Settings &settings, ErrorLogger &errorlogger, const char (&code)[size], const std::string& file0)
: Tokenizer{settings, errorlogger}
: Tokenizer{TokenList{&settings}, settings, errorlogger}
{
preprocess(code, mFiles, file0, *this, errorlogger);
}

// TODO: get rid of this
SimpleTokenizer2(const Settings &settings, ErrorLogger &errorlogger, const char code[], const std::string& file0)
: Tokenizer{settings, errorlogger}
: Tokenizer{TokenList{&settings}, settings, errorlogger}
{
preprocess(code, mFiles, file0, *this, errorlogger);
}
7 changes: 5 additions & 2 deletions test/testclangimport.cpp
@@ -26,6 +26,7 @@
#include <list>
#include <sstream>
#include <string>
#include <utility>
#include <vector>


@@ -139,7 +140,8 @@ class TestClangImport : public TestFixture {

std::string parse(const char clang[]) {
const Settings settings = settingsBuilder().clang().build();
Tokenizer tokenizer(settings, *this);
TokenList tokenlist{&settings};
Tokenizer tokenizer(std::move(tokenlist), settings, *this);
std::istringstream istr(clang);
clangimport::parseClangAstDump(tokenizer, istr);
if (!tokenizer.tokens()) {
@@ -1059,7 +1061,8 @@ class TestClangImport : public TestFixture {

#define GET_SYMBOL_DB(AST) \
const Settings settings = settingsBuilder().clang().platform(Platform::Type::Unix64).build(); \
Tokenizer tokenizer(settings, *this); \
TokenList tokenlist{&settings}; \
Tokenizer tokenizer(std::move(tokenlist), settings, *this); \
{ \
std::istringstream istr(AST); \
clangimport::parseClangAstDump(tokenizer, istr); \
27 changes: 15 additions & 12 deletions test/testsimplifytemplate.cpp
@@ -29,6 +29,7 @@
#include <cstring>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

class TestSimplifyTemplate : public TestFixture {
@@ -5427,12 +5428,12 @@ class TestSimplifyTemplate : public TestFixture {
}

unsigned int templateParameters(const char code[]) {
Tokenizer tokenizer(settings, *this);

TokenList tokenlist{&settings};
std::istringstream istr(code);
tokenizer.list.appendFileIfNew("test.cpp");
if (!tokenizer.list.createTokens(istr, Path::identify("test.cpp", false)))
tokenlist.appendFileIfNew("test.cpp");
if (!tokenlist.createTokens(istr, Path::identify("test.cpp", false)))
return false;
Tokenizer tokenizer(std::move(tokenlist), settings, *this);
tokenizer.createLinks();
tokenizer.splitTemplateRightAngleBrackets(false);

@@ -5496,12 +5497,13 @@ class TestSimplifyTemplate : public TestFixture {

// Helper function to unit test TemplateSimplifier::getTemplateNamePosition
int templateNamePositionHelper(const char code[], unsigned offset = 0) {
Tokenizer tokenizer(settings, *this);
TokenList tokenlist{&settings};

std::istringstream istr(code);
tokenizer.list.appendFileIfNew("test.cpp");
if (!tokenizer.list.createTokens(istr, Path::identify("test.cpp", false)))
tokenlist.appendFileIfNew("test.cpp");
if (!tokenlist.createTokens(istr, Path::identify("test.cpp", false)))
return false;
Tokenizer tokenizer(std::move(tokenlist), settings, *this);
tokenizer.createLinks();
tokenizer.splitTemplateRightAngleBrackets(false);

@@ -5568,11 +5570,11 @@ class TestSimplifyTemplate : public TestFixture {

// Helper function to unit test TemplateSimplifier::findTemplateDeclarationEnd
bool findTemplateDeclarationEndHelper(const char code[], const char pattern[], unsigned offset = 0) {
Tokenizer tokenizer(settings, *this);

TokenList tokenlist{&settings};
std::istringstream istr(code);
if (!TokenListHelper::createTokens(tokenizer.list, istr, "test.cpp"))
if (!TokenListHelper::createTokens(tokenlist, istr, "test.cpp"))
return false;
Tokenizer tokenizer(std::move(tokenlist), settings, *this);
tokenizer.createLinks();
tokenizer.splitTemplateRightAngleBrackets(false);

@@ -5598,11 +5600,12 @@ class TestSimplifyTemplate : public TestFixture {

// Helper function to unit test TemplateSimplifier::getTemplateParametersInDeclaration
bool getTemplateParametersInDeclarationHelper(const char code[], const std::vector<std::string> & params) {
Tokenizer tokenizer(settings, *this);
TokenList tokenlist{&settings};

std::istringstream istr(code);
if (!TokenListHelper::createTokens(tokenizer.list, istr, "test.cpp"))
if (!TokenListHelper::createTokens(tokenlist, istr, "test.cpp"))
return false;
Tokenizer tokenizer(std::move(tokenlist), settings, *this);
tokenizer.createLinks();
tokenizer.splitTemplateRightAngleBrackets(false);
