3
3
#include " directoryscanner.h"
4
4
5
5
#include " qtcore_helpers/qstring_helpers.hpp"
6
+ #include " file.hpp"
6
7
7
8
#include " hash/jenkins_hash.hpp"
8
9
#include " threading/thread_helpers.h"
10
+ #include " utility_functions/memory_functions.h"
9
11
10
12
DISABLE_COMPILER_WARNINGS
11
13
#include < QDebug>
12
14
#include < QRegularExpression>
13
15
#include < QTextStream>
14
16
RESTORE_COMPILER_WARNINGS
15
17
16
- static QString queryToRegex (const QString& query)
18
+ [[nodiscard]] static QString queryToRegex (const QString& query, bool startToEnd )
17
19
{
18
20
// Escape the dots
19
21
QString regExString = QString{ query }.replace (' .' , QLatin1StringView{ " \\ ." });
@@ -22,18 +24,102 @@ static QString queryToRegex(const QString& query)
22
24
if (nameQueryHasWildcards)
23
25
{
24
26
regExString.replace (' ?' , ' .' ).replace (' *' , " .*" );
25
- regExString.prepend (" \\ A" ).append (" \\ z" );
27
+ // regExString.prepend("\\A").append("\\z");
26
28
}
27
29
28
- if (!regExString.startsWith (' ^' ))
29
- regExString.prepend (' ^' );
30
+ if (startToEnd)
31
+ {
32
+ if (!regExString.startsWith (' ^' ))
33
+ regExString.prepend (' ^' );
30
34
31
- if (!regExString.endsWith (' $' ))
32
- regExString.append (' $' );
35
+ if (!regExString.endsWith (' $' ))
36
+ regExString.append (' $' );
37
+ }
33
38
34
39
return regExString;
35
40
}
36
41
42
#ifndef __ARM_ARCH_ISA_A64

#include <smmintrin.h> // Also pulls in the SSE2 intrinsics, which are the only ones used below

/// Replaces every zero byte in [array, array + size) with an ASCII space, in place.
/// @param array Start of the buffer to patch.
/// @param size  Number of bytes to process. Does not need to be a multiple of 16;
///              bytes at and beyond array + size are neither read nor written.
inline void replace_byte(uint8_t* array, size_t size) noexcept
{
	const __m128i zero = _mm_setzero_si128();
	const __m128i space = _mm_set1_epi8(' ');

	size_t i = 0;
	// Vector part: only whole 16-byte chunks, so the buffer is never overrun
	for (; size - i >= 16; i += 16)
	{
		__m128i* chunk = reinterpret_cast<__m128i*>(array + i);
		const __m128i data = _mm_loadu_si128(chunk);
		const __m128i isZero = _mm_cmpeq_epi8(data, zero); // 0xFF in every lane that holds 0
		// A matching lane holds 0, so OR-ing in the space yields exactly ' ';
		// the other lanes are OR-ed with 0 and stay untouched. This needs SSE2 only.
		_mm_storeu_si128(chunk, _mm_or_si128(data, _mm_and_si128(isZero, space)));
	}

	// Scalar tail: the remaining 0..15 bytes
	for (; i < size; ++i)
	{
		if (array[i] == 0)
			array[i] = ' ';
	}
}

#else // ARM64

#include <arm_neon.h>

/// Replaces every zero byte in [array, array + size) with an ASCII space, in place.
/// @param array Start of the buffer to patch.
/// @param size  Number of bytes to process. Does not need to be a multiple of 16;
///              bytes at and beyond array + size are neither read nor written.
inline void replace_byte(uint8_t* array, size_t size) noexcept
{
	const uint8x16_t zero = vdupq_n_u8(0);
	const uint8x16_t space = vdupq_n_u8(' ');

	size_t i = 0;
	// Vector part: only whole 16-byte chunks, so the buffer is never overrun
	for (; size - i >= 16; i += 16)
	{
		const uint8x16_t data = vld1q_u8(array + i);
		const uint8x16_t isZero = vceqq_u8(data, zero); // 0xFF in every lane that holds 0
		vst1q_u8(array + i, vbslq_u8(isZero, space, data)); // Space where the mask is set, original byte otherwise
	}

	// Scalar tail: the remaining 0..15 bytes
	for (; i < size; ++i)
	{
		if (array[i] == 0)
			array[i] = ' ';
	}
}

#endif
79
+
80
+ [[nodiscard]] static bool fileContentsMatches (const QString& path, const QRegularExpression& regex)
81
+ {
82
+ thin_io::file file;
83
+ if (!file.open (path.toUtf8 ().constData (), thin_io::file::open_mode::Read)) [[unlikely]]
84
+ return false ;
85
+
86
+ const auto fileSize = file.size ().value_or (0 );
87
+ if (fileSize == 0 ) [[unlikely]]
88
+ return false ;
89
+
90
+ static constexpr auto toBytePtr = [](const void * ptr) -> const std::byte* {
91
+ return reinterpret_cast <const std::byte*>(ptr);
92
+ };
93
+
94
+ auto * mappedFile = toBytePtr (file.mmap (thin_io::file::mmap_access_mode::ReadOnly, 0 , fileSize));
95
+ if (!mappedFile) [[unlikely]]
96
+ {
97
+ assert_debug_only (mappedFile);
98
+ return false ;
99
+ }
100
+
101
+
102
+ static constexpr size_t maxLineLength = 8 * 1024 ;
103
+ char buffer[maxLineLength];
104
+
105
+ for (size_t offset = 0 ; offset < fileSize; )
106
+ {
107
+ const auto maxSearchLength = std::min (fileSize - offset, maxLineLength);
108
+ const auto lineStart = mappedFile + offset;
109
+ offset += maxSearchLength;
110
+
111
+ ::memcpy (buffer, lineStart, maxSearchLength);
112
+ replace_byte ((uint8_t *)buffer, (maxSearchLength + 15 ) / 16 );
113
+
114
+ QString line = QString::fromUtf8 (buffer, maxSearchLength);
115
+ assert (!line.isEmpty ());
116
+ if (regex.match (line).hasMatch ())
117
+ return true ;
118
+ }
119
+
120
+ return false ;
121
+ }
122
+
37
123
CFileSearchEngine::CFileSearchEngine (CController& controller) :
38
124
_controller(controller),
39
125
_workerThread(" File search thread" )
@@ -92,7 +178,7 @@ void CFileSearchEngine::searchThread(const QString& what, bool subjectCaseSensit
92
178
QRegularExpression queryRegExp;
93
179
if (!noFileNameFilter)
94
180
{
95
- queryRegExp.setPattern (queryToRegex (what));
181
+ queryRegExp.setPattern (queryToRegex (what, true ));
96
182
assert_r (queryRegExp.isValid ());
97
183
98
184
if (!subjectCaseSensitive)
@@ -104,34 +190,29 @@ void CFileSearchEngine::searchThread(const QString& what, bool subjectCaseSensit
104
190
QRegularExpression fileContentsRegExp;
105
191
if (searchByContents)
106
192
{
107
- if (contentsToFind.contains (QRegularExpression (QSL (" [*?]" ))))
108
- {
109
- fileContentsRegExp.setPattern (QRegularExpression::wildcardToRegularExpression (contentsToFind));
110
- assert_r (fileContentsRegExp.isValid ());
111
- }
112
- else if (contentsWholeWords)
113
- {
114
- fileContentsRegExp.setPattern (" \\ b" + contentsToFind + " \\ b" );
115
- assert_r (fileContentsRegExp.isValid ());
116
- }
193
+ QString pattern = queryToRegex (contentsToFind, false );
194
+ if (contentsWholeWords)
195
+ pattern.prepend (" \\ b" ).append (" \\ b" );
196
+
197
+ fileContentsRegExp.setPattern (pattern);
117
198
118
199
if (!contentsCaseSensitive)
119
200
fileContentsRegExp.setPatternOptions (QRegularExpression::CaseInsensitiveOption);
120
- }
121
201
122
- const bool useFileContentsRegExp = !fileContentsRegExp.pattern ().isEmpty ();
202
+ assert_r (fileContentsRegExp.isValid ());
203
+ }
123
204
124
205
const int uniqueJobTag = static_cast <int >(jenkins_hash (" CFileSearchEngine" )) + rand ();
125
206
126
207
QString line;
127
208
209
+ const QByteArray contentsUtf8 = contentsToFind.toUtf8 ();
210
+
128
211
for (const QString& pathToLookIn : where)
129
212
{
130
213
scanDirectory (CFileSystemObject (pathToLookIn),
131
214
[&](const CFileSystemObject& item) {
132
215
133
- ++itemCounter;
134
-
135
216
if (itemCounter % 8192 == 0 )
136
217
{
137
218
// No need to report every single item and waste CPU cycles
@@ -141,6 +222,8 @@ void CFileSearchEngine::searchThread(const QString& what, bool subjectCaseSensit
141
222
}, uniqueJobTag);
142
223
}
143
224
225
+ ++itemCounter;
226
+
144
227
if (searchByContents && !item.isFile ())
145
228
return ;
146
229
@@ -152,21 +235,7 @@ void CFileSearchEngine::searchThread(const QString& what, bool subjectCaseSensit
152
235
bool matchFound = false ;
153
236
154
237
if (searchByContents)
155
- {
156
- QFile file{ item.fullAbsolutePath () };
157
- if (!file.open (QFile::ReadOnly))
158
- return ;
159
-
160
- const auto contentsCaseSensitivity = contentsCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive;
161
-
162
- QTextStream stream{ &file };
163
-
164
- while (!matchFound && !_workerThread.terminationFlag () && stream.readLineInto (&line))
165
- {
166
- // contains() is faster than RegEx match (as of Qt 5.4.2, but this was for QRegExp, not tested with QRegularExpression)
167
- matchFound = useFileContentsRegExp ? fileContentsRegExp.match (line).hasMatch () : line.contains (contentsToFind, contentsCaseSensitivity);
168
- }
169
- }
238
+ matchFound = fileContentsMatches (item.fullAbsolutePath (), fileContentsRegExp);
170
239
else
171
240
matchFound = nameMatches;
172
241
0 commit comments