run the CCTest project on the file "cc_macro_expansion_stringize.cpp"
(note, you need to copy this source file, and rename to ccc_macro_expansion_stringize.cpp, so that CCTest only run the test for this single file)
Set two breakpoints here:
int Tokenizer::KMP_Find(const wxChar* text, const wxChar* pattern, const int patternLen)
{
if (!text || !pattern || pattern[0] == _T('\0') || text[0] == _T('\0'))
return -1;
if (patternLen > 1024)
{
if (patternLen < 5012)
TRACE(_T("KMP_Find() : %s - %s"), text, pattern);
else
{
TRACE(_T("KMP_Find: The plan buffer is too big, %d"), patternLen);
return -2;
}
}
int next[patternLen];
KMP_GetNextVal(pattern, next); //bp1
int index = 0, i = 0, j = 0; //bp2
This the the value before we call the function KMP_GetNextVal
> info locals
[debug]> info locals
[debug]next = {2283936, 0, 1875784749, 0}
[debug]index = 0
[debug]i = 14
[debug]j = 0
[debug]>>>>>>cb_gdb:
next = {2283936, 0, 1875784749, 0}
index = 0
i = 14
j = 0
And after the function call
> info locals
[debug]> info locals
[debug]next = <error reading variable next (Cannot access memory at address 0x1)>
[debug]index = 0
[debug]i = 14
[debug]j = 0
[debug]>>>>>>cb_gdb:
next = <error reading variable next (Cannot access memory at address 0x1)>
index = 0
i = 14
j = 0
This means this function has some errors.
Note, the "next" array is the lps array stated in
https://www.geeksforgeeks.org/kmp-algorithm-for-pattern-searching/or
KMP Algorithm | Searching for Patterns | GeeksforGeeks - YouTubeEDIT1:For a pattern "text", when step into the function:
void Tokenizer::KMP_GetNextVal(const wxChar* pattern, int next[])
{
int j = 0, k = -1;
next[0] = -1;
while (pattern[j] != _T('\0'))
{
if (k == -1 || pattern[j] == pattern[k])
{
++j;
++k;
if (pattern[j] != pattern[k])
next[j] = k; // error
else
next[j] = next[k];
}
else
k = next[k];
}
}
I do see that the line "//error", has j=4, which means next[j] is beyond the next (since next array only have four elements).
But I still need some time to see how the KMP algorithm works.
EDIT2:To simplify the issue, you only need to debug this function:
int Tokenizer::GetFirstTokenPosition(const wxChar* buffer, const size_t bufferLen,
const wxChar* key, const size_t keyLen)
{
int pos = -1;
wxChar* p = const_cast<wxChar*>(buffer);
const wxChar* endBuffer = buffer + bufferLen;
for (;;)
{
const int ret = KMP_Find(p, key, keyLen);
if (ret == -1)
break;
// check previous char
p += ret;
if (p > buffer)
{
const wxChar ch = *(p - 1);
if (ch == _T('_') || wxIsalnum(ch))
{
p += keyLen;
continue;
}
}
// check next char
p += keyLen;
if (p < endBuffer)
{
const wxChar ch = *p;
if (ch == _T('_') || wxIsalnum(ch))
continue;
}
// got it
pos = p - buffer - keyLen;
break;
}
return pos;
}
Where, the arguments are:
[debug]> info args
[debug]this = 0x41d4200
[debug]buffer = 0x41d7bf0 L"text ## line"
[debug]bufferLen = 12
[debug]key = 0x41d7868 L"text"
[debug]keyLen = 4
[debug]>>>>>>cb_gdb:
this = 0x41d4200
buffer = 0x41d7bf0 L"text ## line"
bufferLen = 12
key = 0x41d7868 L"text"
keyLen = 4