lexer read ahead in file mode for multi-line patterns
fixes #271
Tieske committed Dec 23, 2018
1 parent ee381b7 commit fffa844
Showing 3 changed files with 27 additions and 9 deletions.
docs_topics/06-data.md (3 changes: 1 addition & 2 deletions)
@@ -664,8 +664,7 @@ A lexical scanner is useful where you have highly-structured data which is not
 nicely delimited by newlines. For example, here is a snippet of a in-house file
 format which it was my task to maintain:
 
-points
-(818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1)
+points (818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1)
 ,(818327.4,-20388,-0.1),(818322,-20387.7,-0.1),(818316.3,-20388.6,-0.1)
 ,(818309.7,-20389.4,-0.1),(818303.5,-20390.6,-0.1),(818295.8,-20388.3,-0.1)
 ,(818290.5,-20386.9,-0.1),(818285.2,-20386.1,-0.1),(818279.3,-20383.6,-0.1)
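The surrounding section of 06-data.md is about running a lexical scanner over this kind of data. As a point of reference, a minimal sketch of such a scan with pl.lexer; the filter and options tables here are assumptions about typical use, not something quoted from the doc:

    -- a minimal sketch, assuming pl.lexer's default matches: numbers come back
    -- as 'number' tokens and punctuation as single-character token types.
    local lexer = require 'pl.lexer'

    local data = "points (818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1)"

    local coords = {}
    for t, v in lexer.scan(data, nil, {space=true}, {number=true}) do
        if t == 'number' then
            coords[#coords+1] = v   -- numeric value because options.number = true
        end
    end
    print(#coords)                  --> 6 coordinate values from the two points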
lua/pl/lexer.lua (29 changes: 24 additions & 5 deletions)
@@ -100,7 +100,7 @@ local function wsdump (tok)
     return yield("space",tok)
 end
 
-local function pdump (tok)
+local function pdump(tok)
     return yield('prepro',tok)
 end
 
@@ -214,8 +214,23 @@ function lexer.scan(s,matches,filter,options)
         for _,m in ipairs(matches) do
             local pat = m[1]
             local fun = m[2]
+            local pat_full = m[3]
             local findres = {strfind(s,pat,idx)}
             local i1, i2 = findres[1], findres[2]
+            if i1 and pat_full then
+                -- a multi-line pattern
+                findres = {strfind(s,pat_full,idx)}
+                i1, i2 = findres[1], findres[2]
+                while not i1 do -- read lines until we have a full pattern
+                    if not next_line then break end
+                    line_nr = line_nr + 1
+                    s = s .. next_line .. '\n'
+                    next_line = file:read()
+                    sz = #s
+                    findres = {strfind(s,pat_full,idx)}
+                    i1, i2 = findres[1], findres[2]
+                end
+            end
             if i1 then
                 local tok = strsub(s,i1,i2)
                 idx = i2 + 1
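This hunk is the core of the fix: when a rule's short pattern matches but its full multi-line pattern (the new third element, m[3]) does not yet fit in the buffered text, the scanner keeps appending lines from the file until it does, or until the input runs out. The same idea in isolation, as a rough sketch rather than Penlight's actual code; match_multiline and its arguments are invented for illustration:

    -- grow a buffer line by line until the full multi-line pattern matches at
    -- idx, or the line source is exhausted (illustration only)
    local function match_multiline(buf, idx, pat_full, read_line)
        local i1, i2 = string.find(buf, pat_full, idx)
        while not i1 do
            local line = read_line()
            if not line then break end          -- EOF: caller handles the failure
            buf = buf .. line .. '\n'
            i1, i2 = string.find(buf, pat_full, idx)
        end
        return buf, i1, i2
    end

    -- e.g. completing a C block comment whose opener sits on the first line:
    local lines, n = { 'still inside', 'done */ int x;' }, 0
    local buf, i1, i2 = match_multiline('/* comment\n', 1, '^/%*.-%*/', function()
        n = n + 1
        return lines[n]
    end)
    assert(buf:sub(i1, i2) == '/* comment\nstill inside\ndone */')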
@@ -325,9 +340,12 @@ function lexer.lua(s,filter,options)
         {STRING1,sdump},
         {STRING2,sdump},
         {STRING3,sdump},
-        {'^%-%-%[(=*)%[.-%]%1%]',cdump},
-        {'^%-%-.-\n',cdump},
-        {'^%[(=*)%[.-%]%1%]',sdump_l},
+        {'^%-%-%[(=*)%[',cdump,'^%-%-%[(=*)%[.-%]%1%]'},
+        --{'^%-%-%[(=*)%[.-%]%1%]',cdump},
+        {'^%-%-[^%[].-\n',cdump},
+        {'^%-%-\n',cdump},
+        {'^%[(=*)%[',sdump_l, '^%[(=*)%[.-%]%1%]'},
+        --{'^%[(=*)%[.-%]%1%]',sdump_l},
         {'^==',tdump},
         {'^~=',tdump},
         {'^<=',tdump},
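With the comment and long-string rules split into a prefix pattern plus a full pattern, the Lua scanner can now return a bracketed token that spans several lines even when it is fed a file handle line by line. A small usage sketch, assuming nothing beyond the public lexer.lua API; the temp-file plumbing is only there to get a real file handle:

    local lexer = require 'pl.lexer'

    -- write a chunk containing a multi-line long string to a temporary file
    local path = os.tmpname()
    local fw = assert(io.open(path, 'w'))
    fw:write("local s = [==[ first line\nsecond line ]==]\n")
    fw:close()

    local f = assert(io.open(path, 'r'))
    for t, v in lexer.lua(f) do   -- default filter drops spaces and comments
        print(t, v)               -- the long string arrives as a single 'string' token
    end
    f:close()
    os.remove(path)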
@@ -379,7 +397,8 @@ function lexer.cpp(s,filter,options)
         {STRING2,sdump},
         {STRING3,sdump},
         {'^//.-\n',cdump},
-        {'^/%*.-%*/',cdump},
+        {'^/%*',cdump,'^/%*.-%*/'},
+        --{'^/%*.-%*/',cdump},
         {'^==',tdump},
         {'^!=',tdump},
         {'^<=',tdump},
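The C/C++ scanner gets the same treatment for block comments. A compact sketch along the same lines, again using a throwaway file just to exercise file mode; the token type named in the comment is what pl.lexer normally yields for comments:

    local lexer = require 'pl.lexer'

    local path = os.tmpname()
    local fw = assert(io.open(path, 'w'))
    fw:write("/* spans\nmore than one line */\nint x = 1;\n")
    fw:close()

    local f = assert(io.open(path, 'r'))
    for t, v in lexer.cpp(f, {space=true}) do   -- keep comments so the token is visible
        print(t, v)                             -- first token: one 'comment' covering both lines
    end
    f:close()
    os.remove(path)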
tests/test-lexer.lua (4 changes: 2 additions & 2 deletions)
@@ -14,9 +14,9 @@ local function test_scan(str, filter, options, expected_tokens, lang)
     end
 
     asserteq(copy2(lexer[lang](str, matches, filter, options)), expected_tokens)
-    if lang == 'scan' then
+    --if lang == 'scan' then
         asserteq(copy2(lexer[lang](open(str), matches, filter, options)), expected_tokens)
-    end
+    --end
 end
 
 local s = '20 = hello'
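The test now exercises file mode for every lexer, not only the raw scan. The diff does not show the open helper it calls; presumably it wraps the test string in a file-like object with a line-based read(), roughly along the lines of this hypothetical sketch (pl.stringio.open would serve the same purpose):

    -- hypothetical stand-in for the test's open(): wrap a string so that
    -- lexer.scan sees an object whose read() returns one line at a time.
    local function open(str)
        local lines = {}
        for line in (str .. '\n'):gmatch('(.-)\n') do
            lines[#lines + 1] = line
        end
        local i = 0
        return {
            read = function()
                i = i + 1
                return lines[i]     -- nil at EOF, like file:read()
            end,
        }
    end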
