From fffa8441c970a7cd3cba5507c4cc536df2fc35cf Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Sun, 23 Dec 2018 14:39:26 +0100 Subject: [PATCH] lexer read ahead in file mode for multi-line patterns fixes #271 --- docs_topics/06-data.md | 3 +-- lua/pl/lexer.lua | 29 ++++++++++++++++++++++++----- tests/test-lexer.lua | 4 ++-- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/docs_topics/06-data.md b/docs_topics/06-data.md index 8c759d78..36a02579 100644 --- a/docs_topics/06-data.md +++ b/docs_topics/06-data.md @@ -664,8 +664,7 @@ A lexical scanner is useful where you have highly-structured data which is not nicely delimited by newlines. For example, here is a snippet of a in-house file format which it was my task to maintain: - points -(818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1) + points (818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1) ,(818327.4,-20388,-0.1),(818322,-20387.7,-0.1),(818316.3,-20388.6,-0.1) ,(818309.7,-20389.4,-0.1),(818303.5,-20390.6,-0.1),(818295.8,-20388.3,-0.1) ,(818290.5,-20386.9,-0.1),(818285.2,-20386.1,-0.1),(818279.3,-20383.6,-0.1) diff --git a/lua/pl/lexer.lua b/lua/pl/lexer.lua index 68a08749..23024c6d 100644 --- a/lua/pl/lexer.lua +++ b/lua/pl/lexer.lua @@ -100,7 +100,7 @@ local function wsdump (tok) return yield("space",tok) end -local function pdump (tok) +local function pdump(tok) return yield('prepro',tok) end @@ -214,8 +214,23 @@ function lexer.scan(s,matches,filter,options) for _,m in ipairs(matches) do local pat = m[1] local fun = m[2] + local pat_full = m[3] local findres = {strfind(s,pat,idx)} local i1, i2 = findres[1], findres[2] + if i1 and pat_full then + -- a multi-line pattern + findres = {strfind(s,pat_full,idx)} + i1, i2 = findres[1], findres[2] + while not i1 do -- read lines until we have a full pattern + if not next_line then break end + line_nr = line_nr + 1 + s = s .. next_line .. '\n' + next_line = file:read() + sz = #s + findres = {strfind(s,pat_full,idx)} + i1, i2 = findres[1], findres[2] + end + end if i1 then local tok = strsub(s,i1,i2) idx = i2 + 1 @@ -325,9 +340,12 @@ function lexer.lua(s,filter,options) {STRING1,sdump}, {STRING2,sdump}, {STRING3,sdump}, - {'^%-%-%[(=*)%[.-%]%1%]',cdump}, - {'^%-%-.-\n',cdump}, - {'^%[(=*)%[.-%]%1%]',sdump_l}, + {'^%-%-%[(=*)%[',cdump,'^%-%-%[(=*)%[.-%]%1%]'}, + --{'^%-%-%[(=*)%[.-%]%1%]',cdump}, + {'^%-%-[^%[].-\n',cdump}, + {'^%-%-\n',cdump}, + {'^%[(=*)%[',sdump_l, '^%[(=*)%[.-%]%1%]'}, + --{'^%[(=*)%[.-%]%1%]',sdump_l}, {'^==',tdump}, {'^~=',tdump}, {'^<=',tdump}, @@ -379,7 +397,8 @@ function lexer.cpp(s,filter,options) {STRING2,sdump}, {STRING3,sdump}, {'^//.-\n',cdump}, - {'^/%*.-%*/',cdump}, + {'^/%*',cdump,'^/%*.-%*/'}, + --{'^/%*.-%*/',cdump}, {'^==',tdump}, {'^!=',tdump}, {'^<=',tdump}, diff --git a/tests/test-lexer.lua b/tests/test-lexer.lua index 20a86a18..807e3f5e 100644 --- a/tests/test-lexer.lua +++ b/tests/test-lexer.lua @@ -14,9 +14,9 @@ local function test_scan(str, filter, options, expected_tokens, lang) end asserteq(copy2(lexer[lang](str, matches, filter, options)), expected_tokens) - if lang == 'scan' then + --if lang == 'scan' then asserteq(copy2(lexer[lang](open(str), matches, filter, options)), expected_tokens) - end + --end end local s = '20 = hello'