338 lines
13 KiB
Lua
338 lines
13 KiB
Lua
|
--[[
|
||
|
lua_parser_loose.lua.
|
||
|
Loose parsing of Lua code. See README.
|
||
|
(c) 2013 David Manura. MIT License.
|
||
|
--]]
|
||
|
|
||
|
local PARSE = {}
|
||
|
|
||
|
local unpack = table.unpack or unpack
|
||
|
local LEX = require 'lua_lexer_loose'
|
||
|
|
||
|
--[[
|
||
|
Loose parser.
|
||
|
|
||
|
lx - lexer stream of Lua tokens.
|
||
|
f(event...) - callback function to send events to.
|
||
|
|
||
|
Events generated:
|
||
|
'Var', name, lineinfo - variable declaration that immediately comes into scope.
|
||
|
'VarSelf', name, lineinfo - same as 'Var' but for implicit 'self' parameter
|
||
|
in method definitions. lineinfo is zero-width space after '('
|
||
|
'VarNext', name, lineinfo - variable definition that comes into scope
|
||
|
upon next statement.
|
||
|
'VarInside', name, lineinfo - variable definition that comes into scope
|
||
|
inside following block. Used for control variables in 'for' statements.
|
||
|
'Id', name, lineinfo - reference to variable.
|
||
|
'String', name - string or table field.
|
||
|
'Scope', opt - beginning of scope block.
|
||
|
'EndScope', nil, lineinfo - end of scope block.
|
||
|
'FunctionCall', name, lineinfo - function call (in addition to other events).
|
||
|
'Function', name, lineinfo - function definition.
|
||
|
--]]
|
||
|
function PARSE.parse_scope(lx, f, level)
|
||
|
local cprev = {tag='Eof'}
|
||
|
|
||
|
-- stack of scopes.
|
||
|
local scopes = {{}}
|
||
|
for l = 2, (level or 1) do scopes[l] = {} end
|
||
|
|
||
|
local function scope_begin(opt, lineinfo, nobreak)
|
||
|
scopes[#scopes+1] = {}
|
||
|
f('Scope', opt, lineinfo, nobreak)
|
||
|
end
|
||
|
local function scope_end(opt, lineinfo)
|
||
|
local scope = #scopes
|
||
|
if scope > 1 then table.remove(scopes) end
|
||
|
local inside_local = false
|
||
|
for scope = scope-1, 1, -1 do
|
||
|
if scopes[scope].inside_local then inside_local = true; break end
|
||
|
end
|
||
|
f('EndScope', opt, lineinfo, inside_local)
|
||
|
end
|
||
|
|
||
|
local function parse_function_list(has_self, name, pos)
|
||
|
local c = lx:next(); assert(c[1] == '(')
|
||
|
f('Statement', c[1], c.lineinfo, true) -- generate Statement for function definition
|
||
|
scope_begin(c[1], c.lineinfo, true)
|
||
|
|
||
|
local vars = {} -- accumulate vars (if any) to send after 'Function'
|
||
|
if has_self then
|
||
|
local lineinfo = c.lineinfo+1 -- zero size
|
||
|
table.insert(vars, {'VarSelf', 'self', lineinfo, true})
|
||
|
end
|
||
|
while true do
|
||
|
local n = lx:peek()
|
||
|
if not (n.tag == 'Id' or n.tag == 'Keyword' and n[1] == '...') then break end
|
||
|
local c = lx:next()
|
||
|
if c.tag == 'Id' then table.insert(vars, {'Var', c[1], c.lineinfo, true}) end
|
||
|
-- ignore '...' in this case
|
||
|
if lx:peek()[1] == ',' then lx:next() end
|
||
|
end
|
||
|
if lx:peek()[1] == ')' then
|
||
|
lx:next()
|
||
|
f('Function', name, pos or c.lineinfo, true)
|
||
|
end
|
||
|
for _, var in ipairs(vars) do f(unpack(var)) end
|
||
|
end
|
||
|
|
||
|
while true do
|
||
|
local c = lx:next()
|
||
|
|
||
|
-- Detect end of previous statement
|
||
|
if c.tag == 'Eof' -- trigger 'Statement' at the end of file
|
||
|
or c.tag == 'Keyword' and (
|
||
|
c[1] == 'break' or c[1] == 'goto' or c[1] == 'do' or c[1] == 'while' or
|
||
|
c[1] == 'repeat' or c[1] == 'if' or c[1] == 'for' or c[1] == 'function' and lx:peek().tag == 'Id' or
|
||
|
c[1] == 'local' or c[1] == ';' or c[1] == 'until' or c[1] == 'return' or c[1] == 'end') or
|
||
|
c.tag == 'Id' and
|
||
|
(cprev.tag == 'Id' or
|
||
|
cprev.tag == 'Keyword' and
|
||
|
(cprev[1] == ']' or cprev[1] == ')' or cprev[1] == '}' or
|
||
|
cprev[1] == '...' or cprev[1] == 'end' or
|
||
|
cprev[1] == 'true' or cprev[1] == 'false' or
|
||
|
cprev[1] == 'nil') or
|
||
|
cprev.tag == 'Number' or cprev.tag == 'String')
|
||
|
then
|
||
|
if scopes[#scopes].inside_until then scope_end(nil, c.lineinfo) end
|
||
|
local scope = #scopes
|
||
|
if not scopes[scope].inside_table then scopes[scope].inside_local = nil end
|
||
|
f('Statement', c[1], c.lineinfo,
|
||
|
scopes[scope].inside_local or c[1] == 'local' or c[1] == 'function' or c[1] == 'end')
|
||
|
end
|
||
|
|
||
|
if c.tag == 'Eof' then break end
|
||
|
|
||
|
-- Process token(s)
|
||
|
if c.tag == 'Keyword' then
|
||
|
|
||
|
if c[1] == 'local' and lx:peek().tag == 'Keyword' and lx:peek()[1] == 'function' then
|
||
|
-- local function
|
||
|
local c = lx:next(); assert(c[1] == 'function')
|
||
|
if lx:peek().tag == 'Id' then
|
||
|
c = lx:next()
|
||
|
f('Var', c[1], c.lineinfo, true)
|
||
|
if lx:peek()[1] == '(' then parse_function_list(nil, c[1], c.lineinfo) end
|
||
|
end
|
||
|
elseif c[1] == 'function' then
|
||
|
if lx:peek()[1] == '(' then -- inline function
|
||
|
parse_function_list()
|
||
|
elseif lx:peek().tag == 'Id' then -- function definition statement
|
||
|
c = lx:next(); assert(c.tag == 'Id')
|
||
|
local name = c[1]
|
||
|
local pos = c.lineinfo
|
||
|
f('Id', name, pos, true)
|
||
|
local has_self
|
||
|
while lx:peek()[1] ~= '(' and lx:peek().tag ~= 'Eof' do
|
||
|
c = lx:next()
|
||
|
name = name .. c[1]
|
||
|
if c.tag == 'Id' then
|
||
|
f('String', c[1], c.lineinfo, true)
|
||
|
elseif c.tag == 'Keyword' and c[1] == ':' then
|
||
|
has_self = true
|
||
|
end
|
||
|
end
|
||
|
if lx:peek()[1] == '(' then parse_function_list(has_self, name, pos) end
|
||
|
end
|
||
|
elseif c[1] == 'local' and lx:peek().tag == 'Id' then
|
||
|
scopes[#scopes].inside_local = true
|
||
|
c = lx:next()
|
||
|
f('VarNext', c[1], c.lineinfo, true)
|
||
|
while lx:peek().tag == 'Keyword' and lx:peek()[1] == ',' do
|
||
|
c = lx:next(); if lx:peek().tag ~= 'Id' then break end
|
||
|
c = lx:next()
|
||
|
f('VarNext', c[1], c.lineinfo, true)
|
||
|
end
|
||
|
elseif c[1] == 'for' and lx:peek().tag == 'Id' then
|
||
|
c = lx:next()
|
||
|
f('VarInside', c[1], c.lineinfo, true)
|
||
|
while lx:peek().tag == 'Keyword' and lx:peek()[1] == ',' do
|
||
|
c = lx:next(); if lx:peek().tag ~= 'Id' then break end
|
||
|
c = lx:next()
|
||
|
f('VarInside', c[1], c.lineinfo, true)
|
||
|
end
|
||
|
elseif c[1] == 'goto' and lx:peek().tag == 'Id' then
|
||
|
lx:next()
|
||
|
elseif c[1] == 'do' then
|
||
|
scope_begin('do', c.lineinfo)
|
||
|
-- note: do/while/for statement scopes all begin at 'do'.
|
||
|
elseif c[1] == 'repeat' or c[1] == 'then' then
|
||
|
scope_begin(c[1], c.lineinfo)
|
||
|
elseif c[1] == 'end' or c[1] == 'elseif' then
|
||
|
scope_end(c[1], c.lineinfo)
|
||
|
elseif c[1] == 'else' then
|
||
|
scope_end(nil, c.lineinfo)
|
||
|
scope_begin(c[1], c.lineinfo)
|
||
|
elseif c[1] == 'until' then
|
||
|
scopes[#scopes].inside_until = true
|
||
|
elseif c[1] == '{' then
|
||
|
scopes[#scopes].inside_table = (scopes[#scopes].inside_table or 0) + 1
|
||
|
elseif c[1] == '}' then
|
||
|
local newval = (scopes[#scopes].inside_table or 0) - 1
|
||
|
newval = newval >= 1 and newval or nil
|
||
|
scopes[#scopes].inside_table = newval
|
||
|
end
|
||
|
elseif c.tag == 'Id' then
|
||
|
local scope = #scopes
|
||
|
local inside_local = scopes[scope].inside_local ~= nil
|
||
|
local inside_table = scopes[scope].inside_table
|
||
|
local cnext = lx:peek()
|
||
|
if cnext.tag == 'Keyword' and (cnext[1] == '(' or cnext[1] == '{')
|
||
|
or cnext.tag == 'String' then
|
||
|
f('FunctionCall', c[1], c.lineinfo, inside_local)
|
||
|
end
|
||
|
-- either this is inside a table or it continues from a comma,
|
||
|
-- which may be a field assignment, so assume it's in a table
|
||
|
if (inside_table or cprev[1] == ',') and cnext.tag == 'Keyword' and cnext[1] == '=' then
|
||
|
-- table field; table fields are tricky to handle during incremental
|
||
|
-- processing as "a = 1" may be either an assignment (in which case
|
||
|
-- 'a' is Id) or a field initialization (in which case it's a String).
|
||
|
-- Since it's not possible to decide between two cases in isolation,
|
||
|
-- this is not a good place to insert a break; instead, the break is
|
||
|
-- inserted at the location of the previous keyword, which allows
|
||
|
-- to properly handle those cases. The desired location of
|
||
|
-- the restart point is returned as the `nobreak` value.
|
||
|
f('String', c[1], c.lineinfo,
|
||
|
inside_local or cprev and cprev.tag == 'Keyword' and cprev.lineinfo)
|
||
|
elseif cprev.tag == 'Keyword' and (cprev[1] == ':' or cprev[1] == '.') then
|
||
|
f('String', c[1], c.lineinfo, true)
|
||
|
else
|
||
|
f('Id', c[1], c.lineinfo, true)
|
||
|
-- this looks like the left side of (multi-variable) assignment
|
||
|
-- unless it's a part of `= var, field = value`, so skip if inside a table
|
||
|
if not inside_table and not (cprev and cprev.tag == 'Keyword' and cprev[1] == '=') then
|
||
|
while lx:peek().tag == 'Keyword' and lx:peek()[1] == ',' do
|
||
|
local c = lx:next(); if lx:peek().tag ~= 'Id' then break end
|
||
|
c = lx:next()
|
||
|
f('Id', c[1], c.lineinfo, true)
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
|
||
|
if c.tag ~= 'Comment' then cprev = c end
|
||
|
end
|
||
|
end
|
||
|
|
||
|
--[[
|
||
|
This is similar to parse_scope but determines if variables are local or global.
|
||
|
|
||
|
lx - lexer stream of Lua tokens.
|
||
|
f(event...) - callback function to send events to.
|
||
|
|
||
|
Events generated:
|
||
|
'Id', name, lineinfo, 'local'|'global'
|
||
|
(plus all events in parse_scope)
|
||
|
--]]
|
||
|
function PARSE.parse_scope_resolve(lx, f, vars)
|
||
|
local NEXT = {} -- unique key
|
||
|
local INSIDE = {} -- unique key
|
||
|
local function newscope(vars, opt, lineinfo)
|
||
|
local newvars = opt=='do' and vars[INSIDE] or {}
|
||
|
if newvars == vars[INSIDE] then vars[INSIDE] = false end
|
||
|
newvars[INSIDE]=false
|
||
|
newvars[NEXT]=false
|
||
|
local level = (vars[0] or 0) + 1
|
||
|
newvars[0] = level -- keep the current level
|
||
|
newvars[-1] = lineinfo -- keep the start of the scope
|
||
|
newvars[level] = newvars -- reference the current vars table
|
||
|
return setmetatable(newvars, {__index=vars})
|
||
|
end
|
||
|
|
||
|
vars = vars or newscope({[0] = 0}, nil, 1)
|
||
|
vars[NEXT] = false -- vars that come into scope upon next statement
|
||
|
vars[INSIDE] = false -- vars that come into scope upon entering block
|
||
|
PARSE.parse_scope(lx, function(op, name, lineinfo, nobreak)
|
||
|
-- in some (rare) cases VarNext can follow Statement event (which copies
|
||
|
-- vars[NEXT]). This may cause vars[0] to be `nil`, so default to 1.
|
||
|
local var = op:find("^Var") and
|
||
|
{fpos = lineinfo, at = (vars[0] or 1) + (op == 'VarInside' and 1 or 0),
|
||
|
masked = vars[name], self = (op == 'VarSelf') or nil } or nil
|
||
|
if op == 'Var' or op == 'VarSelf' then
|
||
|
vars[name] = var
|
||
|
elseif op == 'VarNext' then
|
||
|
vars[NEXT] = vars[NEXT] or {}
|
||
|
vars[NEXT][name] = var
|
||
|
elseif op == 'VarInside' then
|
||
|
vars[INSIDE] = vars[INSIDE] or {}
|
||
|
vars[INSIDE][name] = var
|
||
|
elseif op == 'Scope' then
|
||
|
vars = newscope(vars, name, lineinfo)
|
||
|
elseif op == 'EndScope' then
|
||
|
local mt = getmetatable(vars)
|
||
|
if mt ~= nil then vars = mt.__index end
|
||
|
elseif op == 'Id'
|
||
|
or op == 'String' or op == 'FunctionCall' or op == 'Function' then
|
||
|
-- Just make callback
|
||
|
elseif op == 'Statement' then -- beginning of statement
|
||
|
-- Apply vars that come into scope upon beginning of statement.
|
||
|
if vars[NEXT] then
|
||
|
for k,v in pairs(vars[NEXT]) do
|
||
|
vars[k] = v; vars[NEXT][k] = nil
|
||
|
end
|
||
|
end
|
||
|
else
|
||
|
assert(false)
|
||
|
end
|
||
|
f(op, name, lineinfo, vars, nobreak)
|
||
|
end, vars[0])
|
||
|
end
|
||
|
|
||
|
function PARSE.extract_vars(code, f)
|
||
|
local lx = LEX.lexc(code)
|
||
|
|
||
|
local char0 = 1 -- next char offset to write
|
||
|
local function gen(char1, nextchar0)
|
||
|
char0 = nextchar0
|
||
|
end
|
||
|
|
||
|
PARSE.parse_scope_resolve(lx, function(op, name, lineinfo, other)
|
||
|
if op == 'Id' then
|
||
|
f('Id', name, other, lineinfo)
|
||
|
elseif op == 'Var' or op == 'VarNext' or op == 'VarInside' then
|
||
|
gen(lineinfo, lineinfo+#name)
|
||
|
f('Var', name, "local", lineinfo)
|
||
|
end -- ignore 'VarSelf' and others
|
||
|
end)
|
||
|
gen(#code+1, nil)
|
||
|
end
|
||
|
|
||
|
--[[
|
||
|
Converts 5.2 code to 5.1 style code with explicit _ENV variables.
|
||
|
Example: "function f(_ENV, x) print(x, y)" -->
|
||
|
"function _ENV.f(_ENV, x) _ENV.print(x, _ENV.y) end"
|
||
|
|
||
|
code - string of Lua code. Assumed to be valid Lua (FIX: 5.1 or 5.2?)
|
||
|
f(s) - call back function to send chunks of Lua code output to. Example: io.stdout.
|
||
|
--]]
|
||
|
function PARSE.replace_env(code, f)
|
||
|
if not f then return PARSE.accumulate(PARSE.replace_env, code) end
|
||
|
PARSE.extract_vars(code, function(op, name, other)
|
||
|
if op == 'Id' then
|
||
|
f(other == 'global' and '_ENV.' .. name or name)
|
||
|
elseif op == 'Var' or op == 'Other' then
|
||
|
f(name)
|
||
|
end
|
||
|
end)
|
||
|
end
|
||
|
|
||
|
-- helper function. Can be passed as argument `f` to functions
|
||
|
-- like `replace_env` above to accumulate fragments into a single string.
|
||
|
function PARSE.accumulator()
|
||
|
local ts = {}
|
||
|
local mt = {}
|
||
|
mt.__index = mt
|
||
|
function mt:__call(s) ts[#ts+1] = s end
|
||
|
function mt:result() return table.concat(ts) end
|
||
|
return setmetatable({}, mt)
|
||
|
end
|
||
|
|
||
|
-- helper function
|
||
|
function PARSE.accumulate(g, code)
|
||
|
local accum = PARSE.accumulator()
|
||
|
g(code, accum)
|
||
|
return accum:result()
|
||
|
end
|
||
|
|
||
|
return PARSE
|