alunizaje/StartGamedev-160604-osx/tools/zbstudio.app/Contents/ZeroBraneStudio/lualibs/luainspect/ast.lua

916 lines
32 KiB
Lua
Raw Normal View History

2016-11-03 00:05:36 +01:00
-- luainspect.ast - Lua Abstract Syntax Tree (AST) and token list operations.
--
-- Two main structures are maintained. A Metalua-style AST represents the
-- nested syntactic structure obtained from the parse.
-- A separate linear ordered list of tokens represents the syntactic structure
-- from the lexing, including line information (character positions only not row/columns),
-- comments, and keywords, which is originally built from the lineinfo attributes
-- injected by Metalua into the AST (IMPROVE: it probably would be simpler
-- to obtain this from the lexer directly rather than inferring it from the parsing).
-- During AST manipulations, the lineinfo maintained in the AST is ignored
-- because it was found more difficult to maintain and not in the optimal format.
--
-- The contained code deals with
-- - Building the AST from source.
-- - Building the tokenlist from the AST lineinfo.
-- - Querying the AST+tokenlist.
-- - Modifying the AST+tokenlist (including incremental parsing source -> AST)
-- - Annotating the AST with navigational info (e.g. parent links) to assist queries.
-- - Dumping the tokenlist for debugging.
--
-- (c) 2010 David Manura, MIT License.
--! require 'luainspect.typecheck' (context)
local mlc = require 'metalua.compiler'.new()
local M = {}
--[=TESTSUITE
-- utilities
-- Named binary comparison operators understood by `check`.
local ops = {
  ['=='] = function(a, b) return a == b end,
}

-- Test helper: raises an error unless `a <opname> b` holds.
-- Also raises (via assert) when `opname` is not a key of `ops`.
local function check(opname, a, b)
  local compare = assert(ops[opname])
  if compare(a, b) then return end
  error("fail == " .. tostring(a) .. " " .. tostring(b))
end
--]=]
-- CATEGORY: debug
-- Prints its arguments prefixed with 'DEBUG:' when the global
-- LUAINSPECT_DEBUG holds a true value; otherwise does nothing.
local function DEBUG(...)
  if not LUAINSPECT_DEBUG then return end
  print('DEBUG:', ...)
end
-- Converts character position `pos` to a row,column position in string `src`.
-- All values are 1-indexed.
-- A newline character is treated as the last column of the line it
-- terminates, so a position on a "\n" is never reported as column 0 of the
-- following line.
-- Returns: linenum, colnum
function M.pos_to_linecol(pos, src)
  local linenum = 1
  local lasteolpos = 0 -- position of the last newline strictly before `pos`
  for eolpos in src:gmatch"()\n" do
    -- Only newlines located strictly before `pos` move `pos` to a later line.
    if eolpos >= pos then break end
    linenum = linenum + 1
    lasteolpos = eolpos
  end
  local colnum = pos - lasteolpos
  return linenum, colnum
end
-- Blanks out any shebang ("#!") line at the start of Lua source string `src`.
-- The shebang text (up to but excluding the line break) is replaced by the
-- same number of spaces, so the length of the string is unchanged.
-- Returns `src` itself when it does not start with "#!".
-- CATEGORY: Lua parsing
function M.remove_shebang(src)
  local shebang = src:match("^#![^\r\n]*")
  if not shebang then return src end
  local padding = string.rep(" ", #shebang)
  return padding .. src:sub(#shebang + 1)
end
-- Custom version of loadstring that parses line number info out of the
-- error message.
-- On success returns the compiled chunk.
-- On failure returns nil, err, linenum, colnum, linenum2 where `err` is the
-- message with the leading `[string ""]:` removed, `linenum` is the first
-- "<digits>:" found in it (as a string), `colnum` is always 0 and `linenum2`
-- is the line named in a "... expected (to close ... at line N" message
-- (nil if the message has no such part).
-- CATEGORY: Lua parsing
function M.loadstring(src)
  local chunk, err = loadstring(src, "")
  if chunk then return chunk end

  -- parentheses keep only the first result of gsub (the new string)
  local message = (err:gsub('^%[string ""%]:', ""))
  local linenum = assert(message:match("(%d+):"))
  local linenum2 = message:match("^%d+: '[^']+' expected %(to close '[^']+' at line (%d+)")
  return nil, message, linenum, 0, linenum2
end
-- Helper for ast_from_string: hands `src` and `filename` to the module-level
-- Metalua compiler object `mlc` and returns whatever it returns.
-- Raises on error.
-- FIX? filename currently ignored in Metalua
-- CATEGORY: Lua parsing
local function ast_from_string_helper(src, filename)
  return mlc.src_to_ast(mlc, src, filename)
end
-- Counts number of lines in text.
-- Warning: whether a trailing new-line or an empty file counts as a line is
-- a little subjective. This function defines the line count as 1 plus the
-- number of new line characters.
-- CATEGORY: utility/string
local function linecount(text)
  local count, from = 1, 1
  while true do
    local at = text:find('\n', from, true) -- plain search for next newline
    if not at then return count end
    count = count + 1
    from = at + 1
  end
end
-- Converts Lua source string to Lua AST (via mlp/gg).
-- On success returns the AST.
-- On failure returns nil, err, linenum, colnum, linenum2 where `err` is the
-- first line of the error message (with any prefix before "line" stripped)
-- and `linenum2` is always nil.
-- CATEGORY: Lua parsing
function M.ast_from_string(src, filename)
  local ok, result = pcall(ast_from_string_helper, src, filename)
  if ok then return result end

  -- Keep only the first line of the message.
  local err = result:match('[^\n]*')
  -- mlp.chunk prepending this is undesirable. error(msg,0) would be better in gg.lua. Reported.
  -- TODO-Metalua: remove when fixed in Metalua.
  err = err:gsub("^.-:%s*line", "line")

  local linenum, colnum = err:match("line (%d+), char (%d+)")
  if not linenum then
    -- Metalua libraries may return "...gg.lua:56: .../mlp_misc.lua:179: End-of-file expected"
    -- without the normal line/char numbers given things like "if x then end end". Should be
    -- fixed probably with gg.parse_error in _chunk in mlp_misc.lua.
    -- TODO-Metalua: remove when fixed in Metalua.
    linenum, colnum = linecount(src), 1
  end
  return nil, err, linenum, colnum, nil
end
-- Simple comment parser. Returns Metalua-style comment table
-- {text, 1, #src, 'short'|'long'}, or nil if `src` is not exactly one comment.
-- A short comment must end with "\n"; a long comment may be followed by an
-- optional "\r" and/or "\n".
-- NOTE(review): `lexer` is not defined in this file; presumably it is a
-- global provided by the Metalua lexer module -- confirm.
-- CATEGORY: Lua lexing
local function quick_parse_comment(src)
  local short_text = src:match"^%-%-([^\n]*)()\n$"
  if short_text then
    return {short_text, 1, #src, 'short'}
  end

  local _, long_text = src:match(lexer.lexer.patterns.long_comment .. '\r?\n?$')
  if long_text then
    return {long_text, 1, #src, 'long'}
  end

  return nil
end
--FIX:check new-line correctness
--note: currently requiring \n at end of single line comment to avoid
-- incremental compilation with `--x\nf()` and removing \n from still
-- recognizing as comment `--x`.
-- currently allowing \r\n at end of long comment since Metalua includes
-- it in lineinfo of long comment (FIX:Metalua?)
-- Gets length of the longest prefix shared by both provided strings.
-- Returns max n such that text1:sub(1,n) == text2:sub(1,n) and
-- n <= min(#text1,#text2).  Found by binary search on n.
-- CATEGORY: string utility
local function longest_prefix(text1, text2)
  -- Invariant: the first `lo` chars match; no common prefix exceeds `hi`.
  local lo, hi = 0, math.min(#text1, #text2)
  while lo < hi do
    local mid = math.floor((lo + hi + 1) / 2) -- upper midpoint, always > lo
    if text1:sub(1, mid) ~= text2:sub(1, mid) then
      hi = mid - 1
    else
      lo = mid
    end
  end
  return lo
end
-- Gets length of the longest postfix shared by both provided strings.
-- Returns max n such that text1:sub(-n) == text2:sub(-n) and
-- n <= min(#text1,#text2).  Found by binary search on n.
-- CATEGORY: string utility
local function longest_postfix(text1, text2)
  -- Invariant: the last `lo` chars match; no common postfix exceeds `hi`.
  local lo, hi = 0, math.min(#text1, #text2)
  while lo < hi do
    local mid = math.floor((lo + hi + 1) / 2) -- upper midpoint, always >= 1
    if text1:sub(-mid) ~= text2:sub(-mid) then --[*]
      hi = mid - 1
    else
      lo = mid
    end
  end
  return lo
end -- differs from longest_prefix only on line [*]
-- Determines the region that must be re-evaluated upon changing the code string
-- from `src` to `bsrc`, given previous top_ast/tokenlist/src.
-- Note: decorates top_ast as side-effect.
-- If preserve is true, then does not expand AST match even if replacement is invalid.
-- Returns nothing when `src == bsrc`.  Otherwise returns
--   srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
-- where [srcm_fpos, srcm_lpos] is the matched position range in `src`,
-- [bsrcm_fpos, bsrcm_lpos] is the corresponding range in `bsrc`, `mast` is the
-- matched node (nil for whitespace) and `mtype` is one of 'whitespace',
-- 'comment', 'ast' or 'statblock'.
-- CATEGORY: AST/tokenlist manipulation
function M.invalidated_code(top_ast, tokenlist, src, bsrc, preserve)
-- Converts position range in src to position range in bsrc.
-- The first position is kept as is; the last position keeps its distance
-- from the end of the string.
local function range_transform(src_fpos, src_lpos)
local src_nlpos = #src - src_lpos
local bsrc_fpos = src_fpos
local bsrc_lpos = #bsrc - src_nlpos
return bsrc_fpos, bsrc_lpos
end
if src == bsrc then return end -- up-to-date
-- Find range of positions in src that differences correspond to.
-- Note: for zero byte range, src_pos2 = src_pos1 - 1.
local npre = longest_prefix(src, bsrc)
local npost = math.min(#src-npre, longest_postfix(src, bsrc))
-- note: min avoids overlap ambiguity
local src_fpos, src_lpos = 1 + npre, #src - npost
-- Find smallest AST node containing src range above. May also
-- be contained in (smaller) comment or whitespace.
local match_ast, match_comment, iswhitespace =
M.smallest_ast_containing_range(top_ast, tokenlist, src_fpos, src_lpos)
DEBUG('invalidate-smallest:', match_ast and (match_ast.tag or 'notag'), match_comment, iswhitespace)
-- Determine which (ast, comment, or whitespace) to match, and get its pos range in src and bsrc.
local srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
if iswhitespace then
mast, mtype = nil, 'whitespace'
srcm_fpos, srcm_lpos = src_fpos, src_lpos
elseif match_comment then
mast, mtype = match_comment, 'comment'
srcm_fpos, srcm_lpos = match_comment.fpos, match_comment.lpos
else
mast, mtype = match_ast, 'ast'
-- Climb to the nearest ancestor that has a position range in the tokenlist;
-- top_ast without a range is taken to span the whole of src.
repeat
srcm_fpos, srcm_lpos = M.ast_pos_range(mast, tokenlist)
if not srcm_fpos then
if mast == top_ast then
srcm_fpos, srcm_lpos = 1, #src
break
else
M.ensure_parents_marked(top_ast)
mast = mast.parent
end
end
until srcm_fpos
end
bsrcm_fpos, bsrcm_lpos = range_transform(srcm_fpos, srcm_lpos)
-- Never expand match if preserve specified.
if preserve then
return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
end
-- Determine if replacement could break parent nodes.
-- Note: an 'ast' match is never considered safe here and always falls
-- through to the statement block search below.
local isreplacesafe
if mtype == 'whitespace' then
if bsrc:sub(bsrcm_fpos, bsrcm_lpos):match'^%s*$' then -- replaced with whitespace
if bsrc:sub(bsrcm_fpos-1, bsrcm_lpos+1):match'%s' then -- not eliminating whitespace
isreplacesafe = true
end
end
elseif mtype == 'comment' then
local m2src = bsrc:sub(bsrcm_fpos, bsrcm_lpos)
DEBUG('invalidate-comment[' .. m2src .. ']')
if quick_parse_comment(m2src) then -- replaced with comment
isreplacesafe = true
end
end
if isreplacesafe then -- return on safe replacement
return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
end
-- Find smallest containing statement block that will compile (or top_ast).
-- Each iteration tests the replacement text of the current statement/block
-- with loadstring and, if that fails, retries from the parent node.
-- NOTE(review): M.ast_pos_range is documented to return nil,nil when the node
-- is not found in the tokenlist; range_transform would then raise on nil
-- arithmetic -- confirm whether that can happen for nodes reached here.
while 1 do
match_ast = M.get_containing_statementblock(match_ast, top_ast)
if match_ast == top_ast then
return 1,#src, 1, #bsrc, match_ast, 'statblock'
-- entire AST invalidated
end
local srcm_fpos, srcm_lpos = M.ast_pos_range(match_ast, tokenlist)
local bsrcm_fpos, bsrcm_lpos = range_transform(srcm_fpos, srcm_lpos)
local msrc = bsrc:sub(bsrcm_fpos, bsrcm_lpos)
DEBUG('invalidate-statblock:', match_ast and match_ast.tag, '[' .. msrc .. ']')
if loadstring(msrc) then -- compiled
return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, match_ast, 'statblock'
end
M.ensure_parents_marked(top_ast)
match_ast = match_ast.parent
end
end
-- Walks AST `ast`, visiting each table node `n` reachable through the array
-- part of its ancestors.  Calls `fdown(n)` (if specified) before descending
-- into the children of `n` and `fup(n)` (if specified) after all of them
-- have been visited.  Non-table children (e.g. strings) are skipped.
-- Raises if `ast` is not a table.
-- CATEGORY: AST walk
function M.walk(ast, fdown, fup)
  assert(type(ast) == 'table')

  if fdown then fdown(ast) end

  for _, child in ipairs(ast) do
    if type(child) == 'table' then M.walk(child, fdown, fup) end
  end

  if fup then fup(ast) end
end
-- Replaces contents of table t1 with contents of table t2.
-- Does not change metatable (if any).
-- This function is useful for swapping one AST node with another
-- while preserving any references to the node.
-- If t1 and t2 are the same table, nothing is done (clearing t1 first would
-- otherwise leave the table empty).
-- CATEGORY: table utility
function M.switchtable(t1, t2)
  if rawequal(t1, t2) then return end -- replacing a table with itself is a no-op
  for k in pairs(t1) do t1[k] = nil end
  for k in pairs(t2) do t1[k] = t2[k] end
end
-- Inserts all elements in list bt at index i in list t.
-- Existing elements of t from index i onward are moved up by #bt places.
-- CATEGORY: table utility
local function tinsertlist(t, i, bt)
  local tail = #t

  -- Grow t by #bt placeholder slots first (avoids holes while shifting).
  for slot = tail + 1, tail + #bt do t[slot] = false end

  -- Shift the old elements i..tail upward, last one first.
  local from = tail
  while from >= i do
    t[from + #bt] = t[from]
    from = from - 1
  end

  -- Copy bt into the gap that starts at index i.
  for offset = 0, #bt - 1 do t[i + offset] = bt[offset + 1] end
end
--[=[TESTSUITE:
local function _tinsertlist(t, i, bt)
for bi=#bt,1,-1 do table.insert(t, i, bt[bi]) end
end -- equivalent but MUCH less efficient for large tables
local function _tinsertlist(t, i, bt)
for bi=1,#bt do table.insert(t, i+bi-1, bt[bi]) end
end -- equivalent but MUCH less efficient for large tables
local t = {}; tinsertlist(t, 1, {}); assert(table.concat(t)=='')
local t = {}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='23')
local t = {4}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='234')
local t = {2}; tinsertlist(t, 2, {3,4}); assert(table.concat(t)=='234')
local t = {4,5}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='2345')
local t = {2,5}; tinsertlist(t, 2, {3,4}); assert(table.concat(t)=='2345')
local t = {2,3}; tinsertlist(t, 3, {4,5}); assert(table.concat(t)=='2345')
print 'DONE'
--]=]
-- Gets list of keyword positions related to node ast in source src.
-- The result is a flat list {fpos1, lpos1, fpos2, lpos2, ...} of first/last
-- character positions; it is empty when ast has no lineinfo.
-- note: ast must be visible, i.e. have lineinfo (e.g. unlike `Id "self" definition).
-- Note: includes operators.
-- Note: Assumes ast Metalua-style lineinfo is valid.
-- CATEGORY: tokenlist build
function M.get_keywords(ast, src)
  local list = {}
  if not ast.lineinfo then return list end

  -- Extracts, as a number, the digits following "|L" in the string form of
  -- a lineinfo position object.
  local function posnum(position)
    return tonumber(tostring(position):match('|L(%d+)'))
  end

  -- examine space between each pair of children i and j.
  -- special cases: 0 is before first child and #ast+1 is after last child

  -- Put children in lexical order.
  -- Some binary operations have arguments reversed from lexical order.
  -- For example, `a > b` becomes `Op{'lt', `Id 'b', `Id 'a'}
  -- The positions must be compared as numbers: comparing the captured digit
  -- strings would order them lexicographically (e.g. "105" < "99").
  local oast = ast
  if ast.tag == 'Op' and #ast == 3
     and posnum(ast[2].lineinfo.first) > posnum(ast[3].lineinfo.first)
  then
    oast = {ast[1], ast[3], ast[2]}
  end

  local i = 0
  while i <= #ast do
    -- j is node following i that has lineinfo
    local j = i+1; while j < #ast+1 and not oast[j].lineinfo do j=j+1 end

    -- Get position range [fpos,lpos] between subsequent children.
    local fpos
    if i == 0 then -- before first child
      fpos = posnum(ast.lineinfo.first)
    else
      -- start just after child i, or after its last trailing comment if any
      local last = oast[i].lineinfo.last; local c = last.comments
      fpos = (c and #c > 0 and c[#c][3] or posnum(last)) + 1
    end
    local lpos
    if j == #ast+1 then -- after last child
      lpos = posnum(ast.lineinfo.last)
    else
      -- end just before child j, or before its first leading comment if any
      local first = oast[j].lineinfo.first; local c = first.comments
      lpos = (c and #c > 0 and c[1][2] or posnum(first)) - 1
    end

    -- Find keywords in range: runs of letters, else runs of punctuation,
    -- each optionally preceded by whitespace.
    local spos = fpos
    repeat
      local mfpos, _, mlppos = src:match("^%s*()(%a+)()", spos)
      if not mfpos then
        mfpos, _, mlppos = src:match("^%s*()(%p+)()", spos)
      end
      if mfpos then
        local mlpos = mlppos-1
        if mlpos > lpos then mlpos = lpos end -- clip match to the range
        if mlpos >= mfpos then
          list[#list+1] = mfpos
          list[#list+1] = mlpos
        end
      end
      spos = mlppos -- nil when nothing matched, which ends the scan
    until not spos or spos > lpos

    i = j -- next
    --DESIGN:Lua: comment: string.match accepts a start position but not a stop position
  end
  return list
end
-- Q:Metalua: does ast.lineinfo[loc].comments imply #ast.lineinfo[loc].comments > 0 ?
-- Generates ordered list of tokens in top_ast/src.
-- Terminal nodes and comment tables are themselves used as tokens (they
-- receive fpos, lpos and ast fields); keywords found by M.get_keywords become
-- new {tag='Keyword', ...} tokens.  The list is sorted by fpos.
-- Note: Modifies ast.
-- Note: Assumes ast Metalua-style lineinfo is valid.
-- CATEGORY: AST/tokenlist query

-- Tags of nodes that are tokens by themselves.
local isterminal = {
  Nil = true, True = true, False = true,
  Number = true, String = true,
  Dots = true, Id = true,
}

-- Sort order for tokens: ascending first character position.
local function compare_tokens_(atoken, btoken)
  return atoken.fpos < btoken.fpos
end

function M.ast_to_tokenlist(top_ast, src)
  local tokens = {} -- {nbytes=#src}
  local isseen = {} -- comments already added (a comment may be reachable twice)

  -- Number following "|L" in the string form of a lineinfo position.
  local function posnum(position)
    return tonumber(tostring(position):match('|L(%d+)'))
  end

  local function visit(ast)
    if isterminal[ast.tag] then
      -- Extract terminal (only if it has lineinfo).
      if ast.lineinfo then
        ast.fpos = posnum(ast.lineinfo.first)
        ast.lpos = posnum(ast.lineinfo.last)
        ast.ast = ast
        table.insert(tokens, ast)
      end
    else
      -- Extract non-terminal: one Keyword token per position pair.
      local keywordposlist = M.get_keywords(ast, src)
      for i = 1, #keywordposlist, 2 do
        local fpos, lpos = keywordposlist[i], keywordposlist[i+1]
        local toksrc = src:sub(fpos, lpos)
        table.insert(tokens, {tag='Keyword', fpos=fpos, lpos=lpos, ast=ast, toksrc})
      end
    end

    -- Extract comments attached to the first and last lineinfo of the node.
    for i = 1, 2 do
      local where = (i == 1) and 'first' or 'last'
      local comments = ast.lineinfo and ast.lineinfo[where].comments
      if comments then
        for _, comment in ipairs(comments) do
          if not isseen[comment] then
            comment.tag = 'Comment'
            comment.fpos = posnum(comment.lineinfo.first)
            comment.lpos = posnum(comment.lineinfo.last)
            comment.ast = comment
            table.insert(tokens, comment)
            isseen[comment] = true
          end
        end
      end
    end
  end

  M.walk(top_ast, visit, nil)
  table.sort(tokens, compare_tokens_)
  return tokens
end
-- Gets tokenlist range [fidx,lidx] covered by ast: the indices of the first
-- and last tokens whose `ast` field is `ast` or one of its descendants.
-- Returns nil,nil if not found.
--FIX:PERFORMANCE:this is slow on large files.
-- CATEGORY: AST/tokenlist query
function M.ast_idx_range_in_tokenlist(tokenlist, ast)
  -- Collect every node at or under ast.
  local covered = {}
  M.walk(ast, function(node) covered[node] = true end)

  -- Scan the whole tokenlist, remembering first and last hit.
  local fidx, lidx
  for idx = 1, #tokenlist do
    if covered[tokenlist[idx].ast] then
      fidx = fidx or idx
      lidx = idx
    end
  end
  return fidx, lidx
end
-- Gets index range in tokenlist overlapped by character position range [fpos, lpos].
-- For example, `do ff() end` with range ` ff() ` would match tokens `ff()`.
-- Tokens partly inside range are counted, so range `f()` would match tokens `ff()`.
-- If lidx = fidx - 1, then position range is whitespace between tokens lidx (on left)
-- and fidx (on right), and this may include token pseudoindices 0 (start of file) and
-- #tokenlist+1 (end of file).
-- Note: lpos == fpos - 1 indicates zero-width range between chars lpos and fpos.
-- CATEGORY: tokenlist query
function M.tokenlist_idx_range_over_pos_range(tokenlist, fpos, lpos)
  -- Pass 1: first/last indices of tokens overlapped (even partly) by the range.
  local fidx, lidx
  for idx = 1, #tokenlist do
    local token = tokenlist[idx]
    local disjoint = fpos > token.lpos or lpos < token.fpos
    if not disjoint then
      fidx = fidx or idx
      lidx = idx
    end
  end
  if fidx then return fidx, lidx end

  -- Pass 2: no token overlapped; locate the gap between tokens idx-1 and idx
  -- (a missing neighbour at either end of the list imposes no bound).
  for idx = 1, #tokenlist + 1 do
    local left, right = tokenlist[idx-1], tokenlist[idx]
    local left_lpos = left and left.lpos
    local right_fpos = right and right.fpos
    if (not left_lpos or fpos > left_lpos) and (not right_fpos or lpos < right_fpos) then
      return idx, idx - 1
    end
  end
  return fidx, lidx
end
--[=[TESTSUITE
local function test(...)
return table.concat({M.tokenlist_idx_range_over_pos_range(...)}, ',')
end
check('==', test({}, 2, 2), "1,0") -- no tokens
check('==', test({{tag='Id', fpos=1, lpos=1}}, 2, 2), "2,1") -- right of one token
check('==', test({{tag='Id', fpos=3, lpos=3}}, 2, 2), "1,0") -- left of one token
check('==', test({{tag='Id', fpos=3, lpos=4}}, 2, 3), "1,1") -- left partial overlap one token
check('==', test({{tag='Id', fpos=3, lpos=4}}, 4, 5), "1,1") -- right partial overlap one token
check('==', test({{tag='Id', fpos=3, lpos=6}}, 4, 5), "1,1") -- partial inner overlap one token
check('==', test({{tag='Id', fpos=3, lpos=6}}, 3, 6), "1,1") -- exact overlap one token
check('==', test({{tag='Id', fpos=4, lpos=5}}, 3, 6), "1,1") -- extra overlap one token
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 4, 4), "2,1") -- between tokens, " " exact
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 4, 3), "2,1") -- between tokens, "" on left
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 5, 4), "2,1") -- between tokens, "" on right
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=4, lpos=5}}, 4, 3), "2,1") -- between tokens, "" exact
--]=]
-- Removes tokens in tokenlist covered by ast, i.e. every token in the index
-- range reported by M.ast_idx_range_in_tokenlist.  Does nothing if ast has
-- no tokens in the list.
-- CATEGORY: tokenlist manipulation
local function remove_ast_in_tokenlist(tokenlist, ast)
  local fidx, lidx = M.ast_idx_range_in_tokenlist(tokenlist, ast)
  if not fidx then return end -- note: fidx implies lidx

  -- Remove from the back so that pending indices stay valid.
  local idx = lidx
  while idx >= fidx do
    table.remove(tokenlist, idx)
    idx = idx - 1
  end
end
-- Inserts tokens from btokenlist into tokenlist. Preserves sort.
-- All of btokenlist is inserted as one run, placed before the first token of
-- tokenlist whose fpos is greater than the fpos of btokenlist[1] (or appended
-- if there is none).  Does nothing if btokenlist is empty.
-- CATEGORY: tokenlist manipulation
local function insert_tokenlist(tokenlist, btokenlist)
  local ftoken = btokenlist[1]
  if not ftoken then return end

  -- Get index in tokenlist in which to insert; defaults to appending.
  local fidx = #tokenlist + 1
  for idx = 1, #tokenlist do
    if tokenlist[idx].fpos > ftoken.fpos then
      fidx = idx
      break
    end
  end

  tinsertlist(tokenlist, fidx, btokenlist)
end
-- Get character position range covered by ast in tokenlist: the fpos of its
-- first token and the lpos of its last token. Returns nil,nil on not found.
-- CATEGORY: AST/tokenlist query
function M.ast_pos_range(ast, tokenlist) -- IMPROVE:style: ast_idx_range_in_tokenlist has params reversed
  local fidx, lidx = M.ast_idx_range_in_tokenlist(tokenlist, ast)
  if not fidx then return nil, nil end
  return tokenlist[fidx].fpos, tokenlist[lidx].lpos
end
-- Gets string representation of AST node: the slice of src spanning the
-- node's position range in tokenlist. nil if none.
-- IMPROVE: what if node is empty block?
-- CATEGORY: AST/tokenlist query
function M.ast_to_text(ast, tokenlist, src) -- IMPROVE:style: ast_idx_range_in_tokenlist has params reversed
  local fpos, lpos = M.ast_pos_range(ast, tokenlist)
  if not fpos then return nil end
  return src:sub(fpos, lpos)
end
-- Gets smallest AST node in top_ast/tokenlist/src
-- completely containing position range [pos1, pos2].
-- careful: "function" is not part of the `Function node.
-- Returns three values: ast, comment, iswhitespace.
-- If range is inside comment, returns comment token as second value.
-- If range is inside whitespace, then returns true in third return value.
-- CATEGORY: AST/tokenlist query
function M.smallest_ast_containing_range(top_ast, tokenlist, pos1, pos2)
  local f0idx, l0idx = M.tokenlist_idx_range_over_pos_range(tokenlist, pos1, pos2)

  -- Find enclosing AST: widen the token range outward past tokens whose ast
  -- has no parent link.
  M.ensure_parents_marked(top_ast)
  local fidx, lidx = f0idx, l0idx
  while tokenlist[fidx] and not tokenlist[fidx].ast.parent do fidx = fidx - 1 end
  while tokenlist[lidx] and not tokenlist[lidx].ast.parent do lidx = lidx + 1 end

  -- Use the common ancestor of both boundary tokens, or the root when the
  -- widening ran off either end of the tokenlist.
  local ftoken, ltoken = tokenlist[fidx], tokenlist[lidx]
  local ast
  if ftoken and ltoken then
    ast = M.common_ast_parent(ftoken.ast, ltoken.ast, top_ast)
  else
    ast = top_ast
  end

  if l0idx == f0idx - 1 then -- whitespace
    return ast, nil, true
  end
  if l0idx == f0idx and tokenlist[l0idx].tag == 'Comment' then
    return ast, tokenlist[l0idx], nil
  end
  return ast, nil, nil
end
--IMPROVE: handle string edits and maybe others
-- Gets smallest statement block containing position pos or
-- nearest statement block before pos, whichever is smaller, given ast/tokenlist.
-- Returns mast, isafter.  `mast` is the statement/block found (the root `ast`
-- if no usable token precedes pos).  `isafter` is true when `mast` is not a
-- plain 'Block' and pos lies beyond the last character of its last token.
function M.current_statementblock(ast, tokenlist, pos)
  local fidx, lidx = M.tokenlist_idx_range_over_pos_range(tokenlist, pos, pos)
  if fidx > lidx then fidx = lidx end -- use nearest backward

  -- Find closest AST node backward
  while fidx >= 1 and tokenlist[fidx].tag == 'Comment' do fidx = fidx - 1 end

  if fidx < 1 then return ast, false end
  local mast = tokenlist[fidx].ast
  if not mast then return ast, false end
  mast = M.get_containing_statementblock(mast, ast)

  local isafter = false
  if mast.tag2 ~= 'Block' then
    local _, mlidx = M.ast_idx_range_in_tokenlist(tokenlist, mast)
    -- `pos` is a character position while `mlidx` is a token index, so
    -- compare against the character position of the statement's last token.
    if mlidx and pos > tokenlist[mlidx].lpos then
      isafter = true
    end
  end
  return mast, isafter
end
-- Gets index of bast in the array part of ast (nil if not found).
-- The first matching index is returned.
-- CATEGORY: AST query
function M.ast_idx(ast, bast)
  for idx = 1, #ast do
    local child = ast[idx]
    if child == bast then
      return idx
    end
  end
  return nil
end
-- Gets parent of ast and index of ast in parent.
-- Root node top_ast must also be provided. Returns nil, nil if ast is root.
-- Raises (assert) if ast is not the root and has no parent link.
-- Note: may call mark_parents.
-- CATEGORY: AST query
function M.ast_parent_idx(top_ast, ast)
  if ast == top_ast then return nil, nil end
  M.ensure_parents_marked(top_ast)
  local parent = assert(ast.parent)
  return parent, M.ast_idx(parent, ast)
end
-- Gets common parent of aast and bast (the nearest node, possibly aast or
-- bast itself, found on both parent chains). Always returns value; raises
-- (assert) if the two chains never meet.
-- Must provide root top_ast too.
-- CATEGORY: AST query
function M.common_ast_parent(aast, bast, top_ast)
  M.ensure_parents_marked(top_ast)

  -- Record bast and every ancestor of bast.
  local isparent = {}
  local node = bast
  repeat
    isparent[node] = true
    node = node.parent
  until not node

  -- Climb from aast until a recorded node is reached.
  node = aast
  repeat
    if isparent[node] then return node end
    node = node.parent
  until not node

  assert(false)
end
-- Replaces old_ast with new_ast/new_tokenlist in top_ast/tokenlist.
-- Note: assumes new_ast is a block. assumes old_ast is a statement or block.
-- The tokens of old_ast are dropped from tokenlist and new_tokenlist is
-- merged in; the statements of new_ast then take the place of old_ast in its
-- parent (or become the contents of top_ast when old_ast is the root).
-- CATEGORY: AST/tokenlist
function M.replace_statements(top_ast, tokenlist, old_ast, new_ast, new_tokenlist)
  remove_ast_in_tokenlist(tokenlist, old_ast)
  insert_tokenlist(tokenlist, new_tokenlist)

  if old_ast ~= top_ast then
    local parent_ast, idx = M.ast_parent_idx(top_ast, old_ast)
    table.remove(parent_ast, idx)
    tinsertlist(parent_ast, idx, new_ast)
  else -- special case: no parent
    M.switchtable(old_ast, new_ast) -- note: safe since block is not in tokenlist.
  end

  -- fixup annotations (only those kinds already present on the old tree)
  for _, stat in ipairs(new_ast) do
    if top_ast.tag2 then
      -- NOTE(review): statements other than `Do are tagged 'Block' here,
      -- whereas mark_tag2 tags statements inside a block 'Stat' -- confirm
      -- this is intended.
      local context = 'Block'
      if stat.tag == 'Do' then context = 'StatBlock' end
      M.mark_tag2(stat, context)
    end
    if old_ast.parent then
      M.mark_parents(stat, old_ast.parent)
    end
  end
end
-- Adjusts lineinfo in tokenlist.
-- All char positions (token fpos and lpos) starting at pos1 are shifted by
-- delta number of chars; positions before pos1 are left alone.
-- CATEGORY: tokenlist
function M.adjust_lineinfo(tokenlist, pos1, delta)
  for _, token in ipairs(tokenlist) do
    local fpos, lpos = token.fpos, token.lpos
    if fpos >= pos1 then token.fpos = fpos + delta end
    if lpos >= pos1 then token.lpos = lpos + delta end
  end
  --tokenlist.nbytes = tokenlist.nbytes + delta
end
-- For each node n in ast, sets n.parent to parent node of n.
-- ast.parent itself is set to parent_ast (may be nil).
-- Only table children in the array part are visited.
-- CATEGORY: AST query
function M.mark_parents(ast, parent_ast)
  ast.parent = parent_ast
  for _, child in ipairs(ast) do
    if type(child) == 'table' then M.mark_parents(child, ast) end
  end
end
-- Calls mark_parents(ast) if ast not marked.
-- "Marked" is judged from the first child only: nothing is done when ast has
-- no first element or when that element already has a `parent` field.
-- CATEGORY: AST query
function M.ensure_parents_marked(ast)
  local first = ast[1]
  if not first or first.parent then return end
  M.mark_parents(ast)
end
-- For each node n in ast, sets n.tag2 to context string:
-- 'Block' - node is block
-- 'Stat' - node is statement
-- 'StatBlock' - node is statement and block (i.e. `Do)
-- 'Exp' - node is expression
-- 'Explist' - node is expression list (or identifier list)
-- 'Pair' - node is key-value pair in table constructor
-- note: ast.tag2 will be set to context ('Block' if context is omitted).
-- CATEGORY: AST query

-- Tags that always denote a statement.
local iscertainstat = {
  Do = true, Set = true, While = true, Repeat = true, If = true,
  Fornum = true, Forin = true, Local = true, Localrec = true,
  Return = true, Break = true,
}

function M.mark_tag2(ast, context)
  context = context or 'Block'
  ast.tag2 = context

  for i, bast in ipairs(ast) do
    if type(bast) == 'table' then
      local tag = bast.tag
      local nextcontext = 'Exp' -- default for any other tagged node
      if tag == 'Do' then
        nextcontext = 'StatBlock'
      elseif iscertainstat[tag] then
        nextcontext = 'Stat'
      elseif tag == 'Call' or tag == 'Invoke' then
        -- a call directly inside a block is a statement, else an expression
        nextcontext = (context == 'Block') and 'Stat' or 'Exp'
        --DESIGN:Metalua: these calls actually contain expression lists,
        -- but the expression list is not represented as a complete node
        -- by Metalua (as blocks are in `Do statements)
      elseif tag == 'Pair' then
        nextcontext = 'Pair'
      elseif not tag then
        -- untagged list: an expression/identifier list in these parent
        -- positions, otherwise a block
        local parent_tag = ast.tag
        local islist = parent_tag == 'Set' or parent_tag == 'Local' or parent_tag == 'Localrec'
          or (parent_tag == 'Forin' and i <= 2)
          or (parent_tag == 'Function' and i == 1)
        nextcontext = islist and 'Explist' or 'Block'
      end
      M.mark_tag2(bast, nextcontext)
    end
  end
end
-- Gets smallest statement or block containing or being `ast`, i.e. the first
-- node on the parent chain of `ast` (starting at `ast`) whose tag2 is 'Stat',
-- 'StatBlock' or 'Block'.
-- The AST root node `top_ast` must also be provided.
-- Note: may decorate AST as side-effect (mark_tag2/mark_parents).
-- top_ast is assumed a block, so this is always successful.
-- CATEGORY: AST query
function M.get_containing_statementblock(ast, top_ast)
  local node = ast
  while true do
    if not top_ast.tag2 then M.mark_tag2(top_ast) end
    local tag2 = node.tag2
    if tag2 == 'Stat' or tag2 == 'StatBlock' or tag2 == 'Block' then
      return node
    end
    -- not a statement/block: continue with the parent
    M.ensure_parents_marked(top_ast)
    node = node.parent
  end
end
-- Finds smallest statement, block, or comment AST in ast/tokenlist containing position
-- range [fpos, lpos] and returns its position range (first, last).
-- If allowexpand is true (default nil) and located AST coincides with
-- position range, then next containing statement is used instead (this
-- allows multiple calls to further expand the statement selection).
-- CATEGORY: AST query
function M.select_statementblockcomment(ast, tokenlist, fpos, lpos, allowexpand)
  --IMPROVE: rename ast to top_ast
  local match_ast, comment_ast = M.smallest_ast_containing_range(ast, tokenlist, fpos, lpos)
  local select_ast = comment_ast or M.get_containing_statementblock(match_ast, ast)
  local nfpos, nlpos = M.ast_pos_range(select_ast, tokenlist)
  --DEBUG('s', nfpos, nlpos, fpos, lpos, match_ast.tag, select_ast.tag)

  local coincides = fpos == nfpos and lpos == nlpos
  if not (allowexpand and coincides) then
    return nfpos, nlpos
  end

  if comment_ast then
    -- Select enclosing statement.
    nfpos, nlpos = M.ast_pos_range(match_ast, tokenlist)
    return nfpos, nlpos
  end

  -- note: multiple times may be needed to expand selection. For example, in
  -- `for x=1,2 do f() end` both the statement `f()` and block `f()` have
  -- the same position range.
  M.ensure_parents_marked(ast)
  while select_ast.parent and fpos == nfpos and lpos == nlpos do
    select_ast = M.get_containing_statementblock(select_ast.parent, ast)
    nfpos, nlpos = M.ast_pos_range(select_ast, tokenlist)
  end
  return nfpos, nlpos
end
-- Converts tokenlist to string representation for debugging.
-- Produces one line per token of the form
--   tok.<index>: [<fpos>,<lpos>] <token[1]> <token.ast.tag>
-- with the lines joined by "\n".
-- CATEGORY: tokenlist debug
function M.dump_tokenlist(tokenlist)
  local lines = {}
  for i, token in ipairs(tokenlist) do
    local range = '[' .. token.fpos .. ',' .. token.lpos .. '] '
    local text = tostring(token[1]) .. ' ' .. tostring(token.ast.tag)
    lines[#lines+1] = 'tok.' .. i .. ': ' .. range .. text
  end
  return table.concat(lines, '\n') -- .. 'nbytes=' .. tokenlist.nbytes .. '\n'
end
--FIX:Q: does this handle Unicode ok?
--FIX?:Metalua: fails on string with escape sequence '\/'. The Reference Manual
-- doesn't say this sequence is valid though.
--FIX:Metalua: In `local --[[x]] function --[[y]] f() end`,
-- 'x' comment omitted from AST.
--FIX:Metalua: `do --[[x]] end` doesn't generate comments in AST.
-- `if x then --[[x]] end` and `while 1 do --[[x]] end` generates
-- comments in first/last of block
--FIX:Metalua: `--[[x]] f() --[[y]]` returns lineinfo around `f()`.
-- `--[[x]] --[[y]]` returns lineinfo around everything.
--FIX:Metalua: `while 1 do --[[x]] --[[y]] end` returns first > last
-- lineinfo for contained block
--FIX:Metalua: search for "PATCHED:LuaInspect" in the metalualib folder.
--FIX?:Metalua: loadstring parses "--x" but metalua omits the comment in the AST
--FIX?:Metalua: `local x` is generating `Local{{`Id{x}}, {}}`, which
-- has no lineinfo on {}. This is contrary to the Metalua
-- spec: `Local{ {ident+} {expr+}? }.
-- Other things like `self` also generate no lineinfo.
-- The ast2.lineinfo above avoids this.
--FIX:Metalua: Metalua shouldn't overwrite ipairs/pairs. Note: Metalua version
-- doesn't set errorlevel correctly.
--Q:Metalua: Why does `return --[[y]] z --[[x]]` have
-- lineinfo.first.comments, lineinfo.last.comments,
-- plus lineinfo.comments (which is the same as lineinfo.first.comments) ?
--CAUTION:Metalua: `do f() end` returns lineinfo around `do f() end`, while
-- `while 1 do f() end` returns lineinfo around `f()` for inner block.
--CAUTION:Metalua: The lineinfo on Metalua comments is inconsistent with other
-- nodes
--CAUTION:Metalua: lineinfo of table in `f{}` is [3,2], of `f{ x,y }` it's [4,6].
-- This is inconsistent with `x={}` which is [3,4] and `f""` which is [1,2]
-- for the string.
--CAUTION:Metalua: only the `function()` form of `Function includes `function`
-- in lineinfo. 'function' is part of `Localrec and `Set in syntactic sugar form.
--[=[TESTSUITE
-- test longest_prefix/longest_postfix
local function pr(text1, text2)
local lastv
local function same(v)
assert(not lastv or v == lastv); lastv = v; return v
end
local function test1(text1, text2) -- test prefix/postfix
same(longest_prefix(text1, text2))
same(longest_postfix(text1:reverse(), text2:reverse()))
end
local function test2(text1, text2) -- test swap
test1(text1, text2)
test1(text2, text1)
end
for _,extra in ipairs{"", "x", "xy", "xyz"} do -- test extra chars
test2(text1, text2..extra)
test2(text2, text1..extra)
end
return lastv
end
check('==', pr("",""), 0)
check('==', pr("a",""), 0)
check('==', pr("a","a"), 1)
check('==', pr("ab",""), 0)
check('==', pr("ab","a"), 1)
check('==', pr("ab","ab"), 2)
check('==', pr("abcdefg","abcdefgh"), 7)
--]=]
--[=[TESTSUITE
print 'DONE'
--]=]
return M