916 lines
32 KiB
Lua
916 lines
32 KiB
Lua
|
-- luainspect.ast - Lua Abstract Syntax Tree (AST) and token list operations.
|
||
|
--
|
||
|
-- Two main structures are maintained. A Metalua-style AST represents the
|
||
|
-- nested syntactic structure obtained from the parse.
|
||
|
-- A separate linear ordered list of tokens represents the syntactic structure
|
||
|
-- from the lexing, including line information (character positions only not row/columns),
|
||
|
-- comments, and keywords, which is originally built from the lineinfo attributes
|
||
|
-- injected by Metalua into the AST (IMPROVE: it probably would be simpler
|
||
|
-- to obtain this from the lexer directly rather than inferring it from the parsing).
|
||
|
-- During AST manipulations, the lineinfo maintained in the AST is ignored
|
||
|
-- because it was found more difficult to maintain and not in the optimal format.
|
||
|
--
|
||
|
-- The contained code deals with
|
||
|
-- - Building the AST from source.
|
||
|
-- - Building the tokenlist from the AST lineinfo.
|
||
|
-- - Querying the AST+tokenlist.
|
||
|
-- - Modifying the AST+tokenlist (including incremental parsing source -> AST)
|
||
|
-- - Annotating the AST with navigational info (e.g. parent links) to assist queries.
|
||
|
-- - Dumping the tokenlist for debugging.
|
||
|
--
|
||
|
-- (c) 2010 David Manura, MIT License.
|
||
|
|
||
|
|
||
|
--! require 'luainspect.typecheck' (context)
|
||
|
|
||
|
local mlc = require 'metalua.compiler'.new()
|
||
|
|
||
|
local M = {}
|
||
|
|
||
|
--[=TESTSUITE
|
||
|
-- utilities
|
||
|
-- Comparison operators usable by check(), keyed by operator name.
local ops = {
  ['=='] = function(a, b) return a == b end,
}

-- Test helper: raises an error unless the operator named `opname` accepts
-- the pair (a, b). An operator name missing from `ops` fails the assert.
local function check(opname, a, b)
  local compare = assert(ops[opname])
  if compare(a, b) then return end
  error("fail == " .. tostring(a) .. " " .. tostring(b))
end
|
||
|
--]=]
|
||
|
|
||
|
-- Prints its arguments prefixed with 'DEBUG:' when the global
-- LUAINSPECT_DEBUG holds a true value; otherwise does nothing.
-- CATEGORY: debug
local function DEBUG(...)
  if not LUAINSPECT_DEBUG then return end
  print('DEBUG:', ...)
end
|
||
|
|
||
|
|
||
|
-- Converts character position `pos` to a row,column position in string `src`.
-- All values are 1-indexed.
-- Returns linenum, colnum.
-- A newline character belongs to the line it terminates, so a `pos` that is
-- itself a newline yields the last column of that line (never column 0 of
-- the following line).
function M.pos_to_linecol(pos, src)
  local linenum = 1
  local lasteolpos = 0 -- position of the last newline strictly before pos
  for eolpos in src:gmatch"()\n" do
    -- Only newlines strictly before pos start a new line for pos.
    if eolpos >= pos then break end
    linenum = linenum + 1
    lasteolpos = eolpos
  end
  local colnum = pos - lasteolpos
  return linenum, colnum
end
|
||
|
|
||
|
-- Blanks out any shebang ("#!") line at the start of Lua source string `src`.
-- The shebang text (up to but excluding the line terminator) is overwritten
-- with spaces, so the string length and all character positions are kept.
-- Returns `src` unchanged when it does not start with "#!".
-- CATEGORY: Lua parsing
function M.remove_shebang(src)
  local shebang = src:match("^#![^\r\n]*")
  if not shebang then return src end
  local width = #shebang
  return string.rep(" ", width) .. src:sub(width + 1)
end
|
||
|
|
||
|
|
||
|
-- Custom version of loadstring that parses out line number info.
-- On success returns the compiled chunk.
-- On failure returns nil, err, linenum, colnum, linenum2 where
--   err      - error message with the leading '[string ""]:' prefix removed
--   linenum  - first "<digits>:" captured from the message (a string)
--   colnum   - always 0
--   linenum2 - line captured from a "'X' expected (to close 'Y' at line N"
--              message, or nil if the message has another form
-- CATEGORY: Lua parsing
function M.loadstring(src)
  local chunk, err = loadstring(src, "")
  if chunk then return chunk end

  err = err:gsub('^%[string ""%]:', "")
  local linenum = assert(err:match("(%d+):"))
  local linenum2 = err:match("^%d+: '[^']+' expected %(to close '[^']+' at line (%d+)")
  return nil, err, linenum, 0, linenum2
end
|
||
|
|
||
|
|
||
|
-- Helper for ast_from_string: hands `src` and `filename` to the Metalua
-- compiler object `mlc` and returns whatever src_to_ast returns.
-- Raises on error.
-- FIX? filename currently ignored in Metalua
-- CATEGORY: Lua parsing
local function ast_from_string_helper(src, filename)
  return mlc.src_to_ast(mlc, src, filename)
end
|
||
|
|
||
|
|
||
|
-- Counts number of lines in text.
-- Warning: the decision of whether to count a trailing new-line in a file
-- or an empty file as a line is a little subjective.  This function currently
-- defines the line count as 1 plus the number of new line characters.
-- CATEGORY: utility/string
local function linecount(text)
  -- gsub's second result is the number of substitutions, i.e. of "\n" found.
  local _, newlines = text:gsub('\n', '\n')
  return newlines + 1
end
|
||
|
|
||
|
|
||
|
-- Converts Lua source string to Lua AST (via mlp/gg).
-- On success returns the AST.
-- On failure returns nil, err, linenum, colnum, linenum2 (linenum2 is
-- always nil here). linenum/colnum are captured from a
-- "line N, char M" message when present (as strings); otherwise they fall
-- back to the last line of `src` and column 1.
-- CATEGORY: Lua parsing
function M.ast_from_string(src, filename)
  local ok, result = pcall(ast_from_string_helper, src, filename)
  if ok then return result end

  -- Keep only the first line of the message.
  local err = result:match('[^\n]*')
  err = err:gsub("^.-:%s*line", "line")
      -- mlp.chunk prepending this is undesirable.   error(msg,0) would be better in gg.lua. Reported.
      -- TODO-Metalua: remove when fixed in Metalua.

  local linenum, colnum = err:match("line (%d+), char (%d+)")
  if not linenum then
    -- Metalua libraries may return "...gg.lua:56: .../mlp_misc.lua:179: End-of-file expected"
    -- without the normal line/char numbers given things like "if x then end end".  Should be
    -- fixed probably with gg.parse_error in _chunk in mlp_misc.lua.
    -- TODO-Metalua: remove when fixed in Metalua.
    linenum, colnum = linecount(src), 1
  end
  return nil, err, linenum, colnum, nil
end
|
||
|
|
||
|
|
||
|
-- Simple comment parser.  Returns Metalua-style comment
-- {text, 1, #src, 'short'|'long'} when all of `src` is one comment,
-- or nil otherwise.
-- A short comment must end with "\n"; a long comment may be followed by an
-- optional "\r" and/or "\n".
-- CATEGORY: Lua lexing
local function quick_parse_comment(src)
  local short_text = src:match"^%-%-([^\n]*)()\n$"
  if short_text then
    return {short_text, 1, #src, 'short'}
  end

  -- NOTE(review): `lexer` is not defined in the visible part of this file;
  -- presumably it is the Metalua lexer module -- confirm it is in scope.
  local long_pattern = lexer.lexer.patterns.long_comment .. '\r?\n?$'
  local _, long_text = src:match(long_pattern)
  if long_text then
    return {long_text, 1, #src, 'long'}
  end

  return nil
end
|
||
|
--FIX:check new-line correctness
|
||
|
--note: currently requiring \n at end of single line comment to avoid
|
||
|
-- incremental compilation with `--x\nf()` and removing \n from still
|
||
|
-- recognizing as comment `--x`.
|
||
|
-- currently allowing \r\n at end of long comment since Metalua includes
|
||
|
-- it in lineinfo of long comment (FIX:Metalua?)
|
||
|
|
||
|
|
||
|
-- Gets length of longest prefix string in both provided strings.
-- Returns max n such that text1:sub(1,n) == text2:sub(1,n) and n <= min(#text1,#text2)
-- Found by bisection on the prefix length.
-- CATEGORY: string utility
local function longest_prefix(text1, text2)
  -- Invariant: the first `lo` chars agree; the answer is at most `hi`.
  local lo, hi = 0, math.min(#text1, #text2)
  while lo < hi do
    local mid = math.ceil((lo + hi) / 2)
    if text1:sub(1, mid) ~= text2:sub(1, mid) then
      hi = mid - 1
    else
      lo = mid
    end
  end
  return lo
end
|
||
|
|
||
|
|
||
|
-- Gets length of longest postfix string in both provided strings.
-- Returns max n such that text1:sub(-n) == text2:sub(-n) and n <= min(#text1,#text2)
-- Found by bisection on the postfix length.
-- CATEGORY: string utility
local function longest_postfix(text1, text2)
  -- Invariant: the last `lo` chars agree; the answer is at most `hi`.
  local lo, hi = 0, math.min(#text1, #text2)
  while lo < hi do
    local mid = math.ceil((lo + hi) / 2)
    if text1:sub(-mid) ~= text2:sub(-mid) then --[*]
      hi = mid - 1
    else
      lo = mid
    end
  end
  return lo
end -- differs from longest_prefix only on line [*]
|
||
|
|
||
|
|
||
|
|
||
|
-- Determines AST node that must be re-evaluated upon changing code string from
-- `src` to `bsrc`, given previous top_ast/tokenlist/src.
-- Returns nothing if src == bsrc; otherwise returns
--   srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
-- i.e. the invalidated position range in src, the corresponding range in bsrc,
-- the matched node (nil for whitespace) and its kind
-- ('whitespace', 'comment', 'ast' or 'statblock').
-- Note: decorates top_ast as side-effect.
-- If preserve is true, then does not expand AST match even if replacement is invalid.
-- CATEGORY: AST/tokenlist manipulation
function M.invalidated_code(top_ast, tokenlist, src, bsrc, preserve)
  -- Converts position range in src to position range in bsrc.
  -- The first position is kept as is; the last position keeps its distance
  -- from the end of the string.
  local function range_transform(src_fpos, src_lpos)
    local src_nlpos = #src - src_lpos
    local bsrc_fpos = src_fpos
    local bsrc_lpos = #bsrc - src_nlpos
    return bsrc_fpos, bsrc_lpos
  end

  if src == bsrc then return end -- up-to-date

  -- Find range of positions in src that differences correspond to.
  -- Note: for zero byte range, src_pos2 = src_pos1 - 1.
  local npre = longest_prefix(src, bsrc)
  -- The common postfix must not overlap the common prefix in EITHER string,
  -- otherwise a pure deletion (e.g. "aaa" -> "aa") produces an inverted
  -- range in bsrc.
  local npost = math.min(#src-npre, #bsrc-npre, longest_postfix(src, bsrc))
  local src_fpos, src_lpos = 1 + npre, #src - npost

  -- Find smallest AST node containing src range above.  May also
  -- be contained in (smaller) comment or whitespace.
  local match_ast, match_comment, iswhitespace =
      M.smallest_ast_containing_range(top_ast, tokenlist, src_fpos, src_lpos)
  DEBUG('invalidate-smallest:', match_ast and (match_ast.tag or 'notag'), match_comment, iswhitespace)

  -- Determine which (ast, comment, or whitespace) to match, and get its pos range in src and bsrc.
  local srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
  if iswhitespace then
    mast, mtype = nil, 'whitespace'
    srcm_fpos, srcm_lpos = src_fpos, src_lpos
  elseif match_comment then
    mast, mtype = match_comment, 'comment'
    srcm_fpos, srcm_lpos = match_comment.fpos, match_comment.lpos
  else
    mast, mtype = match_ast, 'ast'
    -- Climb to the nearest ancestor that owns tokens (or to top_ast).
    repeat
      srcm_fpos, srcm_lpos = M.ast_pos_range(mast, tokenlist)
      if not srcm_fpos then
        if mast == top_ast then
          srcm_fpos, srcm_lpos = 1, #src
          break
        else
          M.ensure_parents_marked(top_ast)
          mast = mast.parent
        end
      end
    until srcm_fpos
  end
  bsrcm_fpos, bsrcm_lpos = range_transform(srcm_fpos, srcm_lpos)

  -- Never expand match if preserve specified.
  if preserve then
    return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
  end

  -- Determine if replacement could break parent nodes.
  local isreplacesafe
  if mtype == 'whitespace' then
    if bsrc:sub(bsrcm_fpos, bsrcm_lpos):match'^%s*$' then -- replaced with whitespace
      if bsrc:sub(bsrcm_fpos-1, bsrcm_lpos+1):match'%s' then -- not eliminating whitespace
        isreplacesafe = true
      end
    end
  elseif mtype == 'comment' then
    local m2src = bsrc:sub(bsrcm_fpos, bsrcm_lpos)
    DEBUG('invalidate-comment[' .. m2src .. ']')
    if quick_parse_comment(m2src) then  -- replaced with comment
      isreplacesafe = true
    end
  end
  if isreplacesafe then  -- return on safe replacement
    return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, mast, mtype
  end

  -- Find smallest containing statement block that will compile (or top_ast).
  while 1 do
    match_ast = M.get_containing_statementblock(match_ast, top_ast)
    if match_ast == top_ast then
      return 1,#src, 1, #bsrc, match_ast, 'statblock'
         -- entire AST invalidated
    end
    local srcm_fpos, srcm_lpos = M.ast_pos_range(match_ast, tokenlist)
    -- A statement/block without tokens has no position range to test;
    -- skip it and try its parent instead of doing arithmetic on nil.
    if srcm_fpos then
      local bsrcm_fpos, bsrcm_lpos = range_transform(srcm_fpos, srcm_lpos)
      local msrc = bsrc:sub(bsrcm_fpos, bsrcm_lpos)
      DEBUG('invalidate-statblock:', match_ast and match_ast.tag, '[' .. msrc .. ']')
      if loadstring(msrc) then -- compiled
        return srcm_fpos, srcm_lpos, bsrcm_fpos, bsrcm_lpos, match_ast, 'statblock'
      end
    end
    M.ensure_parents_marked(top_ast)
    match_ast = match_ast.parent
  end
end
|
||
|
|
||
|
|
||
|
-- Walks AST `ast` in arbitrary order, visiting each node `n`, executing `fdown(n)` (if specified)
-- when going down and `fup(n)` (if specified) when going up.
-- Only table-valued array elements are treated as child nodes.
-- CATEGORY: AST walk
function M.walk(ast, fdown, fup)
  assert(type(ast) == 'table')
  if fdown then
    fdown(ast)
  end
  for _, child in ipairs(ast) do
    local is_node = type(child) == 'table'
    if is_node then
      M.walk(child, fdown, fup)
    end
  end
  if fup then
    fup(ast)
  end
end
|
||
|
|
||
|
|
||
|
-- Replaces contents of table t1 with contents of table t2.
-- Does not change metatable (if any).
-- This function is useful for swapping one AST node with another
-- while preserving any references to the node.
-- CATEGORY: table utility
function M.switchtable(t1, t2)
  -- Empty t1 first (clearing existing keys during traversal is permitted).
  for key in pairs(t1) do
    t1[key] = nil
  end
  -- Shallow-copy every entry of t2 into t1.
  for key, value in pairs(t2) do
    t1[key] = value
  end
end
|
||
|
|
||
|
|
||
|
-- Inserts all elements in list bt at index i in list t.
-- Elements of t from index i onward are shifted up by #bt.
-- CATEGORY: table utility
local function tinsertlist(t, i, bt)
  local tlen, blen = #t, #bt
  -- preallocate (avoid holes)
  for pos = tlen + 1, tlen + blen do
    t[pos] = false
  end
  -- shift the tail, highest index first so nothing is overwritten
  for pos = tlen, i, -1 do
    t[pos + blen] = t[pos]
  end
  -- fill the gap
  for k = 1, blen do
    t[i - 1 + k] = bt[k]
  end
end
|
||
|
--[=[TESTSUITE:
|
||
|
local function _tinsertlist(t, i, bt)
|
||
|
for bi=#bt,1,-1 do table.insert(t, i, bt[bi]) end
|
||
|
end -- equivalent but MUCH less efficient for large tables
|
||
|
local function _tinsertlist(t, i, bt)
|
||
|
for bi=1,#bt do table.insert(t, i+bi-1, bt[bi]) end
|
||
|
end -- equivalent but MUCH less efficient for large tables
|
||
|
local t = {}; tinsertlist(t, 1, {}); assert(table.concat(t)=='')
|
||
|
local t = {}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='23')
|
||
|
local t = {4}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='234')
|
||
|
local t = {2}; tinsertlist(t, 2, {3,4}); assert(table.concat(t)=='234')
|
||
|
local t = {4,5}; tinsertlist(t, 1, {2,3}); assert(table.concat(t)=='2345')
|
||
|
local t = {2,5}; tinsertlist(t, 2, {3,4}); assert(table.concat(t)=='2345')
|
||
|
local t = {2,3}; tinsertlist(t, 3, {4,5}); assert(table.concat(t)=='2345')
|
||
|
print 'DONE'
|
||
|
--]=]
|
||
|
|
||
|
|
||
|
|
||
|
-- Gets list of keyword positions related to node ast in source src
-- Returns a flat list {fpos1, lpos1, fpos2, lpos2, ...} (empty if ast has
-- no lineinfo).
-- note: ast must be visible, i.e. have lineinfo (e.g. unlike `Id "self" definition).
-- Note: includes operators.
-- Note: Assumes ast Metalua-style lineinfo is valid.
-- CATEGORY: tokenlist build
function M.get_keywords(ast, src)
  local list = {}
  if not ast.lineinfo then return list end
  -- examine space between each pair of children i and j.
  -- special cases: 0 is before first child and #ast+1 is after last child

  -- Numeric value that follows "|L" in the string form of a lineinfo
  -- position object (nil if the string has no such field).
  local function locpos(loc)
    return tonumber(tostring(loc):match('|L(%d+)'))
  end

  -- Put children in lexical order.
  -- Some binary operations have arguments reversed from lexical order.
  -- For example, `a > b` becomes `Op{'lt', `Id 'b', `Id 'a'}
  -- The positions are compared as numbers: comparing the captured digit
  -- strings directly would be lexicographic ("13" > "9" is false).
  local oast = ast
  if ast.tag == 'Op' and #ast == 3
     and locpos(ast[2].lineinfo.first) > locpos(ast[3].lineinfo.first)
  then
    oast = {ast[1], ast[3], ast[2]}
  end

  local i = 0
  while i <= #ast do
    -- j is node following i that has lineinfo
    local j = i+1; while j < #ast+1 and not oast[j].lineinfo do j=j+1 end

    -- Get position range [fpos,lpos] between subsequent children.
    local fpos
    if i == 0 then  -- before first child
      fpos = locpos(ast.lineinfo.first)
    else
      -- start just after child i, or after its last trailing comment
      local last = oast[i].lineinfo.last; local c = last.comments
      fpos = (c and #c > 0 and c[#c][3] or locpos(last)) + 1
    end
    local lpos
    if j == #ast+1 then  -- after last child
      lpos = locpos(ast.lineinfo.last)
    else
      -- stop just before child j, or before its first leading comment
      local first = oast[j].lineinfo.first; local c = first.comments
      lpos = (c and #c > 0 and c[1][2] or locpos(first)) - 1
    end

    -- Find keyword in range.
    local spos = fpos
    repeat
      -- try an alphabetic word first, then a run of punctuation
      local mfpos, tok, mlppos = src:match("^%s*()(%a+)()", spos)
      if not mfpos then
        mfpos, tok, mlppos = src:match("^%s*()(%p+)()", spos)
      end
      if mfpos then
        local mlpos = mlppos-1
        if mlpos > lpos then mlpos = lpos end  -- clip to range
        if mlpos >= mfpos then
          list[#list+1] = mfpos
          list[#list+1] = mlpos
        end
      end
      spos = mlppos
    until not spos or spos > lpos
    -- note: finds single keyword.  in `local function` returns only `local`
    --DEBUG(i,j ,'test[' .. src:sub(fpos, lpos) .. ']')

    i = j  -- next

    --DESIGN:Lua: comment: string.match accepts a start position but not a stop position
  end
  return list
end
|
||
|
-- Q:Metalua: does ast.lineinfo[loc].comments imply #ast.lineinfo[loc].comments > 0 ?
|
||
|
|
||
|
|
||
|
|
||
|
-- Generates ordered list of tokens in top_ast/src.
-- Terminal nodes and comments become tokens themselves (fields fpos, lpos
-- and ast are added to them); keywords found by get_keywords become new
-- {tag='Keyword', ...} tokens. The list is sorted by fpos.
-- Note: currently ignores operators and parens.
-- Note: Modifies ast.
-- Note: Assumes ast Metalua-style lineinfo is valid.
-- CATEGORY: AST/tokenlist query
local isterminal = {Nil=true, Dots=true, True=true, False=true, Number=true,
                    String=true, Id=true}
local function compare_tokens_(atoken, btoken) return atoken.fpos < btoken.fpos end
function M.ast_to_tokenlist(top_ast, src)
  local tokens = {}  -- {nbytes=#src}
  local isseen = {}  -- comments already turned into tokens

  -- Numeric value that follows "|L" in the string form of a position object.
  local function locpos(loc)
    return tonumber(tostring(loc):match('|L(%d+)'))
  end

  local function visit(ast)
    if isterminal[ast.tag] then -- Extract terminal
      if ast.lineinfo then
        ast.fpos = locpos(ast.lineinfo.first)
        ast.lpos = locpos(ast.lineinfo.last)
        ast.ast = ast
        table.insert(tokens, ast)
      end
    else -- Extract non-terminal
      local keywordposlist = M.get_keywords(ast, src)
      for i = 1, #keywordposlist, 2 do
        local fpos, lpos = keywordposlist[i], keywordposlist[i+1]
        table.insert(tokens,
          {tag='Keyword', fpos=fpos, lpos=lpos, ast=ast, src:sub(fpos, lpos)})
      end
    end

    -- Extract comments
    for i = 1, 2 do
      local side = i == 1 and 'first' or 'last'
      local comments = ast.lineinfo and ast.lineinfo[side].comments
      if comments then
        for _, comment in ipairs(comments) do
          if not isseen[comment] then
            comment.tag = 'Comment'
            comment.fpos = locpos(comment.lineinfo.first)
            comment.lpos = locpos(comment.lineinfo.last)
            comment.ast = comment
            table.insert(tokens, comment)
            isseen[comment] = true
          end
        end
      end
    end
  end

  M.walk(top_ast, visit, nil)
  table.sort(tokens, compare_tokens_)
  return tokens
end
|
||
|
|
||
|
|
||
|
-- Gets tokenlist range [fidx,lidx] covered by ast.  Returns nil,nil if not found.
-- A token is covered when its `ast` field is `ast` or one of its descendants.
--FIX:PERFORMANCE:this is slow on large files.
-- CATEGORY: AST/tokenlist query
function M.ast_idx_range_in_tokenlist(tokenlist, ast)
  -- Get list of primary nodes under ast.
  local isold = {}
  M.walk(ast, function(node) isold[node] = true end)

  -- Get range.
  local fidx, lidx
  for idx = 1, #tokenlist do
    if isold[tokenlist[idx].ast] then
      fidx = fidx or idx
      lidx = idx
    end
  end
  return fidx, lidx
end
|
||
|
|
||
|
|
||
|
-- Gets index range in tokenlist overlapped by character position range [fpos, lpos].
-- For example, `do ff() end` with range ` ff() ` would match tokens `ff()`.
-- Tokens partly inside range are counted, so range `f()` would match tokens `ff()`.
-- If lidx = fidx - 1, then position range is whitespace between tokens lidx (on left)
-- and fidx (on right), and this may include token pseudoindices 0 (start of file) and
-- #tokenlist+1 (end of file).
-- Note: lpos == fpos - 1 indicates zero-width range between chars lpos and fpos.
-- CATEGORY: tokenlist query
function M.tokenlist_idx_range_over_pos_range(tokenlist, fpos, lpos)
  -- Find first/last indices of tokens overlapped (even partly) by position range.
  local fidx, lidx
  for idx = 1, #tokenlist do
    local token = tokenlist[idx]
    if fpos <= token.lpos and lpos >= token.fpos then -- range overlaps token (even partially)
      fidx = fidx or idx
      lidx = idx
    end
  end
  if fidx then return fidx, lidx end

  -- on fail, check between tokens
  for idx = 1, #tokenlist + 1 do -- between idx-1 and idx
    local left, right = tokenlist[idx-1], tokenlist[idx]
    local left_lpos = left and left.lpos
    local right_fpos = right and right.fpos
    if (not left_lpos or fpos > left_lpos)
       and (not right_fpos or lpos < right_fpos)
    then -- range between tokens
      return idx, idx-1
    end
  end
  return fidx, lidx
end
|
||
|
--[=[TESTSUITE
|
||
|
local function test(...)
|
||
|
return table.concat({M.tokenlist_idx_range_over_pos_range(...)}, ',')
|
||
|
end
|
||
|
check('==', test({}, 2, 2), "1,0") -- no tokens
|
||
|
check('==', test({{tag='Id', fpos=1, lpos=1}}, 2, 2), "2,1") -- right of one token
|
||
|
check('==', test({{tag='Id', fpos=3, lpos=3}}, 2, 2), "1,0") -- left of one token
|
||
|
check('==', test({{tag='Id', fpos=3, lpos=4}}, 2, 3), "1,1") -- left partial overlap one token
|
||
|
check('==', test({{tag='Id', fpos=3, lpos=4}}, 4, 5), "1,1") -- right partial overlap one token
|
||
|
check('==', test({{tag='Id', fpos=3, lpos=6}}, 4, 5), "1,1") -- partial inner overlap one token
|
||
|
check('==', test({{tag='Id', fpos=3, lpos=6}}, 3, 6), "1,1") -- exact overlap one token
|
||
|
check('==', test({{tag='Id', fpos=4, lpos=5}}, 3, 6), "1,1") -- extra overlap one token
|
||
|
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 4, 4), "2,1") -- between tokens, " " exact
|
||
|
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 4, 3), "2,1") -- between tokens, "" on left
|
||
|
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=5, lpos=6}}, 5, 4), "2,1") -- between tokens, "" on right
|
||
|
check('==', test({{tag='Id', fpos=2, lpos=3}, {tag='Id', fpos=4, lpos=5}}, 4, 3), "2,1") -- between tokens, "" exact
|
||
|
--]=]
|
||
|
|
||
|
-- Removes tokens in tokenlist covered by ast.
-- The whole index range reported by ast_idx_range_in_tokenlist is removed.
-- CATEGORY: tokenlist manipulation
local function remove_ast_in_tokenlist(tokenlist, ast)
  local fidx, lidx = M.ast_idx_range_in_tokenlist(tokenlist, ast)
  if not fidx then return end  -- note: fidx implies lidx
  -- remove from the back so the remaining indices stay valid
  local idx = lidx
  while idx >= fidx do
    table.remove(tokenlist, idx)
    idx = idx - 1
  end
end
|
||
|
|
||
|
|
||
|
-- Inserts tokens from btokenlist into tokenlist.  Preserves sort.
-- All of btokenlist goes in front of the first token whose fpos is greater
-- than that of btokenlist[1] (appended if there is none).
-- Does nothing if btokenlist is empty.
-- CATEGORY: tokenlist manipulation
local function insert_tokenlist(tokenlist, btokenlist)
  local ftoken = btokenlist[1]
  if not ftoken then return end

  -- Get index in tokenlist in which to insert tokens in btokenlist.
  local fidx = #tokenlist + 1  -- default: append
  for idx = 1, #tokenlist do
    if tokenlist[idx].fpos > ftoken.fpos then
      fidx = idx
      break
    end
  end

  -- Insert tokens.
  tinsertlist(tokenlist, fidx, btokenlist)
end
|
||
|
|
||
|
|
||
|
-- Get character position range covered by ast in tokenlist.  Returns nil,nil on not found.
-- The range runs from fpos of the first covered token to lpos of the last.
-- CATEGORY: AST/tokenlist query
function M.ast_pos_range(ast, tokenlist) -- IMPROVE:style: ast_idx_range_in_tokenlist has params reversed
  local fidx, lidx = M.ast_idx_range_in_tokenlist(tokenlist, ast)
  if not fidx then
    return nil, nil
  end
  return tokenlist[fidx].fpos, tokenlist[lidx].lpos
end
|
||
|
|
||
|
|
||
|
-- Gets string representation of AST node.  nil if none.
-- The text is the slice of src spanning the node's position range.
-- IMPROVE: what if node is empty block?
-- CATEGORY: AST/tokenlist query
function M.ast_to_text(ast, tokenlist, src) -- IMPROVE:style: ast_idx_range_in_tokenlist has params reversed
  local fpos, lpos = M.ast_pos_range(ast, tokenlist)
  if not fpos then
    return nil
  end
  local text = src:sub(fpos, lpos)
  return text
end
|
||
|
|
||
|
|
||
|
|
||
|
-- Gets smallest AST node in top_ast/tokenlist/src
-- completely containing position range [pos1, pos2].
-- careful: "function" is not part of the `Function node.
-- If range is inside comment, returns comment also.
-- If range is inside whitespace, then returns true in third return value.
-- Returns ast, comment (or nil), iswhitespace (true or nil).
-- CATEGORY: AST/tokenlist query
function M.smallest_ast_containing_range(top_ast, tokenlist, pos1, pos2)
  local f0idx, l0idx = M.tokenlist_idx_range_over_pos_range(tokenlist, pos1, pos2)

  -- Find enclosing AST.
  M.ensure_parents_marked(top_ast)
  -- Widen the range outward past tokens whose ast has no parent link.
  local fidx, lidx = f0idx, l0idx
  while tokenlist[fidx] and not tokenlist[fidx].ast.parent do fidx = fidx - 1 end
  while tokenlist[lidx] and not tokenlist[lidx].ast.parent do lidx = lidx + 1 end
  -- DEBUG(fidx, lidx, f0idx, l0idx, #tokenlist, pos1, pos2, tokenlist[fidx], tokenlist[lidx])

  local ftoken, ltoken = tokenlist[fidx], tokenlist[lidx]
  local ast
  if ftoken and ltoken then
    ast = M.common_ast_parent(ftoken.ast, ltoken.ast, top_ast)
  else
    ast = top_ast  -- ran off either end of the token list
  end
  -- DEBUG('m2', tokenlist[fidx], tokenlist[lidx], top_ast, ast, ast and ast.tag)

  if l0idx == f0idx - 1 then -- whitespace
    return ast, nil, true
  end
  if l0idx == f0idx and tokenlist[l0idx].tag == 'Comment' then
    return ast, tokenlist[l0idx], nil
  end
  return ast, nil, nil
end
|
||
|
--IMPROVE: handle string edits and maybe others
|
||
|
|
||
|
|
||
|
-- Gets smallest statement block containing position pos or
-- nearest statement block before pos, whichever is smaller, given ast/tokenlist.
-- Returns mast, isafter where isafter is true when the result is a
-- statement (not a plain block) and pos lies past its last token.
-- Returns ast, false when no usable token precedes pos.
function M.current_statementblock(ast, tokenlist, pos)
  local fidx,lidx = M.tokenlist_idx_range_over_pos_range(tokenlist, pos, pos)
  if fidx > lidx then fidx = lidx end -- use nearest backward

  -- Find closest AST node backward
  while fidx >= 1 and tokenlist[fidx].tag == 'Comment' do fidx=fidx-1 end

  if fidx < 1 then return ast, false end
  local mast = tokenlist[fidx].ast
  if not mast then return ast, false end
  mast = M.get_containing_statementblock(mast, ast)
  local isafter = false
  if mast.tag2 ~= 'Block' then
    local mfidx,mlidx = M.ast_idx_range_in_tokenlist(tokenlist, mast)
    -- Compare pos with the character position of the statement's last
    -- token; mlidx itself is a token index, not a character position.
    -- mlidx is nil when the statement owns no tokens.
    if mlidx and pos > tokenlist[mlidx].lpos then
      isafter = true
    end
  end

  return mast, isafter
end
|
||
|
|
||
|
-- Gets index of bast in ast (nil if not found).
-- Only the array part 1..#ast is searched; the first match wins.
-- CATEGORY: AST query
function M.ast_idx(ast, bast)
  local idx, count = 1, #ast
  while idx <= count do
    if ast[idx] == bast then
      return idx
    end
    idx = idx + 1
  end
  return nil
end
|
||
|
|
||
|
|
||
|
-- Gets parent of ast and index of ast in parent.
-- Root node top_ast must also be provided.  Returns nil, nil if ast is root.
-- Asserts that ast has a parent link once parents are marked.
-- Note: may call mark_parents.
-- CATEGORY: AST query
function M.ast_parent_idx(top_ast, ast)
  if ast == top_ast then
    return nil, nil
  end
  M.ensure_parents_marked(top_ast)
  local parent = assert(ast.parent)
  return parent, M.ast_idx(parent, ast)
end
|
||
|
|
||
|
|
||
|
-- Gets common parent of aast and bast.  Always returns value.
-- A node counts as its own ancestor, so the result may be aast or bast.
-- Must provide root top_ast too.
-- CATEGORY: AST query
function M.common_ast_parent(aast, bast, top_ast)
  M.ensure_parents_marked(top_ast)

  -- Record bast and every ancestor of bast.
  local isparent = {}
  local node = bast
  repeat
    isparent[node] = true
    node = node.parent
  until not node

  -- Climb from aast until a recorded node is reached.
  node = aast
  repeat
    if isparent[node] then return node end
    node = node.parent
  until not node

  assert(false)
end
|
||
|
|
||
|
|
||
|
-- Replaces old_ast with new_ast/new_tokenlist in top_ast/tokenlist.
-- The statements of new_ast are spliced into the parent of old_ast at the
-- index old_ast occupied (or copied into old_ast itself when it is top_ast).
-- Note: assumes new_ast is a block.  assumes old_ast is a statement or block.
-- CATEGORY: AST/tokenlist
function M.replace_statements(top_ast, tokenlist, old_ast, new_ast, new_tokenlist)
  -- Swap the tokens first.
  remove_ast_in_tokenlist(tokenlist, old_ast)
  insert_tokenlist(tokenlist, new_tokenlist)

  -- Then swap the nodes.
  if old_ast ~= top_ast then
    local parent_ast, idx = M.ast_parent_idx(top_ast, old_ast)
    table.remove(parent_ast, idx)
    tinsertlist(parent_ast, idx, new_ast)
  else -- special case: no parent
    M.switchtable(old_ast, new_ast) -- note: safe since block is not in tokenlist.
  end

  -- fixup annotations
  for _, stat in ipairs(new_ast) do
    if top_ast.tag2 then
      local context = stat.tag == 'Do' and 'StatBlock' or 'Block'
      M.mark_tag2(stat, context)
    end
    if old_ast.parent then
      M.mark_parents(stat, old_ast.parent)
    end
  end
end
|
||
|
|
||
|
|
||
|
-- Adjusts lineinfo in tokenlist.
-- All char positions starting at pos1 are shifted by delta number of chars.
-- fpos and lpos of each token are tested and shifted independently.
-- CATEGORY: tokenlist
function M.adjust_lineinfo(tokenlist, pos1, delta)
  for _, token in ipairs(tokenlist) do
    local fpos, lpos = token.fpos, token.lpos
    if fpos >= pos1 then token.fpos = fpos + delta end
    if lpos >= pos1 then token.lpos = lpos + delta end
  end
  --tokenlist.nbytes = tokenlist.nbytes + delta
end
|
||
|
|
||
|
|
||
|
-- For each node n in ast, sets n.parent to parent node of n.
-- Assumes ast.parent will be parent_ast (may be nil)
-- Only table-valued array elements are treated as child nodes.
-- CATEGORY: AST query
function M.mark_parents(ast, parent_ast)
  ast.parent = parent_ast
  for _, child in ipairs(ast) do
    local is_node = type(child) == 'table'
    if is_node then
      M.mark_parents(child, ast)
    end
  end
end
|
||
|
|
||
|
|
||
|
-- Calls mark_parents(ast) if ast not marked.
-- "Not marked" is detected by the first child lacking a parent field;
-- an ast without a first child is left untouched.
-- CATEGORY: AST query
function M.ensure_parents_marked(ast)
  local first = ast[1]
  if first and not first.parent then
    M.mark_parents(ast)
  end
end
|
||
|
|
||
|
|
||
|
-- For each node n in ast, sets n.tag2 to context string:
-- 'Block' - node is block
-- 'Stat' - node is statement
-- 'StatBlock' - node is statement and block (i.e. `Do)
-- 'Exp' - node is expression
-- 'Explist' - node is expression list (or identifier list)
-- 'Pair' - node is key-value pair in table constructor
-- note: ast.tag2 will be set to context.
-- context defaults to 'Block' when omitted.
-- CATEGORY: AST query
local iscertainstat = {Do=true, Set=true, While=true, Repeat=true, If=true,
  Fornum=true, Forin=true, Local=true, Localrec=true, Return=true, Break=true}
function M.mark_tag2(ast, context)
  context = context or 'Block'
  ast.tag2 = context
  local parenttag = ast.tag
  for i, bast in ipairs(ast) do
    if type(bast) == 'table' then
      local childtag = bast.tag
      local nextcontext = 'Exp'  -- any tagged node not matched below
      if childtag == 'Do' then
        nextcontext = 'StatBlock'
      elseif iscertainstat[childtag] then
        nextcontext = 'Stat'
      elseif childtag == 'Call' or childtag == 'Invoke' then
        -- a call directly inside a block is a statement
        if context == 'Block' then nextcontext = 'Stat' end
        --DESIGN:Metalua: these calls actually contain expression lists,
        --  but the expression list is not represented as a complete node
        --  by Metalua (as blocks are in `Do statements)
      elseif childtag == 'Pair' then
        nextcontext = 'Pair'
      elseif not childtag then
        -- untagged list: expression/identifier list or block, by position
        if parenttag == 'Set' or parenttag == 'Local' or parenttag == 'Localrec'
          or parenttag == 'Forin' and i <= 2
          or parenttag == 'Function' and i == 1
        then
          nextcontext = 'Explist'
        else
          nextcontext = 'Block'
        end
      end
      M.mark_tag2(bast, nextcontext)
    end
  end
end
|
||
|
|
||
|
|
||
|
-- Gets smallest statement or block containing or being `ast`.
-- The AST root node `top_ast` must also be provided.
-- Follows parent links upward until a node whose tag2 is 'Stat',
-- 'StatBlock' or 'Block' is reached.
-- Note: may decorate AST as side-effect (mark_tag2/mark_parents).
-- top_ast is assumed a block, so this is always successful.
-- CATEGORY: AST query
function M.get_containing_statementblock(ast, top_ast)
  if not top_ast.tag2 then M.mark_tag2(top_ast) end
  local node = ast
  while true do
    local tag2 = node.tag2
    if tag2 == 'Stat' or tag2 == 'StatBlock' or tag2 == 'Block' then
      return node
    end
    M.ensure_parents_marked(top_ast)
    node = node.parent
  end
end
|
||
|
|
||
|
|
||
|
-- Computes the position range of the smallest statement, block, or comment
-- in ast/tokenlist that contains the position range [fpos, lpos].
-- When `allowexpand` is true (it defaults to nil) and the node found covers
-- exactly [fpos, lpos], the selection is widened to the next containing
-- statement instead, so that calling this repeatedly with the previous
-- result keeps growing the selection.
-- Returns the first and last position of the selected node.
-- CATEGORY: AST query
function M.select_statementblockcomment(ast, tokenlist, fpos, lpos, allowexpand)
  --IMPROVE: rename ast to top_ast
  local enclosing, comment = M.smallest_ast_containing_range(ast, tokenlist, fpos, lpos)
  local chosen = comment or M.get_containing_statementblock(enclosing, ast)
  local first, last = M.ast_pos_range(chosen, tokenlist)
  --DEBUG('s', first, last, fpos, lpos, enclosing.tag, chosen.tag)

  -- No widening requested, or the selection already differs from the input.
  if not allowexpand or fpos ~= first or lpos ~= last then
    return first, last
  end

  if comment then
    -- A comment was already fully selected: move on to the AST node that
    -- was matched alongside it.
    chosen = enclosing
    first, last = M.ast_pos_range(chosen, tokenlist)
    return first, last
  end

  -- note: several steps may be needed before the range actually grows.  For
  -- example, in `for x=1,2 do f() end` both the statement `f()` and the
  -- block `f()` have the same position range.
  M.ensure_parents_marked(ast)
  while true do
    if not chosen.parent or fpos ~= first or lpos ~= last then break end
    chosen = M.get_containing_statementblock(chosen.parent, ast)
    first, last = M.ast_pos_range(chosen, tokenlist)
  end
  return first, last
end
|
||
|
|
||
|
|
||
|
-- Converts tokenlist to a string representation for debugging.
-- Produces one line per token, in list order, of the form
--   tok.<index>: [<fpos>,<lpos>] <token[1]> <tag of token.ast>
-- and joins the lines with newlines (empty string for an empty list).
-- Tokens lacking fpos, lpos or ast are rendered with `nil` in the affected
-- field instead of raising an error, so that an incomplete or inconsistent
-- tokenlist can still be dumped.
-- CATEGORY: tokenlist debug
function M.dump_tokenlist(tokenlist)
  local ts = {}
  for i,token in ipairs(tokenlist) do
    local tast = token.ast
    ts[#ts+1] = 'tok.' .. i .. ': [' .. tostring(token.fpos) .. ','
       .. tostring(token.lpos) .. '] '
       .. tostring(token[1]) .. ' ' .. tostring(tast and tast.tag)
  end
  return table.concat(ts, '\n') -- .. 'nbytes=' .. tokenlist.nbytes .. '\n'
end
|
||
|
|
||
|
|
||
|
--FIX:Q: does this handle Unicode ok?
|
||
|
|
||
|
--FIX?:Metalua: fails on string with escape sequence '\/'. The Reference Manual
|
||
|
-- doesn't say this sequence is valid though.
|
||
|
|
||
|
--FIX:Metalua: In `local --[[x]] function --[[y]] f() end`,
|
||
|
-- 'x' comment omitted from AST.
|
||
|
|
||
|
--FIX:Metalua: `do --[[x]] end` doesn't generate comments in AST.
|
||
|
-- `if x then --[[x]] end` and `while 1 do --[[x]] end` generates
|
||
|
-- comments in first/last of block
|
||
|
|
||
|
--FIX:Metalua: `--[[x]] f() --[[y]]` returns lineinfo around `f()`.
|
||
|
-- `--[[x]] --[[y]]` returns lineinfo around everything.
|
||
|
|
||
|
--FIX:Metalua: `while 1 do --[[x]] --[[y]] end` returns first > last
|
||
|
-- lineinfo for contained block
|
||
|
|
||
|
--FIX:Metalua: search for "PATCHED:LuaInspect" in the metalualib folder.
|
||
|
|
||
|
--FIX?:Metalua: loadstring parses "--x" but metalua omits the comment in the AST
|
||
|
|
||
|
--FIX?:Metalua: `local x` is generating `Local{{`Id{x}}, {}}`, which
|
||
|
-- has no lineinfo on {}. This is contrary to the Metalua
|
||
|
-- spec: `Local{ {ident+} {expr+}? }.
|
||
|
-- Other things like `self` also generate no lineinfo.
|
||
|
-- The ast2.lineinfo above avoids this.
|
||
|
|
||
|
--FIX:Metalua: Metalua shouldn't overwrite ipairs/pairs. Note: Metalua version
|
||
|
-- doesn't set errorlevel correctly.
|
||
|
|
||
|
--Q:Metalua: Why does `return --[[y]] z --[[x]]` have
|
||
|
-- lineinfo.first.comments, lineinfo.last.comments,
|
||
|
-- plus lineinfo.comments (which is the same as lineinfo.first.comments) ?
|
||
|
|
||
|
--CAUTION:Metalua: `do f() end` returns lineinfo around `do f() end`, while
|
||
|
-- `while 1 do f() end` returns lineinfo around `f()` for inner block.
|
||
|
|
||
|
--CAUTION:Metalua: The lineinfo on Metalua comments is inconsistent with other
|
||
|
-- nodes
|
||
|
|
||
|
--CAUTION:Metalua: lineinfo of table in `f{}` is [3,2], of `f{ x,y }` it's [4,6].
|
||
|
-- This is inconsistent with `x={}` which is [3,4] and `f""` which is [1,2]
|
||
|
-- for the string.
|
||
|
|
||
|
--CAUTION:Metalua: only the `function()` form of `Function includes `function`
|
||
|
-- in lineinfo. 'function' is part of `Localrec and `Set in syntactic sugar form.
|
||
|
|
||
|
|
||
|
--[=[TESTSUITE
|
||
|
-- test longest_prefix/longest_postfix
|
||
|
local function pr(text1, text2)
|
||
|
local lastv
|
||
|
local function same(v)
|
||
|
assert(not lastv or v == lastv); lastv = v; return v
|
||
|
end
|
||
|
local function test1(text1, text2) -- test prefix/postfix
|
||
|
same(longest_prefix(text1, text2))
|
||
|
same(longest_postfix(text1:reverse(), text2:reverse()))
|
||
|
end
|
||
|
local function test2(text1, text2) -- test swap
|
||
|
test1(text1, text2)
|
||
|
test1(text2, text1)
|
||
|
end
|
||
|
for _,extra in ipairs{"", "x", "xy", "xyz"} do -- test extra chars
|
||
|
test2(text1, text2..extra)
|
||
|
test2(text2, text1..extra)
|
||
|
end
|
||
|
return lastv
|
||
|
end
|
||
|
check('==', pr("",""), 0)
|
||
|
check('==', pr("a",""), 0)
|
||
|
check('==', pr("a","a"), 1)
|
||
|
check('==', pr("ab",""), 0)
|
||
|
check('==', pr("ab","a"), 1)
|
||
|
check('==', pr("ab","ab"), 2)
|
||
|
check('==', pr("abcdefg","abcdefgh"), 7)
|
||
|
--]=]
|
||
|
|
||
|
--[=[TESTSUITE
|
||
|
print 'DONE'
|
||
|
--]=]
|
||
|
|
||
|
|
||
|
-- Export the module table.
return M
|