Compatible Android

This commit is contained in:
Andros Fenollosa
2016-11-03 00:05:36 +01:00
parent 7cb6af1390
commit 8ec8327e5e
1793 changed files with 440698 additions and 7 deletions

View File

@ -0,0 +1,832 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Summary: parser generator. Collection of higher order functors,
-- which allow to build and combine parsers. Relies on a lexer
-- that supports the same API as the one exposed in mll.lua.
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
--
-- Parser generators:
-- * [gg.sequence()]
-- * [gg.multisequence()]
-- * [gg.expr()]
-- * [gg.list()]
-- * [gg.onkeyword()]
-- * [gg.optkeyword()]
--
-- Other functions:
-- * [gg.parse_error()]
-- * [gg.make_parser()]
-- * [gg.is_parser()]
--
--------------------------------------------------------------------------------
local M = { }
local lexer = require 'metalua.grammar.lexer'
--------------------------------------------------------------------------------
-- Symbol generator: [gensym()] returns a guaranteed-to-be-unique identifier.
-- The main purpose is to avoid variable capture in macros.
--
-- If a string is passed as an argument, this string will be part of the
-- id name (helpful for macro debugging).
--------------------------------------------------------------------------------
-- Counter backing [M.gensym]; incremented once per generated identifier.
local gensymidx = 0

-- Return a new `Id node whose name embeds a counter value, so that two calls
-- never yield the same name within this module instance.
-- [arg]: optional string appended to the generated name.
function M.gensym (arg)
   gensymidx = gensymidx + 1
   local suffix = arg or ""
   local name = string.format (".%i.%s", gensymidx, suffix)
   return { tag = "Id", name }
end
-------------------------------------------------------------------------------
-- parser metatable, which maps __call to method parse, and adds some
-- error tracing boilerplate.
-------------------------------------------------------------------------------
-- Metatable shared by gg parsers: calling a parser object is the same as
-- invoking its [parse] method with the same arguments.
local parser_metatable = { }

parser_metatable.__call = function (parser, lx, ...)
   return parser :parse (lx, ...)
end
-------------------------------------------------------------------------------
-- Turn a table into a parser, mainly by setting the metatable.
-------------------------------------------------------------------------------
-- Turn table [p] into a parser of the given [kind]:
--  * records [kind] in [p.kind];
--  * makes sure [p.transformers] exists and gives it an [add] method which
--    appends a transformer to the list;
--  * installs the shared parser metatable, so that [p] becomes callable.
-- Returns [p] itself.
function M.make_parser(kind, p)
   p.kind = kind
   if not p.transformers then p.transformers = { } end
   local transformers = p.transformers
   transformers.add = function (list, x) table.insert (list, x) end
   return setmetatable (p, parser_metatable)
end
-------------------------------------------------------------------------------
-- Return true iff [x] is a parser.
-- If it's a gg-generated parser, return the name of its kind.
-------------------------------------------------------------------------------
-- Tell whether [x] can be used as a parser.
--  * plain functions: returns [true];
--  * tables carrying the gg parser metatable: returns their [kind] field;
--  * anything else: returns [false].
function M.is_parser (x)
   if type(x) == "function" then return true end
   if getmetatable(x) ~= parser_metatable then return false end
   return x.kind
end
-------------------------------------------------------------------------------
-- Parse a sequence, without applying builder nor transformers.
-------------------------------------------------------------------------------
-- Run the elements of sequence [p] in order against lexer stream [lx].
--  * string elements are keywords: the next token is consumed and must be
--    that keyword, otherwise a parsing error is raised;
--  * parser elements are applied to [lx], and their result is appended to
--    the returned list;
--  * anything else is a mistake in the grammar definition, reported with a
--    regular Lua error rather than a parsing error.
-- Returns the list of subparser results (keywords are not part of it).
local function raw_parse_sequence (lx, p)
   -- [table.tostring] is a metalua extension which is not part of stock Lua;
   -- fall back on [tostring] so that the diagnostic below never crashes.
   local describe = table.tostring or tostring
   local r = { }
   for i=1, #p do
      local e=p[i]
      if type(e) == "string" then
         local kw = lx :next()
         if not lx :is_keyword (kw, e) then
            M.parse_error(
               lx, "A keyword was expected, probably `%s'.", e)
         end
      elseif M.is_parser (e) then
         table.insert (r, e(lx))
      else -- Invalid parser definition, this is *not* a parsing error
         error(string.format(
            "Sequence `%s': element #%i is neither a string nor a parser: %s",
            tostring(p.name), i, describe(e)))
      end
   end
   return r
end
-------------------------------------------------------------------------------
-- Parse a multisequence, without applying multisequence transformers.
-- The sequences are completely parsed.
-------------------------------------------------------------------------------
-- Select a parser according to the next token and run it on [lx].
-- The result of [lx:is_keyword(lx:peek())] is used as a key into
-- [sequence_table]; when no entry matches, [default] is used instead.
-- Returns the selected parser's result, or [false] when there is neither a
-- matching entry nor a default.
local function raw_parse_multisequence (lx, sequence_table, default)
   local chosen = sequence_table[lx:is_keyword(lx:peek())] or default
   if not chosen then return false end
   return chosen (lx)
end
-------------------------------------------------------------------------------
-- Applies all transformers listed in parser on ast.
-------------------------------------------------------------------------------
-- Apply every transformer of [parser] to [ast], in list order. A transformer
-- returning nil/false leaves the current AST unchanged.
-- When the resulting AST is a table whose [lineinfo] is missing, or does not
-- span exactly [fli]..[lli], a fresh lineinfo is attached to it.
-- Returns the (possibly replaced) AST.
local function transform (ast, parser, fli, lli)
   local transformers = parser.transformers
   if transformers then
      for _, fn in ipairs (transformers) do
         local replaced = fn (ast)
         if replaced then ast = replaced end
      end
   end
   if type(ast) ~= 'table' then return ast end
   local current = ast.lineinfo
   local up_to_date = current and current.first==fli and current.last==lli
   if not up_to_date then
      ast.lineinfo = lexer.new_lineinfo(fli, lli)
   end
   return ast
end
-------------------------------------------------------------------------------
-- Generate a traceable parsing error (not implemented yet)
-------------------------------------------------------------------------------
-- Raise a parsing error located at the lexer's current left lineinfo.
--  [lx]:  lexer stream; [lx:lineinfo_left()] gives the position, and
--         [lx.src], when present, is used to quote the offending line.
--  [fmt]: [string.format] pattern for the message; extra arguments are its
--         parameters.
-- The message is prefixed with "line L, char C: " (and with the file name
-- when it is known and is not '?'). This function never returns.
function M.parse_error(lx, fmt, ...)
   local where = lx:lineinfo_left()
   -- Without position information, report -1 everywhere.
   local file, line, column, offset = nil, -1, -1, -1
   if where then
      file, line, column, offset =
         where.source, where.line, where.column, where.offset
   end

   local text = string.format("line %i, char %i: "..fmt, line, column, ...)
   if file and file~='?' then text = "file "..file..", "..text end

   local src = lx.src
   if offset>0 and src then
      -- Widen [first, last] around [offset] up to the surrounding newlines.
      local first, last = offset, offset
      while src:sub(first,first) ~= '\n' and first>=0 do first=first-1 end
      while src:sub(last,last) ~= '\n' and last<=#src do last=last+1 end
      local excerpt = src:sub (first+1, last-1)
      -- The caret is drawn after [column] spaces, under the quoted line.
      local marker = string.rep (" ", column).."^"
      text = string.format("%s\n>>> %s\n>>> %s", text, excerpt, marker)
   end
   --lx :kill()
   error(text)
end
-------------------------------------------------------------------------------
--
-- Sequence parser generator
--
-------------------------------------------------------------------------------
-- Input fields:
--
-- * [builder]: how to build an AST out of sequence parts. let [x] be the list
-- of subparser results (keywords are simply omitted). [builder] can be:
-- - [nil], in which case the result of parsing is simply [x]
-- - a string, which is then put as a tag on [x]
-- - a function, which takes [x] as a parameter and returns an AST.
--
-- * [name]: the name of the parser. Used for debug messages
--
-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
-- returned by the parser.
--
-- * Table-part entries correspond to keywords (strings) and subparsers
-- (functions and callable objects).
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "sequence"
-- * [name] is set, if it wasn't in the input.
--
-------------------------------------------------------------------------------
-- Turn config table [p] into a sequence parser and return it.
-- The array part of [p] lists keywords (strings) and subparsers;
-- [p.builder] tells how to build the AST from the subparser results, and
-- [p.name] defaults to a name derived from the first/last keywords.
function M.sequence (p)
   M.make_parser ("sequence", p)
   -------------------------------------------------------------------
   -- Parsing method
   -------------------------------------------------------------------
   function p:parse (lx)
      -- Raw parsing:
      local fli = lx:lineinfo_right()
      local seq = raw_parse_sequence (lx, self)
      local lli = lx:lineinfo_left()
      -- Builder application:
      local builder, tb = self.builder, type (self.builder)
      -- A value is callable when it is a function or when its *metatable*
      -- provides __call. Looking the field up on the builder itself would
      -- reject callable tables (e.g. gg parsers) and would raise an index
      -- error on builders such as numbers.
      local bmt = getmetatable (builder)
      local callable = tb == "function"
         or (type (bmt) == "table" and bmt.__call)
      if tb == "string" then seq.tag = builder
      elseif callable then seq = builder(seq)
      elseif builder == nil then -- nothing
      else error ("Invalid builder of type "..tb.." in sequence") end
      seq = transform (seq, self, fli, lli)
      assert (not seq or seq.lineinfo)
      return seq
   end
   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   -- Try to build a proper name
   if p.name then
      -- don't touch existing name
   elseif type(p[1])=="string" then -- find name based on 1st keyword
      if #p==1 then p.name=p[1]
      elseif type(p[#p])=="string" then
         p.name = p[1] .. " ... " .. p[#p]
      else p.name = p[1] .. " ..." end
   else -- can't find a decent name
      p.name = "unnamed_sequence"
   end
   return p
end --</sequence>
-------------------------------------------------------------------------------
--
-- Multiple, keyword-driven, sequence parser generator
--
-------------------------------------------------------------------------------
-- in [p], useful fields are:
--
-- * [transformers]: as usual
--
-- * [name]: as usual
--
-- * Table-part entries must be sequence parsers, or tables which can
-- be turned into a sequence parser by [gg.sequence]. These
-- sequences must start with a keyword, and this initial keyword
-- must be different for each sequence. The table-part entries will
-- be removed after [gg.multisequence] returns.
--
-- * [default]: the parser to run if the next keyword in the lexer is
-- none of the registered initial keywords. If there's no default
-- parser and no suitable initial keyword, the multisequence parser
-- simply returns [false].
--
-- After creation, the following fields are added:
--
-- * [parse] the parsing function lexer->AST
--
-- * [sequences] the table of sequences, indexed by initial keywords.
--
-- * [add] method takes a sequence parser or a config table for
-- [gg.sequence], and adds/replaces the corresponding sequence
-- parser. If the keyword was already used, the former sequence is
-- silently replaced (issuing a warning is still a TODO).
--
-- * [get] method returns a sequence by its initial keyword
--
-- * [kind] == "multisequence"
--
-------------------------------------------------------------------------------
-- Turn config table [p] into a keyword-driven multisequence parser and
-- return it. Array-part entries are registered through [p:add] and then
-- removed from the array part; keyword-led sequences end up in
-- [p.sequences], indexed by their first keyword.
function M.multisequence (p)
   M.make_parser ("multisequence", p)

   -------------------------------------------------------------------
   -- Register [s], which is either a parser or a config table for
   -- [M.sequence] (compiled on the fly). A sequence parser starting
   -- with a keyword is stored under that keyword; anything else becomes
   -- the default parser, and having two defaults is an error.
   -------------------------------------------------------------------
   function p :add (s)
      local s_is_table = type(s)=='table'
      local keyword = s_is_table and s[1]
      if s_is_table and not M.is_parser(s) then M.sequence(s) end
      local keyed = M.is_parser(s)=='sequence' and type(keyword)=='string'
      if keyed then
         -- TODO: warn when initial keyword `keyword` is overloaded in multiseq
         self.sequences[keyword] = s
         return
      end
      if self.default then -- two defaults
         error ("In a multisequence parser, all but one sequences "..
                "must start with a keyword")
      end
      self.default = s -- first default
   end -- </multisequence.add>

   -------------------------------------------------------------------
   -- Get the sequence starting with this keyword. [kw :: string]
   -------------------------------------------------------------------
   function p :get (kw)
      return self.sequences [kw]
   end

   -------------------------------------------------------------------
   -- Remove the sequence starting with keyword [kw :: string], and
   -- return it (nil if there was none).
   -------------------------------------------------------------------
   function p :del (kw)
      local removed = self.sequences[kw]
      -- TODO: warn when trying to delete a non-existent entry
      self.sequences[kw] = nil
      return removed
   end

   -------------------------------------------------------------------
   -- Parsing method
   -------------------------------------------------------------------
   function p :parse (lx)
      local fli = lx:lineinfo_right()
      local ast = raw_parse_multisequence (lx, self.sequences, self.default)
      local lli = lx:lineinfo_left()
      return transform (ast, self, fli, lli)
   end

   -------------------------------------------------------------------
   -- Construction: move the sequences passed to the constructor from
   -- the array part of the parser to the hash part of field [sequences].
   -------------------------------------------------------------------
   p.sequences = { }
   local count = #p
   for i = 1, count do
      p :add (p[i])
      p[i] = nil
   end
   -- FIXME: why is this commented out?
   --if p.default and not is_parser(p.default) then sequence(p.default) end
   return p
end --</multisequence>
-------------------------------------------------------------------------------
--
-- Expression parser generator
--
-------------------------------------------------------------------------------
--
-- Expression configuration relies on three tables: [prefix], [infix]
-- and [suffix]. Moreover, the primary parser can be replaced by a
-- table: in this case the [primary] table will be passed to
-- [gg.multisequence] to create a parser.
--
-- Each of these tables is a modified multisequence parser: the
-- differences with respect to regular multisequence config tables are:
--
-- * the builder takes specific parameters:
-- - for [prefix], it takes the result of the prefix sequence parser,
-- and the prefixed expression
-- - for [infix], it takes the left-hand-side expression, the results
-- of the infix sequence parser, and the right-hand-side expression.
-- - for [suffix], it takes the suffixed expression, and the result
-- of the suffix sequence parser.
--
-- * the default field is a list, with parameters:
-- - [parser] the raw parsing function
-- - [transformers], as usual
-- - [prec], the operator's precedence
-- - [assoc] for [infix] table, the operator's associativity, which
-- can be "left", "right" or "flat" (default to left)
--
-- In [p], useful fields are:
-- * [transformers]: as usual
-- * [name]: as usual
-- * [primary]: the atomic expression parser, or a multisequence config
-- table (mandatory)
-- * [prefix]: prefix operators config table, see above.
-- * [infix]: infix operators config table, see above.
-- * [suffix]: suffix operators config table, see above.
--
-- After creation, these fields are added:
-- * [kind] == "expr"
-- * [parse] as usual
-- * each table is turned into a multisequence, and therefore has an
-- [add] method
--
-------------------------------------------------------------------------------
-- Turn config table [p] into an expression parser (kind "expr") and return it.
-- [p.primary] (or, failing that, [p[1]]) is the atomic-expression parser;
-- [p.prefix], [p.infix] and [p.suffix] hold the operator tables. Each of these
-- four fields which is missing or is not yet a parser is handed over to
-- [M.multisequence] (see the construction part at the end of this function).
function M.expr (p)
M.make_parser ("expr", p)
-------------------------------------------------------------------
-- parser method.
-- In addition to the lexer, it takes an optional precedence:
-- it won't read expressions whose precedence is lower or equal
-- to [prec]. When omitted, [prec] defaults to 0.
-------------------------------------------------------------------
function p :parse (lx, prec)
prec = prec or 0
------------------------------------------------------
-- Extract the right parser and the corresponding
-- options table, for (pre|in|suff)fix operators.
-- Options include prec, assoc, transformers.
-- Returns [parsing function, options table], or
-- [false, false] when neither a keyword-based sequence
-- nor a default entry is available in [tab].
------------------------------------------------------
local function get_parser_info (tab)
local p2 = tab :get (lx :is_keyword (lx :peek()))
if p2 then -- keyword-based sequence found
-- Only the raw sequence is parsed here: the operator's builder and
-- transformers are applied by the handle_* functions below.
local function parser(lx) return raw_parse_sequence(lx, p2) end
return parser, p2
else -- Got to use the default parser
local d = tab.default
-- NOTE(review): the returned function is later called as [f(lx)], i.e.
-- without [d] as receiver; if [d.parse] is a method expecting [self],
-- it will receive [lx] instead -- confirm what defaults look like.
if d then return d.parse or d.parser, d
else return false, false end
end
end
------------------------------------------------------
-- Look for a prefix sequence. Multiple prefixes are
-- handled through the recursive [p.parse] call.
-- Notice the double-transform: one for the primary
-- expr, and one for the one with the prefix op.
------------------------------------------------------
local function handle_prefix ()
local fli = lx :lineinfo_right()
local p2_func, p2 = get_parser_info (self.prefix)
local op = p2_func and p2_func (lx)
if op then -- Keyword-based sequence found
local ili = lx :lineinfo_right() -- Intermediate LineInfo
-- The operand is parsed at the prefix operator's own precedence.
local e = p2.builder (op, self :parse (lx, p2.prec))
local lli = lx :lineinfo_left()
return transform (transform (e, p2, ili, lli), self, fli, lli)
else -- No prefix found, get a primary expression
local e = self.primary(lx)
local lli = lx :lineinfo_left()
return transform (e, self, fli, lli)
end
end --</expr.parse.handle_prefix>
------------------------------------------------------
-- Look for an infix sequence+right-hand-side operand.
-- Return the whole binary expression result,
-- or false if no operator was found.
------------------------------------------------------
local function handle_infix (e)
local p2_func, p2 = get_parser_info (self.infix)
if not p2 then return false end
-----------------------------------------
-- Handle flattening operators: gather all operands
-- of the series in [list]; when a different operator
-- is found, stop, build from [list], [transform] and
-- return.
-----------------------------------------
if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
local fli = lx:lineinfo_right()
-- [pflat] remembers the operator which started the series, while [p2]
-- is overwritten with whichever operator comes next.
local pflat, list = p2, { e }
repeat
local op = p2_func(lx)
if not op then break end
table.insert (list, self:parse (lx, p2.prec))
local _ -- We only care about checking that p2==pflat
_, p2 = get_parser_info (self.infix)
until p2 ~= pflat
local e2 = pflat.builder (list)
local lli = lx:lineinfo_left()
return transform (transform (e2, pflat, fli, lli), self, fli, lli)
-----------------------------------------
-- Handle regular infix operators: [e] the LHS is known,
-- just gather the operator and [e2] the RHS.
-- Result goes in [e3].
-- An operator is accepted when it binds tighter than [prec], or
-- binds equally and is right-associative.
-----------------------------------------
elseif p2.prec and p2.prec>prec or
p2.prec==prec and p2.assoc=="right" then
local fli = e.lineinfo.first -- lx:lineinfo_right()
local op = p2_func(lx)
if not op then return false end
local e2 = self:parse (lx, p2.prec)
local e3 = p2.builder (e, op, e2)
local lli = lx:lineinfo_left()
return transform (transform (e3, p2, fli, lli), self, fli, lli)
-----------------------------------------
-- Check for non-associative operators, and complain if applicable.
-----------------------------------------
elseif p2.assoc=="none" and p2.prec==prec then
M.parse_error (lx, "non-associative operator!")
-----------------------------------------
-- No infix operator suitable at that precedence
-----------------------------------------
else return false end
end --</expr.parse.handle_infix>
------------------------------------------------------
-- Look for a suffix sequence.
-- Return the result of suffix operator on [e],
-- or false if no operator was found.
------------------------------------------------------
local function handle_suffix (e)
-- NOTE(review): an older FIXME here asked for [fli] to be taken from
-- [e.lineinfo.first]; the code below already does so.
local p2_func, p2 = get_parser_info (self.suffix)
if not p2 then return false end
-- Unlike infix operators, a suffix operator whose precedence is equal
-- to [prec] is accepted (>= instead of >).
if not p2.prec or p2.prec>=prec then
--local fli = lx:lineinfo_right()
local fli = e.lineinfo.first
local op = p2_func(lx)
if not op then return false end
local lli = lx:lineinfo_left()
e = p2.builder (e, op)
e = transform (transform (e, p2, fli, lli), self, fli, lli)
return e
end
return false
end --</expr.parse.handle_suffix>
------------------------------------------------------
-- Parser body: read suffix and (infix+operand)
-- extensions as long as we're able to fetch more at
-- this precedence level.
------------------------------------------------------
local e = handle_prefix()
repeat
-- Each handler returns false when it found nothing to apply, in which
-- case [e] is kept as it is; the loop stops once both came back empty.
local x = handle_suffix (e); e = x or e
local y = handle_infix (e); e = y or e
until not (x or y)
-- No transform: it already happened in operators handling
return e
end --</expr.parse>
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
-- The primary parser may be given positionally, as [p[1]].
if not p.primary then p.primary=p[1]; p[1]=nil end
for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
if not p[t] then p[t] = { } end
if not M.is_parser(p[t]) then M.multisequence(p[t]) end
end
-- Adding to an expression parser means adding to its primary parser.
function p:add(...) return self.primary:add(...) end
return p
end --</expr>
-------------------------------------------------------------------------------
--
-- List parser generator
--
-------------------------------------------------------------------------------
-- In [p], the following fields can be provided in input:
--
-- * [builder]: takes list of subparser results, returns AST
-- * [transformers]: as usual
-- * [name]: as usual
--
-- * [terminators]: list of strings representing the keywords which
-- might mark the end of the list. When non-empty, the list is
-- allowed to be empty. A string is treated as a single-element
-- table, whose element is that string, e.g. ["do"] is the same as
-- [{"do"}].
--
-- * [separators]: list of strings representing the keywords which can
-- separate elements of the list. When non-empty, one of these
-- keywords has to be found between each element. Lack of a separator
-- indicates the end of the list. A string is treated as a
-- single-element table, whose element is that string, e.g. ["do"]
-- is the same as [{"do"}]. If [terminators] is empty/nil, then
-- [separators] has to be non-empty.
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "list"
--
-------------------------------------------------------------------------------
-- Turn config table [p] into a list parser and return it.
-- [p.primary] (or [p[1]]) parses one element; [p.separators] and
-- [p.terminators] are keyword lists (a lone string is wrapped into a
-- one-element list, an empty list is turned into nil).
function M.list (p)
   M.make_parser ("list", p)

   -------------------------------------------------------------------
   -- Parsing method
   -------------------------------------------------------------------
   function p :parse (lx)
      -- Is the next token one of [keywords]? A nil/false list never matches.
      local function peek_is_in (keywords)
         if not keywords then return keywords end
         return (lx:is_keyword (lx:peek(), unpack (keywords)))
      end

      local function at_eof ()
         return lx:peek().tag == "Eof"
      end

      -- Decide, after an element has been read, whether the list is over.
      -- When separators are configured, a separator found ahead is consumed
      -- as a side effect, and its absence ends the list.
      local function list_is_over ()
         local seps = self.separators
         if seps then
            if not peek_is_in (seps) then return true end
            if not lx:next() then return true end
         end
         if peek_is_in (self.terminators) then return true end
         return at_eof ()
      end

      local items = { }
      local fli = lx :lineinfo_right()

      -- If there's a terminator (or the end of file) to start with, the
      -- list is empty and no element is parsed.
      local starts_empty = self.terminators and
         (peek_is_in (self.terminators) or at_eof ())
      if not starts_empty then
         repeat
            local item = self.primary (lx)
            table.insert (items, item)
         until list_is_over ()
      end
      local lli = lx:lineinfo_left()

      -- Apply the builder: a string is used as a tag, a function or a
      -- callable value receives the list; anything else is ignored.
      local b = self.builder
      local kind = type (b)
      if kind == "string" then
         items.tag = b
      elseif kind == "function" then
         items = b (items)
      elseif b then
         local bmt = getmetatable (b)
         if bmt and bmt.__call then items = b (items) end
      end
      return transform (items, self, fli, lli)
   end --</list.parse>

   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   if not p.primary then p.primary = p[1]; p[1] = nil end
   for _, field in ipairs { "terminators", "separators" } do
      local value = p[field]
      if type (value) == "string" then p[field] = { value }
      elseif value and #value == 0 then p[field] = nil end
   end
   return p
end --</list>
-------------------------------------------------------------------------------
--
-- Keyword-conditioned parser generator
--
-------------------------------------------------------------------------------
--
-- Only apply a parser if a given keyword is found. The result of
-- [gg.onkeyword] parser is the result of the subparser (modulo
-- [transformers] applications).
--
-- lineinfo: the keyword is *not* included in the boundaries of the
-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
-- implementation of metalua has shown that it was the appropriate choice
-- in every case.
--
-- Input fields:
--
-- * [name]: as usual
--
-- * [transformers]: as usual
--
-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
-- stream instead of being consumed.
--
-- * [primary]: the subparser.
--
-- * [keywords]: list of strings representing triggering keywords.
--
-- * Table-part entries can contain strings, and/or exactly one parser.
-- Strings are put in [keywords], and the parser is put in [primary].
--
-- After the call, the following fields will be set:
--
-- * [parse] the parsing method
-- * [kind] == "onkeyword"
-- * [primary]
-- * [keywords]
--
-------------------------------------------------------------------------------
-- Turn config table [p] into a keyword-conditioned parser and return it.
-- Strings found in the array part of [p] are appended to [p.keywords];
-- the single parser found there becomes [p.primary].
function M.onkeyword (p)
   M.make_parser ("onkeyword", p)
   -------------------------------------------------------------------
   -- Parsing method: returns [false] when the next token is none of the
   -- trigger keywords; otherwise consumes the keyword (unless [self.peek]
   -- is set) and returns the transformed result of the subparser.
   -------------------------------------------------------------------
   function p :parse (lx)
      if not lx :is_keyword (lx:peek(), unpack(self.keywords)) then
         return false
      end
      local fli = lx:lineinfo_right()
      if not self.peek then lx:next() end
      local content = self.primary (lx)
      local lli = lx:lineinfo_left()
      -- The subparser may return a non-table value (e.g. [false], as
      -- multisequence parsers do when nothing matches): only look for an
      -- existing lineinfo on tables, instead of crashing while indexing.
      local li = type(content)=='table' and content.lineinfo or { }
      fli, lli = li.first or fli, li.last or lli
      -- Use [self] like every other field access in this method.
      return transform (content, self, fli, lli)
   end
   -------------------------------------------------------------------
   -- Construction
   -------------------------------------------------------------------
   if not p.keywords then p.keywords = { } end
   for _, x in ipairs(p) do
      if type(x)=="string" then table.insert (p.keywords, x)
      else assert (not p.primary and M.is_parser (x)); p.primary = x end
   end
   assert (next (p.keywords), "Missing trigger keyword in gg.onkeyword")
   assert (p.primary, 'no primary parser in gg.onkeyword')
   return p
end --</onkeyword>
-------------------------------------------------------------------------------
--
-- Optional keyword consumer pseudo-parser generator
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function. That function parses
-- one of the keywords passed as parameters, and returns it. It returns
-- [false] if no matching keyword is found.
--
-- Notice that tokens returned by lexer already carry lineinfo, therefore
-- there's no need to add them, as done usually through transform() calls.
-------------------------------------------------------------------------------
-- Build a function which consumes one of the given keywords if it comes
-- next in the lexer stream. The keywords are passed either as separate
-- string arguments or as a single list. The returned function yields what
-- [lx:is_keyword] returned when a keyword matched, [false] otherwise.
function M.optkeyword (...)
   local keywords = {...}
   if type (keywords[1]) == "table" then
      -- A single list argument replaces the vararg list.
      assert (#keywords == 1)
      keywords = keywords[1]
   end
   for _, v in ipairs(keywords) do assert (type(v)=="string") end

   return function (lx)
      local found = lx:is_keyword (lx:peek(), unpack (keywords))
      if not found then return false end
      lx:next()
      return found
   end
end
-------------------------------------------------------------------------------
--
-- Run a parser with a special lexer
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function.
-- First argument is the lexer class to be used with the parser,
-- 2nd is the parser itself.
-- The resulting parser returns whatever the argument parser does.
--
-------------------------------------------------------------------------------
-- Build a function which runs [parser] with [new_lexer] installed as the
-- metatable of the lexer stream, and restores the previous metatable
-- afterwards, whether the parser succeeded or not.
--  [new_lexer]: the lexer class (metatable) to switch to;
--  [parser]:    the parser to run.
-- Both can also be passed together in a single list: with_lexer{ lexer, p }.
-- The returned function yields the first result of [parser]; errors raised
-- by [parser] are re-raised unchanged once the lexer has been restored.
function M.with_lexer(new_lexer, parser)
   -------------------------------------------------------------------
   -- Most gg functions take their parameters in a table, so it's
   -- better to silently accept when with_lexer{ } is called with
   -- its arguments in a list:
   -------------------------------------------------------------------
   if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
      return M.with_lexer(unpack(new_lexer))
   end
   -------------------------------------------------------------------
   -- Save the current lexer, switch it for the new one, run the parser,
   -- restore the previous lexer, even if the parser caused an error.
   -------------------------------------------------------------------
   return function (lx)
      local old_lexer = getmetatable(lx)
      lx:sync()
      setmetatable(lx, new_lexer)
      local status, result = pcall(parser, lx)
      lx:sync()
      setmetatable(lx, old_lexer)
      if status then return result end
      -- Level 0: re-raise the original error value as is. With the default
      -- level, string messages would get an extra position prefix pointing
      -- at this line rather than at the place of the actual failure.
      error(result, 0)
   end
end
--------------------------------------------------------------------------------
--
-- Make sure a parser is used and returns successfully.
--
--------------------------------------------------------------------------------
-- Wrap parser [primary] into a parser which raises a parsing error when
-- [primary] returns nothing or an empty list.
-- The wrapper takes its name from [primary] when the latter is a table.
function M.nonempty(primary)
   -- [primary] may be a plain function (see [M.is_parser]), which cannot
   -- be indexed: only read a name from tables.
   local name = type(primary)=='table' and primary.name or nil
   local p = M.make_parser('non-empty list', { primary = primary, name = name })
   function p :parse (lx)
      local fli = lx:lineinfo_right()
      local content = self.primary (lx)
      local lli = lx:lineinfo_left()
      -- A false/nil result counts as empty: report it as a parsing error
      -- instead of crashing while indexing it.
      if not content or #content == 0 then
         M.parse_error (lx, "`%s' must not be empty.", self.name or "list")
      else
         local li = content.lineinfo or { }
         fli, lli = li.first or fli, li.last or lli
         return transform (content, self, fli, lli)
      end
   end
   return p
end
-- Metatable of "future" proxies: indexing a proxy with a parser name gives
-- a function which only resolves the actual parser when it is called.
local FUTURE_MT = { }

FUTURE_MT.__tostring = function ()
   return "<Proxy parser module>"
end

-- Proxies are read-only.
FUTURE_MT.__newindex = function (_, _key, _value)
   error "don't write in futures"
end

-- Return a function which, when called, walks the proxy's [__path] (if any)
-- down from its [__module], fetches field [parser_name] there, and calls it
-- with the arguments received.
FUTURE_MT.__index = function (self, parser_name)
   return function (...)
      local path, target = rawget (self, '__path'), self.__module
      if path then
         for _, name in ipairs (path) do
            target = rawget (target, name)
            if not target then error ("Submodule '"..name.."' undefined") end
         end
      end
      local parser = rawget (target, parser_name)
      if not parser then error ("Parser '"..parser_name.."' undefined") end
      return parser (...)
   end
end
-- Create a proxy over [module]: parsers looked up through the proxy are only
-- resolved when they are called. The optional extra arguments are strings
-- naming the chain of submodules to walk through, starting from [module].
function M.future(module, ...)
   checks('table')
   local path = ... and {...}
   if path then
      for _, segment in ipairs(path) do
         assert(type(segment)=='string', "Bad future arg")
      end
   end
   return setmetatable({ __module = module, __path = path }, FUTURE_MT)
end
return M

View File

@ -0,0 +1,678 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
require 'checks'
-- Table holding this module's public functions and fields.
local M = { }
-- Lexer stream class: it is its own [__index], so that objects using it as
-- a metatable look their methods up in it.
-- NOTE(review): [alpha] and [sym] start empty here; presumably they hold
-- the registered keywords/symbols -- confirm in the rest of the file.
local lexer = { alpha={ }, sym={ } }
lexer.__index=lexer
-- NOTE(review): [__type] is presumably the type name matched by checks()
-- calls, as with the 'lexer.*' metatables created below -- confirm.
lexer.__type='lexer.stream'
M.lexer = lexer
-- Debug trace function, disabled by default (no-op).
local debugf = function() end
-- local debugf=printf
----------------------------------------------------------------------
-- Some locale settings produce bad results, e.g. the French locale
-- expects float numbers to use commas instead of periods.
-- TODO: change number parser into something locale-independent,
-- locales are nasty.
----------------------------------------------------------------------
os.setlocale('C')
-- Registry of the metatables created by [new_metatable], indexed by name
-- and exported as [M.metatables].
local MT = { }
M.metatables=MT
----------------------------------------------------------------------
-- Create a new metatable, for a new class of objects.
----------------------------------------------------------------------
-- Create the metatable of a new class of objects and register it as
-- [MT[name]]. The metatable is its own [__index] and carries the type name
-- 'lexer.<name>' in its [__type] field. Nothing is returned.
local function new_metatable(name)
   local mt = { }
   mt.__type = 'lexer.'..name
   mt.__index = mt
   MT[name] = mt
end
----------------------------------------------------------------------
-- Position: represent a point in a source file.
----------------------------------------------------------------------
new_metatable ('position')

-- Next identifier to hand out; every position created gets a distinct one.
local position_idx=1

-- Create a position object.
--  [line], [column], [offset]: numbers locating the point;
--  [source]: string naming the source.
-- The result also carries a unique numeric [id].
function M.new_position(line, column, offset, source)
   checks('number', 'number', 'number', 'string')
   local pos = {
      id     = position_idx,
      line   = line,
      column = column,
      offset = offset,
      source = source }
   position_idx = position_idx+1
   return setmetatable(pos, MT.position)
end
-- Render a position as "<source|Lline|Ccolumn|Koffset>"; a "C|" marker is
-- inserted right after "<" when the position has a [comments] field.
function MT.position :__tostring()
   local marker = ""
   if self.comments then marker = "C|" end
   return string.format("<%s%s|L%d|C%d|K%d>",
      marker, self.source, self.line, self.column, self.offset)
end
----------------------------------------------------------------------
-- Position factory: convert offsets into line/column/offset positions.
----------------------------------------------------------------------
new_metatable ('position_factory')

-- Create a position factory for source text [src], named [src_name].
-- The factory stores, in [line2offset], the offset at which each line
-- starts, followed by a final sentinel entry; [max] is #src+1.
function M.new_position_factory(src, src_name)
   -- assert(type(src)=='string')
   -- assert(type(src_name)=='string')
   local line2offset = { 1 }
   -- The empty capture yields the offset just after each newline, i.e. the
   -- offset at which the next line starts.
   for line_start in src :gmatch ('\n()') do
      line2offset[#line2offset+1] = line_start
   end
   local max = #src+1
   line2offset[#line2offset+1] = max+1 -- +1 includes Eof
   local factory = { src_name=src_name, line2offset=line2offset, max=max }
   return setmetatable(factory, MT.position_factory)
end
-- Convert a character offset into a position (line/column/offset).
-- The line is found by binary search in `line2offset`, starting from
-- the line returned by the previous call (`last_left`), which makes
-- lookups at increasing offsets cheap.
-- Fails an assertion if `offset` is greater than `self.max`.
function MT.position_factory :get_position (offset)
    assert(offset<=self.max)
    local starts = self.line2offset
    -- Invariant aimed for: starts[lo] <= offset < starts[hi].
    local lo = self.last_left or 1
    if offset < starts[lo] then lo = 1 end -- cached line is too far
    local hi = lo + 1
    -- Try the next line first, then fall back to the whole remainder.
    if starts[hi] <= offset then hi = hi + 1 end
    if starts[hi] <= offset then hi = #starts end
    while lo + 1 ~= hi do
        local mid = math.floor((lo + hi) / 2)
        if starts[mid] <= offset then lo = mid else hi = mid end
    end
    self.last_left = lo
    local column = offset - starts[lo] + 1
    return M.new_position(lo, column, offset, self.src_name)
end
----------------------------------------------------------------------
-- Lineinfo: represent a node's range in a source file;
-- embed information about prefix and suffix comments.
----------------------------------------------------------------------
new_metatable 'lineinfo'

-- Create a lineinfo spanning from position `first` to position `last`.
-- Both arguments must be 'lexer.position' objects.
function M.new_lineinfo(first, last)
    checks('lexer.position', 'lexer.position')
    local lineinfo = { first = first, last = last }
    return setmetatable(lineinfo, MT.lineinfo)
end
-- Render as "<[C|]source|Lline|Ccolumn|Koffset[|C]>". Each of line,
-- column and offset is printed as "first-last" when the two ends
-- differ, and as a single value otherwise. The "C|" / "|C" markers
-- tell whether comments are attached to the first / last position.
function MT.lineinfo :__tostring()
    local fli, lli = self.first, self.last
    local function range(from, to)
        if from ~= to then return from .. '-' .. to end
        return from
    end
    return string.format("<%s%s|L%s|C%s|K%s%s>",
        fli.comments and "C|" or "",
        fli.source,
        range(fli.line, lli.line),
        range(fli.column, lli.column),
        range(fli.offset, lli.offset),
        lli.comments and "|C" or "")
end
----------------------------------------------------------------------
-- Token: atomic Lua language element, with a category, a content,
-- and some lineinfo relating it to its original source.
----------------------------------------------------------------------
new_metatable 'token'

-- Create a token: `content` is stored at index 1, `tag` and `lineinfo`
-- as named fields; MT.token is the metatable.
function M.new_token(tag, content, lineinfo)
    local token = { content }
    token.tag, token.lineinfo = tag, lineinfo
    return setmetatable(token, MT.token)
end
-- Render as `Tag "content" (lineinfo is deliberately left out).
function MT.token :__tostring()
    local tag, content = self.tag, self[1]
    return ("`%s %q"):format(tag, content)
end
----------------------------------------------------------------------
-- Comment: series of comment blocks with associated lineinfo.
-- To be attached to the tokens just before and just after them.
----------------------------------------------------------------------
new_metatable 'comment'

-- Group a non-empty list of comment lines into one comment object.
-- The lines are copied into the array part; `lineinfo` spans from the
-- start of the first line to the end of the last one.
function M.new_comment(lines)
    local head, tail = lines[1], lines[#lines]
    local lineinfo = M.new_lineinfo(head.lineinfo.first, tail.lineinfo.last)
    local comment = { lineinfo = lineinfo, unpack(lines) }
    return setmetatable(comment, MT.comment)
end
-- Return the text of all comment blocks as a single string.
-- Consecutive blocks are separated by as many newlines as there are
-- line breaks between the end of one block and the start of the next
-- in the source, so the vertical layout is preserved.
function MT.comment :text()
    local acc = { }
    -- Line on which the previous block ended. It must be advanced after
    -- every block: measuring each gap from the first block would
    -- accumulate extra newlines as soon as there are three blocks.
    local last_line = self[1].lineinfo.first.line
    for _, line in ipairs(self) do
        local nreturns = line.lineinfo.first.line - last_line
        acc[#acc + 1] = ("\n"):rep(nreturns)
        acc[#acc + 1] = line[1]
        last_line = line.lineinfo.last.line
    end
    return table.concat(acc)
end
-- Create one comment block: `text` at index 1, `nequals` (optional
-- number) at index 2, and `lineinfo` as a named field.
function M.new_comment_line(text, lineinfo, nequals)
    checks('string', 'lexer.lineinfo', '?number')
    local line = { text, nequals }
    line.lineinfo = lineinfo
    return line
end
----------------------------------------------------------------------
-- Patterns used by [lexer :extract] to decompose the raw string into
-- correctly tagged tokens.
----------------------------------------------------------------------
-- Every pattern starts with `^`, so it only matches at the offset
-- passed to string.match, and ends with an empty capture `()` which
-- yields the offset just after the matched text.
lexer.patterns = {
-- Possibly empty run of spaces, CR, LF and tabs.
spaces = "^[ \r\n\t]*()",
-- "--" up to the end of line; captures the text, consumes the newline.
short_comment = "^%-%-([^\n]*)\n?()",
--final_short_comment = "^%-%-([^\n]*)()$",
-- "--[=*[ ... ]=*]"; captures the '=' run and the content.
long_comment = "^%-%-%[(=*)%[\n?(.-)%]%1%]()",
-- "[=*[ ... ]=*]"; captures the '=' run and the content.
long_string = "^%[(=*)%[\n?(.-)%]%1%]()",
-- Integer with an LL suffix, optionally preceded by u/U.
number_longint = "^%d+[uU]?[lL][lL]()",
number_longint_hex = "^%x+[uU]?[lL][lL]()",
-- Two alternatives each: digits with optional fraction, or a number
-- starting with the decimal point.
number_mantissa = { "^%d+%.?%d*()", "^%d*%.%d+()" },
number_mantissa_hex = { "^%x+%.?%x*()", "^%x*%.%x+()" }, --Lua5.1 and Lua5.2
number_exponent = "^[eE][%+%-]?%d+()",
number_exponent_hex = "^[pP][%+%-]?%d+()", --Lua5.2
-- "0x" / "0X" prefix of hexadecimal numbers.
number_hex = "^0[xX]()",
-- Trailing i/I suffix.
number_imaginary = "^[iI]()",
-- Identifier or alphanumeric keyword; captures the word.
word = "^([%a_][%w_]*)()",
}
----------------------------------------------------------------------
-- unescape a whole string, applying [unesc_digits] and
-- [unesc_letter] as many times as required.
----------------------------------------------------------------------
-- Decode the escape sequences of a short string's raw content.
-- Four passes are applied in order: `\z` (Lua 5.2), decimal `\ddd`,
-- hexadecimal `\xXX` (Lua 5.2), then single-character escapes.
-- Raises an error for a decimal escape above 255.
local function unescape_string (s)

    -- Characters denoted by single-letter escapes.
    local letters = {
        a = "\a", b = "\b", f = "\f",
        n = "\n", r = "\r", t = "\t", v = "\v",
        ["\\"] = "\\", ["'"] = "'", ['"'] = '"', ["\n"] = "\n" }

    -- Analyse a run of backslashes. An even run escapes itself and
    -- returns false; an odd run returns true plus the run without its
    -- last backslash, which is the one escaping what follows.
    local function split_run(backslashes)
        if #backslashes % 2 == 0 then return false end
        return true, backslashes:sub(1, -2)
    end

    -- A decoded backslash is doubled so that the final single-character
    -- pass turns it back into one backslash
    -- (tests: "\092b" --> "\\b", "\x5cb" --> "\\b").
    local function protect(c)
        if c == '\\' then return '\\\\' end
        return c
    end

    -- Decimal escape, e.g. digits "123" --> string.char(123).
    local function unesc_digits(backslashes, digits)
        local escaping, rest = split_run(backslashes)
        if not escaping then return backslashes .. digits end
        local units, tens, hundreds = digits:reverse():byte(1, 3)
        local zero = string.byte "0"
        local code = (units or zero) + 10*(tens or zero)
                   + 100*(hundreds or zero) - 111*zero
        if code > 255 then
            error ("Illegal escape sequence '\\"..digits..
                   "' in string: ASCII codes must be in [0..255]")
        end
        return rest .. protect(string.char(code))
    end

    -- Hexadecimal escape: two hex digits --> one character.
    local function unesc_hex(backslashes, digits)
        local escaping, rest = split_run(backslashes)
        if not escaping then return backslashes .. 'x' .. digits end
        return rest .. protect(string.char(tonumber(digits, 16)))
    end

    -- Lua 5.2 `\z`: drop the `z` and the whitespace following it.
    local function unesc_z(backslashes, more)
        local escaping, rest = split_run(backslashes)
        if escaping then return rest end
        return backslashes .. more
    end

    -- Single-character escape; unknown characters stand for themselves.
    local function unesc_letter(x)
        return letters[x] or x
    end

    local out = s:gsub("(\\+)(z%s*)", unesc_z) -- Lua 5.2
    out = out:gsub("(\\+)([0-9][0-9]?[0-9]?)", unesc_digits)
    out = out:gsub("(\\+)x([0-9a-fA-F][0-9a-fA-F])", unesc_hex) -- Lua 5.2
    out = out:gsub("\\(%D)", unesc_letter)
    return out
end
-- Names of the extractor methods, tried in this order by
-- [lexer :extract] until one of them returns a tag. Order matters:
-- long comments come before short ones because both start with "--".
lexer.extractors = {
"extract_long_comment", "extract_short_comment",
"extract_short_string", "extract_word", "extract_number",
"extract_long_string", "extract_symbol" }
----------------------------------------------------------------------
-- Really extract next token from the raw string
-- (and update the index).
-- loc: offset of the position just after spaces and comments
-- previous_i: offset in src before extraction began
----------------------------------------------------------------------
function lexer :extract ()
-- Comments met while looking for the next real token; local to this call.
-- NOTE(review): this is distinct from the `self.attached_comments` field,
-- which this function never reads -- confirm that is intended.
local attached_comments = { }
-- Build a token from (tag, content, lineinfo), attach the accumulated
-- comments to it, and link it with the previously extracted position.
local function gen_token(...)
local token = M.new_token(...)
if #attached_comments>0 then -- attach previous comments to token
local comments = M.new_comment(attached_comments)
token.lineinfo.first.comments = comments
if self.lineinfo_last_extracted then
self.lineinfo_last_extracted.comments = comments
end
attached_comments = { }
end
-- Cross-link the end of the previous token and the start of this one
-- through their `facing` fields, then remember where this token ends.
token.lineinfo.first.facing = self.lineinfo_last_extracted
self.lineinfo_last_extracted.facing = assert(token.lineinfo.first)
self.lineinfo_last_extracted = assert(token.lineinfo.last)
return token
end
while true do -- loop until a non-comment token is found
-- skip whitespaces
self.i = self.src:match (self.patterns.spaces, self.i)
-- Past the end of the source: produce an `Eof token.
if self.i>#self.src then
local fli = self.posfact :get_position (#self.src+1)
local lli = self.posfact :get_position (#self.src+1) -- ok?
local tok = gen_token("Eof", "eof", M.new_lineinfo(fli, lli))
tok.lineinfo.last.facing = lli
return tok
end
local i_first = self.i -- loc = position after whitespaces
-- try every extractor until a token is found
for _, extractor in ipairs(self.extractors) do
-- An extractor returns a tag on success, after advancing self.i.
local tag, content, xtra = self [extractor] (self)
if tag then
local fli = self.posfact :get_position (i_first)
local lli = self.posfact :get_position (self.i-1)
local lineinfo = M.new_lineinfo(fli, lli)
if tag=='Comment' then
-- `xtra` is the number of '=' signs for a long comment, nil for a
-- short one; it is stored at index 2 of the comment line.
local prev_comment = attached_comments[#attached_comments]
if not xtra -- new comment is short
and prev_comment and not prev_comment[2] -- prev comment is short
and prev_comment.lineinfo.last.line+1==fli.line then -- adjascent lines
-- concat with previous comment
prev_comment[1] = prev_comment[1].."\n"..content -- TODO quadratic, BAD!
prev_comment.lineinfo.last = lli
else -- accumulate comment
local comment = M.new_comment_line(content, lineinfo, xtra)
table.insert(attached_comments, comment)
end
break -- back to skipping spaces
else -- not a comment: real token, then
return gen_token(tag, content, lineinfo)
end -- if token is a comment
end -- if token found
end -- for each extractor
end -- while token is a comment
end -- :extract()
----------------------------------------------------------------------
-- Extract a short comment.
----------------------------------------------------------------------
-- Try to read a "--" comment at the current offset.
-- On success, advance self.i past it and return 'Comment', its text
-- and nil (no '=' count); otherwise return nothing.
function lexer :extract_short_comment()
    -- TODO: handle final_short_comment
    local text, after = self.src:match(self.patterns.short_comment, self.i)
    if not text then return end
    self.i = after
    return 'Comment', text, nil
end
----------------------------------------------------------------------
-- Extract a long comment.
----------------------------------------------------------------------
-- Try to read a "--[=*[ ... ]=*]" comment at the current offset.
-- On success, advance self.i past it and return "Comment", its content
-- and the number of '=' signs; otherwise return nothing.
function lexer :extract_long_comment()
    local equals, content, after =
        self.src:match(self.patterns.long_comment, self.i)
    if not after then return end
    self.i = after
    return "Comment", content, #equals
end
----------------------------------------------------------------------
-- Extract a '...' or "..." short string.
----------------------------------------------------------------------
-- Try to read a quoted string at the current offset.
-- Returns nothing if the current character is not a quote,
-- nil plus 'Unterminated string' if a raw line break or the end of the
-- source is met before the closing quote, and 'String' plus the
-- unescaped content (with self.i advanced) otherwise.
function lexer :extract_short_string()
    local src = self.src
    local quote = src:sub(self.i, self.i) -- first char
    if quote ~= "'" and quote ~= '"' then return end -- no match
    local first = self.i + 1
    -- Next interesting char: backslash, line break or closing quote;
    -- also captures the offset after it and the character following it.
    local stop_patt = "([\\\r\n"..quote.."])()(.?)"
    local j = first
    while true do
        local x, y
        x, j, y = src:match(stop_patt, j)
        if x == quote then
            break -- end of string
        elseif x == '\\' then
            if y == 'z' then -- Lua 5.2 \z
                j = src:match("^%s*()", j+1)
            else
                j = j + 1 -- escaped char
            end
        else
            assert (not x or x=='\r' or x=='\n')
            return nil, 'Unterminated string'
        end
    end
    self.i = j
    return 'String', unescape_string (src:sub(first, j-2))
end
----------------------------------------------------------------------
-- Extract Id or Keyword.
----------------------------------------------------------------------
-- Try to read a word at the current offset. On success, advance self.i
-- and return 'Keyword' if the word is registered in self.alpha, 'Id'
-- otherwise, followed by the word; return nothing if no word is found.
function lexer :extract_word()
    local word, after = self.src:match(self.patterns.word, self.i)
    if not word then return end
    self.i = after
    if self.alpha[word] then return 'Keyword', word end
    return 'Id', word
end
----------------------------------------------------------------------
-- Extract Number.
----------------------------------------------------------------------
-- Try to read a number at the current offset: an optional hexadecimal
-- prefix, then either a long integer (LL suffix) or a mantissa with
-- optional exponent and imaginary suffix.
-- On success, advance self.i and return 'Number' plus the value given
-- by tonumber(), or the raw text when tonumber() cannot convert it.
-- Returns nothing if no number is found.
function lexer :extract_number()
    local patt, src, start = self.patterns, self.src, self.i
    local after_prefix = src:match(patt.number_hex, start)
    local hex = after_prefix ~= nil
    -- Select the decimal or hexadecimal flavour of each pattern.
    local longint   = hex and patt.number_longint_hex or patt.number_longint
    local mantissa1 = hex and patt.number_mantissa_hex[1] or patt.number_mantissa[1]
    local mantissa2 = hex and patt.number_mantissa_hex[2] or patt.number_mantissa[2]
    local exponent  = hex and patt.number_exponent_hex or patt.number_exponent
    local j = after_prefix
    if not hex then j = start end
    local after_long = src:match(longint, j)
    if after_long then
        j = after_long
    else
        j = src:match(mantissa1, j) or src:match(mantissa2, j)
        if not j then return end
        j = src:match(exponent, j) or j
        j = src:match(patt.number_imaginary, j) or j
    end
    local str = src:sub(start, j-1)
    self.i = j
    -- The raw text is the fallback for formats not supported by the
    -- current interpreter.
    return 'Number', (tonumber (str) or str)
end
----------------------------------------------------------------------
-- Extract long string.
----------------------------------------------------------------------
-- Try to read a "[=*[ ... ]=*]" string at the current offset.
-- On success, advance self.i and return 'String' plus the raw content
-- (no unescaping); otherwise return nothing.
function lexer :extract_long_string()
    local _, content, after =
        self.src:match(self.patterns.long_string, self.i)
    if not after then return end
    self.i = after
    return 'String', content
end
----------------------------------------------------------------------
-- Extract symbol.
----------------------------------------------------------------------
-- Read a symbol at the current offset; always succeeds.
-- Among the multi-character symbols registered for the current
-- character, the LONGEST one matching the source is chosen, whatever
-- the order in which symbols were registered. Taking the first match
-- instead would make a longer symbol unreachable whenever one of its
-- prefixes was registered before it.
-- If none matches, the single current character is the symbol.
-- Advances self.i past the symbol and returns 'Keyword' plus its text.
function lexer :extract_symbol()
    local src, i = self.src, self.i
    local k = src:sub(i, i)
    local candidates = self.sym[k] -- symbols starting with `k`
    local best
    if candidates then
        for _, sym in pairs(candidates) do
            if (not best or #sym > #best)
            and sym == src:sub(i, i + #sym - 1) then
                best = sym
            end
        end
    end
    if best then
        self.i = i + #best
        return 'Keyword', best
    end
    self.i = i + 1
    return 'Keyword', k
end
----------------------------------------------------------------------
-- Add a keyword to the list of keywords recognized by the lexer.
----------------------------------------------------------------------
-- Register keyword `w`, or every keyword of the list `w`.
--  * a word goes into the `alpha` set;
--  * a symbol of two or more punctuation characters goes into the
--    `sym` list of its first character;
--  * a single punctuation character needs no registration;
--  * anything else raises "Invalid keyword".
function lexer :add (w, ...)
    assert(not ..., "lexer :add() takes only one arg, although possibly a table")
    if type (w) == "table" then
        for _, x in ipairs (w) do self :add (x) end
        return
    end
    if w:match (self.patterns.word .. "$") then
        self.alpha [w] = true
    elseif w:match "^%p%p+$" then
        local first = w:sub(1,1)
        local bucket = self.sym [first]
        if not bucket then
            bucket = { }
            self.sym [first] = bucket
        end
        bucket [#bucket + 1] = w
    elseif not w:match "^%p$" then
        error "Invalid keyword"
    end
end
----------------------------------------------------------------------
-- Return the [n]th next token, without consuming it.
-- [n] defaults to 1. If it goes pass the end of the stream, an EOF
-- token is returned.
----------------------------------------------------------------------
-- Return the [n]th upcoming token (default: the first) without
-- consuming it, extracting and buffering tokens in self.peeked as
-- needed.
function lexer :peek (n)
    n = n or 1
    local first_missing = #self.peeked + 1
    if first_missing <= n then
        for i = first_missing, n do
            self.peeked [i] = self :extract()
        end
    end
    return self.peeked [n]
end
----------------------------------------------------------------------
-- Return the [n]th next token, removing it as well as the 0..n-1
-- previous tokens. [n] defaults to 1. If it goes pass the end of the
-- stream, an EOF token is returned.
----------------------------------------------------------------------
-- Consume [n] tokens (default: one) and return the last of them.
-- Also records the end position of that token in
-- self.lineinfo_last_consumed.
function lexer :next (n)
    n = n or 1
    self :peek (n) -- make sure the n tokens are buffered
    local tok
    for _ = 1, n do
        tok = table.remove (self.peeked, 1)
    end
    self.lineinfo_last_consumed = tok.lineinfo.last
    return tok
end
----------------------------------------------------------------------
-- Returns an object which saves the stream's current state.
----------------------------------------------------------------------
-- FIXME there are more fields than that to save
-- The saved state is { offset, copy of the peeked-token list }.
function lexer :save ()
    local peeked_copy = { unpack(self.peeked) }
    return { self.i, peeked_copy }
end
----------------------------------------------------------------------
-- Restore the stream's state, as saved by method [save].
----------------------------------------------------------------------
-- FIXME there are more fields than that to restore
-- `s` is a state as returned by [save]: { offset, peeked-token list }.
-- The token list is copied rather than adopted: [next] removes tokens
-- from self.peeked in place, so sharing the saved list would corrupt
-- the saved state and break a second restore from the same state.
function lexer :restore (s)
    self.i = s[1]
    local saved, peeked = s[2], { }
    for idx = 1, #saved do peeked[idx] = saved[idx] end
    self.peeked = peeked
end
----------------------------------------------------------------------
-- Resynchronize: cancel any token in self.peeked, by emptying the
-- list and resetting the indexes
----------------------------------------------------------------------
-- If tokens are buffered, rewind self.i to the start of the first one
-- (or of the comments attached before it), empty the buffer, and keep
-- those comments in self.attached_comments. Does nothing when no token
-- is buffered.
function lexer :sync()
    local pending = self.peeked[1]
    if not pending then return end
    local first = pending.lineinfo.first
    local start = first
    if first.comments then start = first.comments.lineinfo.first end
    self.i = start.offset
    self.column_offset = self.i - start.column
    self.peeked = { }
    self.attached_comments = first.comments or { }
end
----------------------------------------------------------------------
-- Take the source and offset of an old lexer.
----------------------------------------------------------------------
-- Resynchronize both streams, then make this one continue where `old`
-- stands by copying its offset, source, comments accumulator and
-- position factory. Returns self.
function lexer :takeover(old)
    self :sync()
    old :sync()
    self.i                 = old.i
    self.src               = old.src
    self.attached_comments = old.attached_comments
    self.posfact           = old.posfact
    return self
end
----------------------------------------------------------------------
-- Return the current position in the sources. This position is between
-- two tokens, and can be within a space / comment area, and therefore
-- have a non-null width. :lineinfo_left() returns the beginning of the
-- separation area, :lineinfo_right() returns the end of that area.
--
-- ____ last consummed token ____ first unconsummed token
-- / /
-- XXXXX <spaces and comments> YYYYY
-- \____ \____
-- :lineinfo_left() :lineinfo_right()
----------------------------------------------------------------------
-- Start position of the first unconsumed token (peeking it if needed).
function lexer :lineinfo_right()
    local upcoming = self :peek(1)
    return upcoming.lineinfo.first
end
-- End position of the last consumed token, as recorded by [next].
function lexer :lineinfo_left()
    local last_consumed = self.lineinfo_last_consumed
    return last_consumed
end
----------------------------------------------------------------------
-- Create a new lexstream.
----------------------------------------------------------------------
-- Create a token stream whose class is `self`.
-- @param src_or_stream  either a source string, or an existing stream
--        from which the new one takes over (see [takeover]).
-- @param name  name of the source, "?" by default; only used when
--        `src_or_stream` is a string.
-- @return the new stream. Fails an assertion for any other argument
--         type.
function lexer :newstream (src_or_stream, name)
    name = name or "?"
    local kind = type(src_or_stream)
    if kind == 'table' then -- it's a stream
        local old = src_or_stream
        -- [takeover] starts with self:sync(), which indexes self.peeked,
        -- and [extract] indexes self.lineinfo_last_extracted: both must
        -- exist before taking over. Lexing resumes right after the last
        -- token consumed from `old`, so that is the reference position.
        local resume_from = old.lineinfo_last_consumed
        local stream = setmetatable ({
            src_name = old.src_name;
            peeked   = { };
            lineinfo_last_extracted = resume_from;
            lineinfo_last_consumed  = resume_from }, self)
        return stream :takeover (old)
    elseif kind == 'string' then -- it's a source string
        local src = src_or_stream
        local pos1 = M.new_position(1, 1, 1, name)
        local stream = {
            src_name = name;        -- Name of the file
            src      = src;         -- The source, as a single string
            peeked   = { };         -- Already peeked, but not discarded yet, tokens
            i        = 1;           -- Character offset in src
            attached_comments = { },-- comments accumulator
            lineinfo_last_extracted = pos1,
            lineinfo_last_consumed  = pos1,
            posfact = M.new_position_factory (src, name)
        }
        setmetatable (stream, self)
        -- Skip initial sharp-bang for Unix scripts
        -- FIXME: redundant with mlp.chunk()
        if src :match "^#!" then
            local endofline = src :find "\n"
            -- Without a line break the whole source is the sharp-bang
            -- line: start past its last character, not on it.
            stream.i = endofline and (endofline + 1) or (#src + 1)
        end
        return stream
    else
        assert(false, ":newstream() takes a source string or a stream, not a "..
                      kind)
    end
end
----------------------------------------------------------------------
-- If there are no ... args, return the content of token a (a string,
-- whose truth value is true) if it's a `Keyword{ }, or false otherwise.
-- If there are ... args, they have to be strings: if the token a is a
-- keyword, and its content is one of the ... args, then return that
-- content. If a is not a keyword, or is not in ..., return false.
----------------------------------------------------------------------
-- Test whether token `a` is a keyword, optionally restricted to the
-- keywords listed in `...`. Returns the keyword's content on success,
-- false otherwise.
function lexer :is_keyword (a, ...)
    if not (a and a.tag == "Keyword") then return false end
    local content, words = a[1], {...}
    if #words == 0 then return content end
    for _, w in ipairs (words) do
        if w == content then return w end
    end
    return false
end
----------------------------------------------------------------------
-- Cause an error if the next token isn't a keyword whose content
-- is listed among ... args (which have to be strings).
----------------------------------------------------------------------
-- Consume the next token and require it to be a keyword, listed among
-- `...` when arguments are given. Returns the keyword's content;
-- raises an error describing the token found otherwise.
function lexer :check (...)
    local words = {...}
    local a = self :next()
    local function fail ()
        local got = tostring (a)
        local expected = table.concat (words,"', '")
        error ("Got " .. got ..
               ", expected one of these keywords : '" ..
               expected .. "'")
    end
    if not (a and a.tag == "Keyword") then fail () end
    local content = a[1]
    if #words == 0 then return content end
    for _, w in ipairs (words) do
        if w == content then return w end
    end
    fail ()
end
----------------------------------------------------------------------
--
----------------------------------------------------------------------
-- Create a derived lexer class: it gets its own copies of the keyword
-- tables (`alpha` set and `sym` lists), has `self` as metatable, and
-- is its own `__index` so that it can serve as a class in turn.
function lexer :clone()
    local clone = { alpha = { }, sym = { } }
    for word in pairs(self.alpha) do
        clone.alpha[word] = true
    end
    for letter, list in pairs(self.sym) do
        clone.sym[letter] = { unpack(list) }
    end
    setmetatable(clone, self)
    clone.__index = clone
    return clone
end
----------------------------------------------------------------------
-- Cancel everything left in a lexer, all subsequent attempts at
-- `:peek()` or `:next()` will return `Eof`.
----------------------------------------------------------------------
-- Move the offset past the end of the source and drop buffered tokens
-- and comments.
-- NOTE(review): nothing else in this file reads `lineinfo_last`;
-- possibly `lineinfo_last_extracted` was meant -- confirm with callers.
function lexer :kill()
    local eof_offset = #self.src + 1
    self.i = eof_offset
    self.peeked = { }
    self.attached_comments = { }
    self.lineinfo_last = self.posfact :get_position (eof_offset)
end
return M