Compatible Android
@ -0,0 +1,162 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
--
-- Convert between various code representation formats. Atomic
-- converters are written in extenso, others are composed automatically
-- by chaining the atomic ones together in a closure.
--
-- Supported formats are:
--
-- * srcfile:   the name of a file containing sources.
-- * src:       these sources as a single string.
-- * lexstream: a stream of lexemes.
-- * ast:       an abstract syntax tree.
-- * proto:     a (Yueliang) structure containing a high-level
--              representation of bytecode. Largely based on the
--              Proto structure in Lua's VM.
-- * bytecode:  a string dump of the function, as taken by
--              loadstring() and produced by string.dump().
-- * function:  an executable Lua function in RAM.
--
--------------------------------------------------------------------------------

require 'checks'

local M = { }

--------------------------------------------------------------------------------
-- Order of the transformations. If 'a' is on the left of 'b', then an 'a' can
-- be transformed into a 'b' (but not the other way around).
-- M.sequence goes from numbers to format names, M.order goes from format
-- names to numbers.
--------------------------------------------------------------------------------
M.sequence = {
    'srcfile', 'src', 'lexstream', 'ast', 'proto', 'bytecode', 'function' }

local arg_types = {
    srcfile   = { 'string', '?string' },
    src       = { 'string', '?string' },
    lexstream = { 'lexer.stream', '?string' },
    ast       = { 'table', '?string' },
    proto     = { 'table', '?string' },
    bytecode  = { 'string', '?string' },
}

M.order = { }; for a, b in pairs(M.sequence) do M.order[b] = a end

local CONV = { } -- conversion metatable __index

function CONV :srcfile_to_src(x, name)
    checks('metalua.compiler', 'string', '?string')
    name = name or '@'..x
    local f, msg = io.open (x, 'rb')
    if not f then error(msg) end
    local r, msg = f :read '*a'
    if not r then error("Cannot read file '"..x.."': "..msg) end
    f :close()
    return r, name
end

function CONV :src_to_lexstream(src, name)
    checks('metalua.compiler', 'string', '?string')
    local r = self.parser.lexer :newstream (src, name)
    return r, name
end

function CONV :lexstream_to_ast(lx, name)
    checks('metalua.compiler', 'lexer.stream', '?string')
    local r = self.parser.chunk(lx)
    r.source = name
    return r, name
end

local bytecode_compiler = nil -- cache to avoid repeated `pcall(require(...))`
local function get_bytecode_compiler()
    if bytecode_compiler then return bytecode_compiler else
        local status, result = pcall(require, 'metalua.compiler.bytecode')
        if status then
            bytecode_compiler = result
            return result
        elseif string.match(result, "not found") then
            error "Compilation only available with full Metalua"
        else error (result) end
    end
end

function CONV :ast_to_proto(ast, name)
    checks('metalua.compiler', 'table', '?string')
    return get_bytecode_compiler().ast_to_proto(ast, name), name
end

function CONV :proto_to_bytecode(proto, name)
    return get_bytecode_compiler().proto_to_bytecode(proto), name
end

function CONV :bytecode_to_function(bc, name)
    checks('metalua.compiler', 'string', '?string')
    return loadstring(bc, name)
end

-- Create all sensible combinations
for i = 1, #M.sequence do
    local src = M.sequence[i]
    for j = i+2, #M.sequence do
        local dst = M.sequence[j]
        local dst_name = src.."_to_"..dst
        local my_arg_types = arg_types[src]
        local functions = { }
        for k = i, j-1 do
            local name = M.sequence[k].."_to_"..M.sequence[k+1]
            local f = assert(CONV[name], name)
            table.insert (functions, f)
        end
        CONV[dst_name] = function(self, a, b)
            checks('metalua.compiler', unpack(my_arg_types))
            for _, f in ipairs(functions) do
                a, b = f(self, a, b)
            end
            return a, b
        end
        --printf("Created M.%s out of %s", dst_name, table.concat(n, ', '))
    end
end

--------------------------------------------------------------------------------
-- These converters go in the "wrong" direction, and therefore cannot be
-- composed with the ones generated above.
--------------------------------------------------------------------------------
function CONV :function_to_bytecode(...) return string.dump(...) end

function CONV :ast_to_src(...)
    require 'metalua.loader' -- ast_to_string isn't written in plain lua
    return require 'metalua.compiler.ast_to_src' (...)
end

local MT = { __index=CONV, __type='metalua.compiler' }

function M.new()
    local parser = require 'metalua.compiler.parser' .new()
    local self = { parser = parser }
    setmetatable(self, MT)
    return self
end

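-- Illustrative usage sketch (not from the original sources; assumes a full
-- Metalua install so the parser and bytecode modules can be loaded):
local function _usage_demo()
    local mlc = M.new()
    local ast = mlc :src_to_ast ("return 1 + 1")    -- composite converter
    local f   = mlc :ast_to_function (ast, "=demo") -- ast -> proto -> bytecode -> function
    return f() -- 2
end
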
return M
@ -0,0 +1,42 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

-- Export all public APIs from sub-modules, squashed into a flat namespace

local MT = { __type='metalua.compiler.parser' }

local MODULE_REL_NAMES = { "annot.grammar", "expr", "meta", "misc",
                           "stat", "table", "ext" }

local function new()
    local M = {
        lexer = require "metalua.compiler.parser.lexer" ();
        extensions = { } }
    for _, rel_name in ipairs(MODULE_REL_NAMES) do
        local abs_name = "metalua.compiler.parser."..rel_name
        local extender = require (abs_name)
        if not M.extensions[abs_name] then
            if type (extender) == 'function' then extender(M) end
            M.extensions[abs_name] = extender
        end
    end
    return setmetatable(M, MT)
end
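
-- Illustrative sketch (not from the original sources): a parser built by
-- new() exposes the entry points installed by the extender modules, e.g.
-- chunk() from the "misc" module and the lexer instantiated above.
local function _usage_demo(src)
    local mlp = new()
    local lx  = mlp.lexer :newstream (src)
    return mlp.chunk (lx) -- AST of the whole source chunk
end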

return { new = new }
@ -0,0 +1,48 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

require 'checks'
local gg = require 'metalua.grammar.generator'
local M = { }

function M.opt(mlc, primary, a_type)
    checks('table', 'table|function', 'string')
    return gg.sequence{
        primary,
        gg.onkeyword{ "#", function() return assert(mlc.annot[a_type]) end },
        builder = function(x)
            local t, annot = unpack(x)
            return annot and { tag='Annot', t, annot } or t
        end }
end

-- Split a list of "foo" and "`Annot{foo, annot}" elements into a list of
-- "foo" and a list of "annot".
-- No annotation list is returned if none of the elements were annotated.
function M.split(lst)
    local x, a, some = { }, { }, false
    for i, p in ipairs(lst) do
        if p.tag=='Annot' then
            some, x[i], a[i] = true, unpack(p)
        else x[i] = p end
    end
    if some then return x, a else return lst end
end
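
-- Illustrative sketch (not from the original sources): how M.split behaves
-- on a list where only the second element carries an annotation.
local function _split_demo()
    local foo, bar = { tag='Id', 'foo' }, { tag='Id', 'bar' }
    local t = { tag='TId', 'table' }
    local xs, annots = M.split{ foo, { tag='Annot', bar, t } }
    -- xs = { foo, bar }; annots = { [2]=t }. With no `Annot elements,
    -- the original list comes back and no annotation list at all.
    return xs, annots
end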

return M
@ -0,0 +1,112 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

local gg = require 'metalua.grammar.generator'

return function(M)
    local _M = gg.future(M)
    M.lexer :add '->'
    local A = { }
    local _A = gg.future(A)
    M.annot = A

    -- Type identifier: Lua keywords such as `"nil"` are allowed.
    function M.annot.tid(lx)
        local w = lx :next()
        local t = w.tag
        if t=='Keyword' and w[1] :match '^[%a_][%w_]*$' or w.tag=='Id'
        then return {tag='TId'; lineinfo=w.lineinfo; w[1]}
        else return gg.parse_error (lx, 'tid expected') end
    end

    local field_types = { var='TVar'; const='TConst';
                          currently='TCurrently'; field='TField' }

    -- TODO check lineinfo
    function M.annot.tf(lx)
        local tk = lx:next()
        local w = tk[1]
        local tag = field_types[w]
        if not tag then error ('Invalid field type '..w)
        elseif tag=='TField' then return {tag='TField'} else
            local te = M.te(lx)
            return {tag=tag; te}
        end
    end

    M.annot.tebar_content = gg.list{
        name        = 'tebar content',
        primary     = _A.te,
        separators  = { ",", ";" },
        terminators = ")" }

    M.annot.tebar = gg.multisequence{
        name = 'annot.tebar',
        --{ '*', builder = 'TDynbar' }, -- maybe not user-available
        { '(', _A.tebar_content, ')',
          builder = function(x) return x[1] end },
        { _A.te }
    }

    M.annot.te = gg.multisequence{
        name = 'annot.te',
        { _A.tid, builder=function(x) return x[1] end },
        { '*', builder = 'TDyn' },
        { "[",
          gg.list{
              primary = gg.sequence{
                  _M.expr, "=", _A.tf,
                  builder = 'TPair'
              },
              separators  = { ",", ";" },
              terminators = { "]", "|" } },
          gg.onkeyword{ "|", _A.tf },
          "]",
          builder = function(x)
              local fields, other = unpack(x)
              return { tag='TTable', other or {tag='TField'}, fields }
          end }, -- "[ ... ]"
        { '(', _A.tebar_content, ')', '->', '(', _A.tebar_content, ')',
          builder = function(x)
              local p, r = unpack(x)
              return {tag='TFunction', p, r }
          end } }

    M.annot.ts = gg.multisequence{
        name = 'annot.ts',
        { 'return', _A.tebar_content, builder='TReturn' },
        { _A.tid, builder = function(x)
              if x[1][1]=='pass' then return {tag='TPass'}
              else error "Bad statement type" end
          end } }
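
    -- Illustrative examples (inferred from the parsers above, not from
    -- original documentation) of what the type-expression grammar accepts:
    --   table                     -- tid          -> `TId 'table'
    --   *                         -- dynamic type -> `TDyn
    --   (te) -> (te)              -- function     -> `TFunction{ params, results }
    --   [ expr = var te | field ] -- table shape  -> `TTable{ ... }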

    -- TODO: add parsers for statements:
    -- #return tebar
    -- #alias = te
    -- #ell = tf
    --[[
    M.annot.stat_annot = gg.sequence{
        gg.list{ primary=_A.tid, separators='.' },
        '=',
        XXX??,
        builder = 'Annot' }
    --]]

    return M.annot
end
@ -0,0 +1,27 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

-- Shared common parser table. It will be filled by parser.init(), and
-- every other module will be able to call its elements at runtime.
--
-- If the table were created directly in parser.init, a circular dependency
-- would arise: parser.init depends on the other modules to fill the table,
-- so those modules could not simultaneously depend on it.

return { }
@ -0,0 +1,213 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.expr()]
-- * [mlp.expr_list()]
-- * [mlp.func_val()]
--
-------------------------------------------------------------------------------

local pp    = require 'metalua.pprint'
local gg    = require 'metalua.grammar.generator'
local annot = require 'metalua.compiler.parser.annot.generator'

return function(M)
    local _M     = gg.future(M)
    local _table = gg.future(M, 'table')
    local _meta  = gg.future(M, 'meta')  -- TODO move to ext?
    local _annot = gg.future(M, 'annot') -- TODO move to annot

    ----------------------------------------------------------------------------
    -- Non-empty expression list. It isn't actually used here, but it's handy
    -- to offer to users.
    ----------------------------------------------------------------------------
    M.expr_list = gg.list{ primary=_M.expr, separators="," }

    ----------------------------------------------------------------------------
    -- Helpers for function applications / method applications
    ----------------------------------------------------------------------------
    M.func_args_content = gg.list{
        name        = "function arguments",
        primary     = _M.expr,
        separators  = ",",
        terminators = ")" }

    -- Used to parse methods
    M.method_args = gg.multisequence{
        name = "function argument(s)",
        { "{", _table.content, "}" },
        { "(", _M.func_args_content, ")", builder = unpack },
        { "+{", _meta.quote_content, "}" },
        -- TODO lineinfo?
        function(lx) local r = M.opt_string(lx); return r and {r} or { } end }

|
||||
--------------------------------------------------------------------------------
|
||||
-- [func_val] parses a function, from opening parameters parenthese to
|
||||
-- "end" keyword included. Used for anonymous functions as well as
|
||||
-- function declaration statements (both local and global).
|
||||
--
|
||||
-- It's wrapped in a [_func_val] eta expansion, so that when expr
|
||||
-- parser uses the latter, they will notice updates of [func_val]
|
||||
-- definitions.
|
||||
--------------------------------------------------------------------------------
|
||||
M.func_params_content = gg.list{
|
||||
name="function parameters",
|
||||
gg.multisequence{ { "...", builder = "Dots" }, annot.opt(M, _M.id, 'te') },
|
||||
separators = ",", terminators = {")", "|"} }
|
||||
|
||||
    -- TODO move to annot
    M.func_val = gg.sequence{
        name = "function body",
        "(", _M.func_params_content, ")", _M.block, "end",
        builder = function(x)
            local params, body = unpack(x)
            local annots, some = { }, false
            for i, p in ipairs(params) do
                if p.tag=='Annot' then
                    params[i], annots[i], some = p[1], p[2], true
                else annots[i] = false end
            end
            if some then return { tag='Function', params, body, annots }
            else return { tag='Function', params, body } end
        end }

    local func_val = function(lx) return M.func_val(lx) end

    ----------------------------------------------------------------------------
    -- Default parser for primary expressions
    ----------------------------------------------------------------------------
    function M.id_or_literal (lx)
        local a = lx:next()
        if a.tag~="Id" and a.tag~="String" and a.tag~="Number" then
            local msg
            if a.tag=='Eof' then
                msg = "End of file reached when an expression was expected"
            elseif a.tag=='Keyword' then
                msg = "An expression was expected, and `"..a[1]..
                      "' can't start an expression"
            else
                msg = "Unexpected expr token " .. pp.tostring (a)
            end
            gg.parse_error (lx, msg)
        end
        return a
    end

    ----------------------------------------------------------------------------
    -- Builder generator for operators. It wouldn't be worth the trouble if
    -- the "|x|" lambda notation were allowed, but then Lua 5.1 wouldn't
    -- compile it.
    ----------------------------------------------------------------------------

    -- opf1 = |op| |_,a| `Op{ op, a }
    local function opf1 (op) return
        function (_,a) return { tag="Op", op, a } end end

    -- opf2 = |op| |a,_,b| `Op{ op, a, b }
    local function opf2 (op) return
        function (a,_,b) return { tag="Op", op, a, b } end end

    -- opf2r = |op| |a,_,b| `Op{ op, b, a } -- (args reversed)
    local function opf2r (op) return
        function (a,_,b) return { tag="Op", op, b, a } end end

    local function op_ne(a, _, b)
        -- This version allows removing the "ne" operator from the AST
        -- definition. However, it doesn't always produce exactly the same
        -- bytecode as Lua 5.1.
        return { tag="Op", "not",
                 { tag="Op", "eq", a, b, lineinfo= {
                       first = a.lineinfo.first, last = b.lineinfo.last } } }
    end

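    -- For instance (illustrative): with the builders above, "a ~= b" yields
    --   `Op{ 'not', `Op{ 'eq', a, b } }
    -- and "a > b" reuses 'lt' with reversed operands via opf2r.
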
    ----------------------------------------------------------------------------
    --
    -- complete expression
    --
    ----------------------------------------------------------------------------

    -- FIXME: set line number. In [expr] transformers probably
    M.expr = gg.expr {
        name = "expression",
        primary = gg.multisequence{
            name = "expr primary",
            { "(", _M.expr, ")", builder = "Paren" },
            { "function", _M.func_val, builder = unpack },
            { "-{", _meta.splice_content, "}", builder = unpack },
            { "+{", _meta.quote_content, "}", builder = unpack },
            { "nil", builder = "Nil" },
            { "true", builder = "True" },
            { "false", builder = "False" },
            { "...", builder = "Dots" },
            { "{", _table.content, "}", builder = unpack },
            _M.id_or_literal },

        infix = {
            name = "expr infix op",
            { "+",  prec = 60, builder = opf2 "add" },
            { "-",  prec = 60, builder = opf2 "sub" },
            { "*",  prec = 70, builder = opf2 "mul" },
            { "/",  prec = 70, builder = opf2 "div" },
            { "%",  prec = 70, builder = opf2 "mod" },
            { "^",  prec = 90, builder = opf2 "pow", assoc = "right" },
            { "//", prec = 70, builder = opf2 "idiv" },
            { "&",  prec = 36, builder = opf2 "band" },
            { "|",  prec = 32, builder = opf2 "bor" },
            { "~",  prec = 34, builder = opf2 "bxor" },
            { "<<", prec = 38, builder = opf2 "shl" },
            { ">>", prec = 38, builder = opf2 "shr" },
            { "..", prec = 40, builder = opf2 "concat", assoc = "right" },
            { "==", prec = 30, builder = opf2 "eq" },
            { "~=", prec = 30, builder = op_ne },
            { "<",  prec = 30, builder = opf2 "lt" },
            { "<=", prec = 30, builder = opf2 "le" },
            { ">",  prec = 30, builder = opf2r "lt" },
            { ">=", prec = 30, builder = opf2r "le" },
            { "and", prec = 20, builder = opf2 "and" },
            { "or",  prec = 10, builder = opf2 "or" } },

        prefix = {
            name = "expr prefix op",
            { "not", prec = 80, builder = opf1 "not" },
            { "#",   prec = 80, builder = opf1 "len" },
            { "~",   prec = 80, builder = opf1 "bnot" },
            { "-",   prec = 80, builder = opf1 "unm" } },

        suffix = {
            name = "expr suffix op",
            { "[", _M.expr, "]", builder = function (tab, idx)
                  return {tag="Index", tab, idx[1]} end },
            { ".", _M.id, builder = function (tab, field)
                  return {tag="Index", tab, _M.id2string(field[1])} end },
            { "(", _M.func_args_content, ")", builder = function(f, args)
                  return {tag="Call", f, unpack(args[1])} end },
            { "{", _table.content, "}", builder = function (f, arg)
                  return {tag="Call", f, arg[1]} end },
            { ":", _M.id, _M.method_args, builder = function (obj, post)
                  local m_name, args = unpack(post)
                  return {tag="Invoke", obj, _M.id2string(m_name), unpack(args)} end },
            { "+{", _meta.quote_content, "}", builder = function (f, arg)
                  return {tag="Call", f, arg[1] } end },
            default = { name="opt_string_arg", parse = _M.opt_string,
                        builder = function(f, arg) return {tag="Call", f, arg } end } } }
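
    -- Illustrative sketch (assumes a full Metalua install on the package
    -- path): checking operator precedence through the public compiler API.
    local function _precedence_demo()
        local mlc = require 'metalua.compiler'.new()
        -- "1+2*3" parses as `Op{ 'add', `Number 1, `Op{ 'mul', `Number 2, `Number 3 } }
        -- and "2^3^2" associates to the right, per the assoc flags above.
        return mlc :src_to_ast 'return 1 + 2 * 3'
    end
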
    return M
end
@ -0,0 +1,96 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------

--------------------------------------------------------------------------------
--
-- Non-Lua syntax extensions
--
--------------------------------------------------------------------------------

local gg = require 'metalua.grammar.generator'

return function(M)

    local _M = gg.future(M)

    ----------------------------------------------------------------------------
    -- Algebraic Datatypes
    ----------------------------------------------------------------------------
    local function adt (lx)
        local node = _M.id (lx)
        local tagval = node[1]
        -- tagkey = `Pair{ `String "tag", `String{ -{tagval} } }
        local tagkey = { tag="Pair", {tag="String", "tag"}, {tag="String", tagval} }
        if lx:peek().tag == "String" or lx:peek().tag == "Number" then
            -- TODO support boolean literals
            return { tag="Table", tagkey, lx:next() }
        elseif lx:is_keyword (lx:peek(), "{") then
            local x = M.table.table (lx)
            table.insert (x, 1, tagkey)
            return x
        else return { tag="Table", tagkey } end
    end

    M.adt = gg.sequence{ "`", adt, builder = unpack }

    M.expr.primary :add(M.adt)

    ----------------------------------------------------------------------------
    -- Anonymous lambda
    ----------------------------------------------------------------------------
    M.lambda_expr = gg.sequence{
        "|", _M.func_params_content, "|", _M.expr,
        builder = function (x)
            local li = x[2].lineinfo
            return { tag="Function", x[1],
                     { {tag="Return", x[2], lineinfo=li }, lineinfo=li } }
        end }

    M.expr.primary :add (M.lambda_expr)

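    -- Illustrative example (inferred from the builder above): "|x| x+1"
    -- produces the same AST as "function(x) return x+1 end", i.e.
    --   `Function{ { `Id 'x' }, { `Return{ `Op{ 'add', `Id 'x', `Number 1 } } } }
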
    ----------------------------------------------------------------------------
    -- Allows writing "a `f` b" instead of "f(a, b)". Taken from Haskell.
    ----------------------------------------------------------------------------
    function M.expr_in_backquotes (lx) return M.expr(lx, 35) end -- 35 = limited precedence
    M.expr.infix :add{ name = "infix function",
        "`", _M.expr_in_backquotes, "`", prec = 35, assoc="left",
        builder = function(a, op, b) return {tag="Call", op[1], a, b} end }

    ----------------------------------------------------------------------------
    -- C-style op+assignments
    -- TODO: no protection against side-effects in LHS vars.
    ----------------------------------------------------------------------------
    local function op_assign(kw, op)
        local function rhs(a, b) return { tag="Op", op, a, b } end
        local function f(a,b)
            if #a ~= #b then gg.parse_error "asymmetric operator+assignment" end
            local right = { }
            local r = { tag="Set", a, right }
            for i=1, #a do right[i] = { tag="Op", op, a[i], b[i] } end
            return r
        end
        M.lexer :add (kw)
        M.assignments[kw] = f
    end

    local ops = { add='+='; sub='-='; mul='*='; div='/=' }
    for ast_op_name, keyword in pairs(ops) do op_assign(keyword, ast_op_name) end
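
    -- Illustrative desugaring (inferred from the builder above):
    --   "x += 1"       becomes  `Set{ { `Id 'x' }, { `Op{ 'add', `Id 'x', `Number 1 } } }
    --   "x, y += a, b" expands element-wise, hence the arity check in f().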

    return M
end
@ -0,0 +1,44 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

----------------------------------------------------------------------
-- Generate a new Lua-specific lexer, derived from the generic lexer.
----------------------------------------------------------------------

local generic_lexer = require 'metalua.grammar.lexer'

return function()
    local lexer = generic_lexer.lexer :clone()

    local keywords = {
        "and", "break", "do", "else", "elseif",
        "end", "false", "for", "function",
        "goto", -- Lua5.2
        "if",
        "in", "local", "nil", "not", "or", "repeat",
        "return", "then", "true", "until", "while",
        "...", "..", "==", ">=", "<=", "~=",
        "<<", ">>", "//", -- Lua5.3
        "::", -- Lua5.2
        "+{", "-{" } -- Metalua

    for _, w in ipairs(keywords) do lexer :add (w) end

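    -- Illustrative sketch (inferred from how compiler.lua drives the lexer):
    --   local lx = lexer :newstream ("local x = 1")
    --   lx :next() -- successive calls yield `Keyword 'local', `Id 'x', ...
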
    return lexer
end
@ -0,0 +1,138 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------

-- Compile-time metaprogramming features: splicing ASTs generated during
-- compilation, and AST quasi-quoting helpers.

local gg = require 'metalua.grammar.generator'

return function(M)
    local _M = gg.future(M)
    M.meta = { }
    local _MM = gg.future(M.meta)

    ----------------------------------------------------------------------------
    -- External splicing: compile an AST into a chunk, load and evaluate
    -- that chunk, and replace the chunk by its result (which must also be
    -- an AST).
    ----------------------------------------------------------------------------

    -- TODO: that's not part of the parser
    function M.meta.eval (ast)
        -- TODO: should there be one mlc per splice, or per parser instance?
        local mlc = require 'metalua.compiler'.new()
        local f = mlc :ast_to_function (ast, '=splice')
        local result = f(M) -- splices act on the current parser
        return result
    end

    ----------------------------------------------------------------------------
    -- Going from an AST to an AST representing that AST.
    -- The only hash-part key being lifted is `"tag"`.
    -- Doesn't lift subtrees protected inside a `Splice{ ... }.
    -- e.g. it changes `Foo{ 123 } into
    -- `Table{ `Pair{ `String "tag", `String "Foo" }, `Number 123 }
    ----------------------------------------------------------------------------
    local function lift (t)
        --print("QUOTING:", table.tostring(t, 60,'nohash'))
        local cases = { }
        function cases.table (t)
            local mt = { tag = "Table" }
            --table.insert (mt, { tag = "Pair", quote "quote", { tag = "True" } })
            if t.tag == "Splice" then
                assert (#t==1, "Invalid splice")
                local sp = t[1]
                return sp
            elseif t.tag then
                table.insert (mt, { tag="Pair", lift "tag", lift(t.tag) })
            end
            for _, v in ipairs (t) do
                table.insert (mt, lift(v))
            end
            return mt
        end
        function cases.number  (t) return { tag = "Number", t, quote = true } end
        function cases.string  (t) return { tag = "String", t, quote = true } end
        function cases.boolean (t) return { tag = t and "True" or "False", t, quote = true } end
        local f = cases [type(t)]
        if f then return f(t) else error ("Cannot quote an AST containing "..tostring(t)) end
    end
    M.meta.lift = lift

    ----------------------------------------------------------------------------
    -- When this variable is false, code inside [-{...}] is compiled and
    -- evaluated immediately. When it's true (supposedly, while we're
    -- parsing data inside a quasi-quote), [-{foo}] is replaced by
    -- [`Splice{foo}], which will be unpacked by [quote()].
    ----------------------------------------------------------------------------
    local in_a_quote = false

    ----------------------------------------------------------------------------
    -- Parse the inside of a "-{ ... }"
    ----------------------------------------------------------------------------
    function M.meta.splice_content (lx)
        local parser_name = "expr"
        if lx:is_keyword (lx:peek(2), ":") then
            local a = lx:next()
            lx:next() -- skip ":"
            assert (a.tag=="Id", "Invalid splice parser name")
            parser_name = a[1]
        end
        -- TODO FIXME running a new parser with the old lexer?!
        local parser = require 'metalua.compiler.parser'.new()
        local ast = parser [parser_name](lx)
        if in_a_quote then -- only prevent quotation in this subtree
            --printf("SPLICE_IN_QUOTE:\n%s", _G.table.tostring(ast, "nohash", 60))
            return { tag="Splice", ast }
        else -- convert to a block, eval, replace with result
            if parser_name == "expr" then ast = { { tag="Return", ast } }
            elseif parser_name == "stat" then ast = { ast }
            elseif parser_name ~= "block" then
                error ("splice content must be an expr, stat or block") end
            --printf("EXEC THIS SPLICE:\n%s", _G.table.tostring(ast, "nohash", 60))
            return M.meta.eval (ast)
        end
    end

    M.meta.splice = gg.sequence{ "-{", _MM.splice_content, "}", builder=unpack }

    ----------------------------------------------------------------------------
    -- Parse the inside of a "+{ ... }"
    ----------------------------------------------------------------------------
    function M.meta.quote_content (lx)
        local parser
        if lx:is_keyword (lx:peek(2), ":") then -- +{parser: content }
            local parser_name = M.id(lx)[1]
            parser = M[parser_name]
            lx:next() -- skip ":"
        else -- +{ content }
            parser = M.expr
        end

        local prev_iq = in_a_quote
        in_a_quote = true
        --print("IN_A_QUOTE")
        local content = parser (lx)
        local q_content = M.meta.lift (content)
        in_a_quote = prev_iq
        return q_content
    end

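    -- Illustrative example (inferred semantics): in "+{ 1 + -{ two } }",
    -- quote_content lifts the addition into its AST representation, while
    -- the inner splice is kept as `Splice{ ... } and grafted back verbatim,
    -- since in_a_quote is true while the quote is being parsed.
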
    return M
end
@ -0,0 +1,147 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
--
-- Summary: metalua parser, miscellaneous utility functions.
--
-------------------------------------------------------------------------------

--------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.fget()]
-- * [mlp.id()]
-- * [mlp.opt_id()]
-- * [mlp.id_list()]
-- * [mlp.string()]
-- * [mlp.opt_string()]
-- * [mlp.id2string()]
--
--------------------------------------------------------------------------------

local gg = require 'metalua.grammar.generator'

-- TODO: replace splice-aware versions with naive ones, move extensions in ./meta

return function(M)
    local _M = gg.future(M)

    ----------------------------------------------------------------------------
    -- Try to read an identifier (possibly as a splice), or return [false]
    -- if no id is found.
    ----------------------------------------------------------------------------
    function M.opt_id (lx)
        local a = lx:peek()
        if lx:is_keyword (a, "-{") then
            local v = M.meta.splice(lx)
            if v.tag ~= "Id" and v.tag ~= "Splice" then
                gg.parse_error(lx, "Bad id splice")
            end
            return v
        elseif a.tag == "Id" then return lx:next()
        else return false end
    end

    ----------------------------------------------------------------------------
    -- Mandatory reading of an id: causes an error if it can't read one.
    ----------------------------------------------------------------------------
    function M.id (lx)
        return M.opt_id (lx) or gg.parse_error(lx, "Identifier expected")
    end

    ----------------------------------------------------------------------------
    -- Common helper function
    ----------------------------------------------------------------------------
    M.id_list = gg.list { primary = _M.id, separators = "," }

    ----------------------------------------------------------------------------
    -- Converts an identifier into a string. Hopefully one day it'll handle
    -- splices gracefully, but that proves quite tricky.
    ----------------------------------------------------------------------------
    function M.id2string (id)
        --print("id2string:", disp.ast(id))
        if id.tag == "Id" then id.tag = "String"; return id
        elseif id.tag == "Splice" then
            error ("id2string on splice not implemented")
            -- Evaluating id[1] will produce `Id{ xxx },
            -- and we want it to produce `String{ xxx }.
            -- The following is the plain notation of:
            -- +{ `String{ `Index{ `Splice{ -{id[1]} }, `Number 1 } } }
            return { tag="String", { tag="Index", { tag="Splice", id[1] },
                                     { tag="Number", 1 } } }
        else error ("Identifier expected: "..table.tostring(id, 'nohash')) end
    end

    ----------------------------------------------------------------------------
    -- Read a string, possibly spliced, or raise an error if it can't.
    ----------------------------------------------------------------------------
    function M.string (lx)
        local a = lx:peek()
        if lx:is_keyword (a, "-{") then
            local v = M.meta.splice(lx)
            if v.tag ~= "String" and v.tag ~= "Splice" then
                gg.parse_error(lx, "Bad string splice")
            end
            return v
        elseif a.tag == "String" then return lx:next()
        else error "String expected" end
    end

    ----------------------------------------------------------------------------
    -- Try to read a string, or return false if it can't. No splice allowed.
    ----------------------------------------------------------------------------
    function M.opt_string (lx)
        return lx:peek().tag == "String" and lx:next()
    end

    ----------------------------------------------------------------------------
    -- Chunk reader: block + Eof
    ----------------------------------------------------------------------------
    function M.skip_initial_sharp_comment (lx)
        -- Dirty hack: I'm happily fondling lexer's private parts
        -- FIXME: redundant with lexer:newstream()
        lx :sync()
        local i = lx.src:match ("^#.-\n()", lx.i)
        if i then
            lx.i = i
            lx.column_offset = i
            lx.line = lx.line and lx.line + 1 or 1
        end
    end

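    -- For instance (illustrative): on a file starting with
    --   #!/usr/bin/env lua
    -- the match above moves lx.i past the first newline, so the Unix
    -- shebang line never reaches the grammar.
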
    local function chunk (lx)
        if lx:peek().tag == 'Eof' then
            return { } -- handle empty files
        else
            M.skip_initial_sharp_comment (lx)
            local chunk = M.block (lx)
            if lx:peek().tag ~= "Eof" then
                gg.parse_error(lx, "End-of-file expected")
            end
            return chunk
        end
    end

    -- chunk is wrapped in a sequence so that it has a "transformer" field.
    M.chunk = gg.sequence { chunk, builder = unpack }

    return M
end
@ -0,0 +1,283 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
--
-- Summary: metalua parser, statement/block parser. This is part of the
-- definition of module [mlp].
--
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.stat()]
-- * [mlp.block()]
-- * [mlp.for_header()]
--
-------------------------------------------------------------------------------

local lexer = require 'metalua.grammar.lexer'
local gg    = require 'metalua.grammar.generator'

local annot = require 'metalua.compiler.parser.annot.generator'

--------------------------------------------------------------------------------
-- List of all keywords that indicate the end of a statement block. Users are
-- likely to extend this list when designing extensions.
--------------------------------------------------------------------------------

return function(M)
    local _M = gg.future(M)

    M.block_terminators = { "else", "elseif", "end", "until", ")", "}", "]" }

    -- FIXME: this must be handled from within GG!!!
    -- FIXME: there's no :add method in the list anyway. Added by gg.list?!
    function M.block_terminators :add(x)
        if type (x) == "table" then for _, y in ipairs(x) do self :add (y) end
        else table.insert (self, x) end
    end

    ----------------------------------------------------------------------------
    -- list of statements, possibly followed by semicolons
    ----------------------------------------------------------------------------
    M.block = gg.list {
        name        = "statements block",
        terminators = M.block_terminators,
        primary     = function (lx)
            -- FIXME use gg.optkeyword()
            local x = M.stat (lx)
            if lx:is_keyword (lx:peek(), ";") then lx:next() end
            return x
        end }

    ----------------------------------------------------------------------------
    -- Helper for "return <expr_list>" parsing.
    -- Called when parsing return statements.
    -- The specific test for an initial ";" is there because it's not a block
    -- terminator, so without it gg.list would choke on "return ;" statements.
    -- We don't make a modified copy of block_terminators because this list
    -- is sometimes modified at runtime, and the return parser would get out
    -- of sync if it relied on a copy.
    ----------------------------------------------------------------------------
    local return_expr_list_parser = gg.multisequence{
        { ";" , builder = function() return { } end },
        default = gg.list {
            _M.expr, separators = ",", terminators = M.block_terminators } }

    local for_vars_list = gg.list{
        name        = "for variables list",
        primary     = _M.id,
        separators  = ",",
        terminators = "in" }

    ----------------------------------------------------------------------------
    -- for header, between [for] and [do] (exclusive).
    -- Returns the `Forxxx{...} AST, without the body element (the last one).
    ----------------------------------------------------------------------------
    function M.for_header (lx)
        local vars = M.id_list(lx)
        if lx :is_keyword (lx:peek(), "=") then
            if #vars ~= 1 then
                gg.parse_error (lx, "numeric for only accepts one variable")
            end
            lx:next() -- skip "="
            local exprs = M.expr_list (lx)
            if #exprs < 2 or #exprs > 3 then
                gg.parse_error (lx, "numeric for requires 2 or 3 boundaries")
            end
            return { tag="Fornum", vars[1], unpack (exprs) }
        else
            if not lx :is_keyword (lx :next(), "in") then
                gg.parse_error (lx, '"=" or "in" expected in for loop')
            end
            local exprs = M.expr_list (lx)
            return { tag="Forin", vars, exprs }
        end
    end

    ----------------------------------------------------------------------------
    -- Function def parser helper: id ( . id ) *
    ----------------------------------------------------------------------------
    local function fn_builder (list)
        local acc = list[1]
        local first = acc.lineinfo.first
        for i = 2, #list do
            local index = M.id2string(list[i])
            local li = lexer.new_lineinfo(first, index.lineinfo.last)
            acc = { tag="Index", acc, index, lineinfo=li }
        end
        return acc
    end
    local func_name = gg.list{ _M.id, separators = ".", builder = fn_builder }

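    -- Illustrative result (inferred from fn_builder): "a.b.c" parses to
    --   `Index{ `Index{ `Id 'a', `String 'b' }, `String 'c' }
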
    ----------------------------------------------------------------------------
    -- Function def parser helper: ( : id )?
    ----------------------------------------------------------------------------
    local method_name = gg.onkeyword{ name = "method invocation", ":", _M.id,
        transformers = { function(x) return x and x.tag=='Id' and M.id2string(x) end } }

    ----------------------------------------------------------------------------
    -- Function def builder
    ----------------------------------------------------------------------------
    local function funcdef_builder(x)
        local name, method, func = unpack(x)
        if method then
            name = { tag="Index", name, method,
                     lineinfo = {
                         first = name.lineinfo.first,
                         last  = method.lineinfo.last } }
            table.insert (func[1], 1, {tag="Id", "self"})
        end
        local r = { tag="Set", {name}, {func} }
        r[1].lineinfo = name.lineinfo
        r[2].lineinfo = func.lineinfo
        return r
    end

    ----------------------------------------------------------------------------
    -- if statement builder
    ----------------------------------------------------------------------------
    local function if_builder (x)
        local cond_block_pairs, else_block, r = x[1], x[2], {tag="If"}
        local n_pairs = #cond_block_pairs
        for i = 1, n_pairs do
            local cond, block = unpack(cond_block_pairs[i])
            r[2*i-1], r[2*i] = cond, block
        end
        if else_block then table.insert(r, #r+1, else_block) end
        return r
    end

    ----------------------------------------------------------------------------
    -- produce a list of (expr, block) pairs
    ----------------------------------------------------------------------------
    local elseifs_parser = gg.list {
        gg.sequence { _M.expr, "then", _M.block, name='elseif parser' },
        separators  = "elseif",
        terminators = { "else", "end" }
    }

    local annot_expr = gg.sequence {
        _M.expr,
        gg.onkeyword{ "#", gg.future(M, 'annot').tf },
        builder = function(x)
            local e, a = unpack(x)
            if a then return { tag='Annot', e, a }
            else return e end
        end }

    local annot_expr_list = gg.list {
        primary = annot.opt(M, _M.expr, 'tf'), separators = ',' }

    ----------------------------------------------------------------------------
    -- assignments and calls: statements that don't start with a keyword
    ----------------------------------------------------------------------------
    local function assign_or_call_stat_parser (lx)
        local e = annot_expr_list (lx)
        local a = lx:is_keyword(lx:peek())
        local op = a and M.assignments[a]
        -- TODO: refactor annotations
        if op then
            --FIXME: check that [e] is a LHS
            lx :next()
            local annots
            e, annots = annot.split(e)
            local v = M.expr_list (lx)
            if type(op)=="string" then return { tag=op, e, v, annots }
            else return op (e, v) end
        else
            assert (#e > 0)
            if #e > 1 then
                gg.parse_error (lx,
                    "comma is not a valid statement separator; statements can "..
                    "be separated by semicolons, or not separated at all")
            elseif e[1].tag ~= "Call" and e[1].tag ~= "Invoke" then
                local typename
                if e[1].tag == 'Id' then
                    typename = '("'..e[1][1]..'") is an identifier'
                elseif e[1].tag == 'Op' then
                    typename = "is an arithmetic operation"
                else typename = "is of type '"..(e[1].tag or "<list>").."'" end
                gg.parse_error (lx,
                    "This expression %s; "..
                    "a statement was expected, and only function and method call "..
                    "expressions can be used as statements", typename)
            end
            return e[1]
        end
    end

    M.local_stat_parser = gg.multisequence{
        -- local function <name> <func_val>
        { "function", _M.id, _M.func_val, builder =
          function(x)
              local vars = { x[1], lineinfo = x[1].lineinfo }
              local vals = { x[2], lineinfo = x[2].lineinfo }
              return { tag="Localrec", vars, vals }
          end },
        -- local <id_list> ( = <expr_list> )?
        default = gg.sequence{
            gg.list{
                primary    = annot.opt(M, _M.id, 'tf'),
                separators = ',' },
            gg.onkeyword{ "=", _M.expr_list },
            builder = function(x)
                local annotated_left, right = unpack(x)
                local left, annotations = annot.split(annotated_left)
                return {tag="Local", left, right or { }, annotations }
            end } }

    ----------------------------------------------------------------------------
    -- statement
    ----------------------------------------------------------------------------
    M.stat = gg.multisequence {
        name = "statement",
        { "do", _M.block, "end", builder =
          function (x) return { tag="Do", unpack (x[1]) } end },
        { "for", _M.for_header, "do", _M.block, "end", builder =
          function (x) x[1][#x[1]+1] = x[2]; return x[1] end },
        { "function", func_name, method_name, _M.func_val, builder=funcdef_builder },
        { "while", _M.expr, "do", _M.block, "end", builder = "While" },
        { "repeat", _M.block, "until", _M.expr, builder = "Repeat" },
        { "local", _M.local_stat_parser, builder = unpack },
        { "return", return_expr_list_parser, builder =
          function(x) x[1].tag='Return'; return x[1] end },
        { "goto", _M.id, builder =
          function(x) x[1].tag='Goto'; return x[1] end },
        { "::", _M.id, "::", builder =
          function(x) x[1].tag='Label'; return x[1] end },
        { "break", builder = function() return { tag="Break" } end },
        { "-{", gg.future(M, 'meta').splice_content, "}", builder = unpack },
        { "if", gg.nonempty(elseifs_parser), gg.onkeyword{ "else", M.block }, "end",
          builder = if_builder },
        default = assign_or_call_stat_parser }

    M.assignments = {
        ["="] = "Set"
    }

    function M.assignments:add(k, v) self[k] = v end

    return M
end
@ -0,0 +1,77 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
--
-- Exported API:
-- * [M.table_bracket_field()]
-- * [M.table_field()]
-- * [M.table_content()]
-- * [M.table()]
--
-- KNOWN BUG: doesn't handle final ";" or "," before final "}"
--
--------------------------------------------------------------------------------

local gg = require 'metalua.grammar.generator'

return function(M)

    M.table = { }
    local _table = gg.future(M.table)
    local _expr  = gg.future(M).expr

    ----------------------------------------------------------------------------
    -- `[key] = value` table field definition
    ----------------------------------------------------------------------------
    M.table.bracket_pair = gg.sequence{ "[", _expr, "]", "=", _expr, builder = "Pair" }

    ----------------------------------------------------------------------------
    -- table element parser: list value, `id = value` pair or `[value] = value` pair.
    ----------------------------------------------------------------------------
    function M.table.element (lx)
        if lx :is_keyword (lx :peek(), "[") then return M.table.bracket_pair(lx) end
        local e = M.expr (lx)
        if not lx :is_keyword (lx :peek(), "=") then return e end
        lx :next() -- skip the "="
        local key = M.id2string(e) -- will fail on non-identifiers
        local val = M.expr(lx)
        local r = { tag="Pair", key, val }
        r.lineinfo = { first = key.lineinfo.first, last = val.lineinfo.last }
        return r
    end

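    -- Illustrative results (inferred from the element parser above):
    --   1       -> `Number 1                         (list item)
    --   x = 1   -> `Pair{ `String 'x', `Number 1 }   (record field)
    --   [k] = v -> `Pair{ `Id 'k', `Id 'v' }         (bracket field)
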
    ----------------------------------------------------------------------------
    -- table constructor, without enclosing braces; returns a full table object
    ----------------------------------------------------------------------------
    M.table.content = gg.list {
        -- eta expansion to allow patching the element definition
        primary     = _table.element,
        separators  = { ",", ";" },
        terminators = "}",
        builder     = "Table" }

    ----------------------------------------------------------------------------
    -- complete table constructor including [{...}]
    ----------------------------------------------------------------------------
    -- TODO beware, stat and expr use only table.content, this can't be patched.
    M.table.table = gg.sequence{ "{", _table.content, "}", builder = unpack }

return M
|
||||
end
|
@ -0,0 +1,832 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
--
-- Summary: parser generator. A collection of higher-order functors
-- which allow parsers to be built and combined. Relies on a lexer
-- that supports the same API as the one exposed in mll.lua.
--
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
--
-- Exported API:
--
-- Parser generators:
-- * [gg.sequence()]
-- * [gg.multisequence()]
-- * [gg.expr()]
-- * [gg.list()]
-- * [gg.onkeyword()]
-- * [gg.optkeyword()]
--
-- Other functions:
-- * [gg.parse_error()]
-- * [gg.make_parser()]
-- * [gg.is_parser()]
--
--------------------------------------------------------------------------------

local M = { }

local lexer = require 'metalua.grammar.lexer'

--------------------------------------------------------------------------------
-- Symbol generator: [gensym()] returns a guaranteed-to-be-unique identifier.
-- The main purpose is to avoid variable capture in macros.
--
-- If a string is passed as an argument, this string will be part of the
-- id name (helpful for macro debugging).
--------------------------------------------------------------------------------
local gensymidx = 0

function M.gensym (arg)
    gensymidx = gensymidx + 1
    return { tag="Id", string.format(".%i.%s", gensymidx, arg or "")}
end
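
-- Example: each call bumps the shared counter, so results never collide;
-- M.gensym 'x' returns `Id ".1.x", a second call returns `Id ".2.x", etc.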


-------------------------------------------------------------------------------
-- Parser metatable, which maps __call to method parse, and adds some
-- error tracing boilerplate.
-------------------------------------------------------------------------------
local parser_metatable = { }

function parser_metatable :__call (lx, ...)
    return self :parse (lx, ...)
end

-------------------------------------------------------------------------------
-- Turn a table into a parser, mainly by setting the metatable.
-------------------------------------------------------------------------------
function M.make_parser(kind, p)
    p.kind = kind
    if not p.transformers then p.transformers = { } end
    function p.transformers:add (x)
        table.insert (self, x)
    end
    setmetatable (p, parser_metatable)
    return p
end

-------------------------------------------------------------------------------
-- Return true iff [x] is a parser.
-- If it's a gg-generated parser, return the name of its kind.
-------------------------------------------------------------------------------
function M.is_parser (x)
    return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind
end

-------------------------------------------------------------------------------
-- Parse a sequence, without applying the builder or the transformers.
-------------------------------------------------------------------------------
local function raw_parse_sequence (lx, p)
    local r = { }
    for i=1, #p do
        local e=p[i]
        if type(e) == "string" then
            local kw = lx :next()
            if not lx :is_keyword (kw, e) then
                M.parse_error(
                    lx, "A keyword was expected, probably `%s'.", e)
            end
        elseif M.is_parser (e) then
            table.insert (r, e(lx))
        else -- Invalid parser definition, this is *not* a parsing error
            error(string.format(
                "Sequence `%s': element #%i is neither a string nor a parser: %s",
                p.name, i, table.tostring(e)))
        end
    end
    return r
end

-------------------------------------------------------------------------------
-- Parse a multisequence, without applying multisequence transformers.
-- The sequences are completely parsed.
-------------------------------------------------------------------------------
local function raw_parse_multisequence (lx, sequence_table, default)
    local seq_parser = sequence_table[lx:is_keyword(lx:peek())]
    if seq_parser then return seq_parser (lx)
    elseif default then return default (lx)
    else return false end
end

-------------------------------------------------------------------------------
-- Apply all transformers listed in [parser] to [ast].
-------------------------------------------------------------------------------
local function transform (ast, parser, fli, lli)
    if parser.transformers then
        for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end
    end
    if type(ast) == 'table' then
        local ali = ast.lineinfo
        if not ali or ali.first~=fli or ali.last~=lli then
            ast.lineinfo = lexer.new_lineinfo(fli, lli)
        end
    end
    return ast
end

-------------------------------------------------------------------------------
-- Generate a traceable parsing error (not implemented yet).
-------------------------------------------------------------------------------
function M.parse_error(lx, fmt, ...)
    local li = lx:lineinfo_left()
    local file, line, column, offset, positions
    if li then
        file, line, column, offset = li.source, li.line, li.column, li.offset
        positions = { first = li, last = li }
    else
        line, column, offset = -1, -1, -1
    end

    local msg = string.format("line %i, char %i: "..fmt, line, column, ...)
    if file and file~='?' then msg = "file "..file..", "..msg end

    local src = lx.src
    if offset>0 and src then
        local i, j = offset, offset
        while src:sub(i,i) ~= '\n' and i>=0    do i=i-1 end
        while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end
        local srcline = src:sub (i+1, j-1)
        local idx = string.rep (" ", column).."^"
        msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)
    end
    --lx :kill()
    error(msg)
end

-------------------------------------------------------------------------------
--
-- Sequence parser generator
--
-------------------------------------------------------------------------------
-- Input fields:
--
-- * [builder]: how to build an AST out of sequence parts. Let [x] be the list
--   of subparser results (keywords are simply omitted). [builder] can be:
--   - [nil], in which case the result of parsing is simply [x];
--   - a string, which is then put as a tag on [x];
--   - a function, which takes [x] as a parameter and returns an AST.
--
-- * [name]: the name of the parser. Used for debug messages.
--
-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
--   returned by the parser.
--
-- * Table-part entries correspond to keywords (strings) and subparsers
--   (functions and callable objects).
--
-- After creation, the following fields are added:
-- * [parse], the parsing function lexer->AST
-- * [kind] == "sequence"
-- * [name] is set, if it wasn't in the input.
--
-------------------------------------------------------------------------------
function M.sequence (p)
    M.make_parser ("sequence", p)

    -------------------------------------------------------------------
    -- Parsing method
    -------------------------------------------------------------------
    function p:parse (lx)

        -- Raw parsing:
        local fli = lx:lineinfo_right()
        local seq = raw_parse_sequence (lx, self)
        local lli = lx:lineinfo_left()

        -- Builder application:
        local builder, tb = self.builder, type (self.builder)
        if tb == "string" then seq.tag = builder
        elseif tb == "function" or builder and builder.__call then seq = builder(seq)
        elseif builder == nil then -- nothing to do
        else error ("Invalid builder of type "..tb.." in sequence") end
        seq = transform (seq, self, fli, lli)
        assert (not seq or seq.lineinfo)
        return seq
    end

    -------------------------------------------------------------------
    -- Construction
    -------------------------------------------------------------------
    -- Try to build a proper name
    if p.name then
        -- don't touch existing name
    elseif type(p[1])=="string" then -- find a name based on the first keyword
        if #p==1 then p.name=p[1]
        elseif type(p[#p])=="string" then
            p.name = p[1] .. " ... " .. p[#p]
        else p.name = p[1] .. " ..." end
    else -- can't find a decent name
        p.name = "unnamed_sequence"
    end

    return p
end --</sequence>
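
-- Usage example: the table parser in this commit builds its `[k]=v` field
-- parser as a sequence:
--   gg.sequence{ "[", _expr, "]", "=", _expr, builder = "Pair" }
-- The keywords are checked and dropped; the two expression results are
-- collected and tagged "Pair".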


-------------------------------------------------------------------------------
--
-- Multiple, keyword-driven, sequence parser generator
--
-------------------------------------------------------------------------------
-- In [p], useful fields are:
--
-- * [transformers]: as usual.
--
-- * [name]: as usual.
--
-- * Table-part entries must be sequence parsers, or tables which can
--   be turned into a sequence parser by [gg.sequence]. These
--   sequences must start with a keyword, and this initial keyword
--   must be different for each sequence. The table-part entries will
--   be removed after [gg.multisequence] returns.
--
-- * [default]: the parser to run if the next keyword in the lexer is
--   none of the registered initial keywords. If there's no default
--   parser and no suitable initial keyword, the multisequence parser
--   simply returns [false].
--
-- After creation, the following fields are added:
--
-- * [parse], the parsing function lexer->AST.
--
-- * [sequences], the table of sequences, indexed by initial keywords.
--
-- * [add], a method which takes a sequence parser or a config table for
--   [gg.sequence], and adds/replaces the corresponding sequence
--   parser. If the keyword was already used, the former sequence is
--   removed and a warning is issued.
--
-- * [get], a method which returns a sequence by its initial keyword.
--
-- * [kind] == "multisequence"
--
-------------------------------------------------------------------------------
function M.multisequence (p)
    M.make_parser ("multisequence", p)

    -------------------------------------------------------------------
    -- Add a sequence (might be just a config table for [gg.sequence])
    -------------------------------------------------------------------
    function p :add (s)
        -- compile if necessary:
        local keyword = type(s)=='table' and s[1]
        if type(s)=='table' and not M.is_parser(s) then M.sequence(s) end
        if M.is_parser(s)~='sequence' or type(keyword)~='string' then
            if self.default then -- two defaults
                error ("In a multisequence parser, all but one sequence "..
                       "must start with a keyword")
            else self.default = s end -- first default
        else
            if self.sequences[keyword] then -- duplicate keyword
                -- TODO: warn that initial keyword `keyword` is overloaded in multiseq
            end
            self.sequences[keyword] = s
        end
    end -- </multisequence.add>

    -------------------------------------------------------------------
    -- Get the sequence starting with this keyword. [kw :: string]
    -------------------------------------------------------------------
    function p :get (kw) return self.sequences [kw] end

    -------------------------------------------------------------------
    -- Remove the sequence starting with keyword [kw :: string]
    -------------------------------------------------------------------
    function p :del (kw)
        if not self.sequences[kw] then
            -- TODO: warn when trying to delete a non-existent entry
        end
        local removed = self.sequences[kw]
        self.sequences[kw] = nil
        return removed
    end

    -------------------------------------------------------------------
    -- Parsing method
    -------------------------------------------------------------------
    function p :parse (lx)
        local fli = lx:lineinfo_right()
        local x = raw_parse_multisequence (lx, self.sequences, self.default)
        local lli = lx:lineinfo_left()
        return transform (x, self, fli, lli)
    end

    -------------------------------------------------------------------
    -- Construction
    -------------------------------------------------------------------
    -- Register the sequences passed to the constructor. They're moved
    -- from the array part of the parser to the hash part of field
    -- [sequences].
    p.sequences = { }
    for i=1, #p do p :add (p[i]); p[i] = nil end

    -- FIXME: why is this commented out?
    --if p.default and not is_parser(p.default) then sequence(p.default) end
    return p
end --</multisequence>
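
-- Usage sketch (the parser and builder names are illustrative): a statement
-- parser with one keyword-driven sequence and a default:
--   local stat = gg.multisequence{
--       { "do", block, "end", builder = "Do" },
--       default = expr_stat }
--   stat :add{ "while", expr, "do", block, "end", builder = "While" }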


-------------------------------------------------------------------------------
--
-- Expression parser generator
--
-------------------------------------------------------------------------------
--
-- Expression configuration relies on three tables: [prefix], [infix]
-- and [suffix]. Moreover, the primary parser can be replaced by a
-- table: in this case the [primary] table will be passed to
-- [gg.multisequence] to create a parser.
--
-- Each of these tables is a modified multisequence parser: the
-- differences with respect to regular multisequence config tables are:
--
-- * the builder takes specific parameters:
--   - for [prefix], it takes the result of the prefix sequence parser,
--     and the prefixed expression;
--   - for [infix], it takes the left-hand-side expression, the results
--     of the infix sequence parser, and the right-hand-side expression;
--   - for [suffix], it takes the suffixed expression, and the result
--     of the suffix sequence parser.
--
-- * the default field is a list, with parameters:
--   - [parser], the raw parsing function;
--   - [transformers], as usual;
--   - [prec], the operator's precedence;
--   - [assoc], for the [infix] table, the operator's associativity,
--     which can be "left", "right" or "flat" (defaults to "left").
--
-- In [p], useful fields are:
-- * [transformers]: as usual
-- * [name]: as usual
-- * [primary]: the atomic expression parser, or a multisequence config
--   table (mandatory)
-- * [prefix]: prefix operators config table, see above.
-- * [infix]: infix operators config table, see above.
-- * [suffix]: suffix operators config table, see above.
--
-- After creation, these fields are added:
-- * [kind] == "expr"
-- * [parse] as usual
-- * each table is turned into a multisequence, and therefore has an
--   [add] method
--
-------------------------------------------------------------------------------
function M.expr (p)
    M.make_parser ("expr", p)

    -------------------------------------------------------------------
    -- Parser method.
    -- In addition to the lexer, it takes an optional precedence:
    -- it won't read expressions whose precedence is lower than or
    -- equal to [prec].
    -------------------------------------------------------------------
    function p :parse (lx, prec)
        prec = prec or 0

        ------------------------------------------------------
        -- Extract the right parser and the corresponding
        -- options table, for (pre|in|suf)fix operators.
        -- Options include prec, assoc, transformers.
        ------------------------------------------------------
        local function get_parser_info (tab)
            local p2 = tab :get (lx :is_keyword (lx :peek()))
            if p2 then -- keyword-based sequence found
                local function parser(lx) return raw_parse_sequence(lx, p2) end
                return parser, p2
            else -- Got to use the default parser
                local d = tab.default
                if d then return d.parse or d.parser, d
                else return false, false end
            end
        end

        ------------------------------------------------------
        -- Look for a prefix sequence. Multiple prefixes are
        -- handled through the recursive [p.parse] call.
        -- Notice the double-transform: one for the primary
        -- expr, and one for the one with the prefix op.
        ------------------------------------------------------
        local function handle_prefix ()
            local fli = lx :lineinfo_right()
            local p2_func, p2 = get_parser_info (self.prefix)
            local op = p2_func and p2_func (lx)
            if op then -- Keyword-based sequence found
                local ili = lx :lineinfo_right() -- Intermediate LineInfo
                local e = p2.builder (op, self :parse (lx, p2.prec))
                local lli = lx :lineinfo_left()
                return transform (transform (e, p2, ili, lli), self, fli, lli)
            else -- No prefix found, get a primary expression
                local e = self.primary(lx)
                local lli = lx :lineinfo_left()
                return transform (e, self, fli, lli)
            end
        end --</expr.parse.handle_prefix>

        ------------------------------------------------------
        -- Look for an infix sequence + right-hand-side operand.
        -- Return the whole binary expression result,
        -- or false if no operator was found.
        ------------------------------------------------------
        local function handle_infix (e)
            local p2_func, p2 = get_parser_info (self.infix)
            if not p2 then return false end

            -----------------------------------------
            -- Handle flattening operators: gather all operands
            -- of the series in [list]; when a different operator
            -- is found, stop, build from [list], [transform] and
            -- return.
            -----------------------------------------
            if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
                local fli = lx:lineinfo_right()
                local pflat, list = p2, { e }
                repeat
                    local op = p2_func(lx)
                    if not op then break end
                    table.insert (list, self:parse (lx, p2.prec))
                    local _ -- We only care about checking that p2==pflat
                    _, p2 = get_parser_info (self.infix)
                until p2 ~= pflat
                local e2 = pflat.builder (list)
                local lli = lx:lineinfo_left()
                return transform (transform (e2, pflat, fli, lli), self, fli, lli)

            -----------------------------------------
            -- Handle regular infix operators: [e] the LHS is known,
            -- just gather the operator and [e2] the RHS.
            -- Result goes in [e3].
            -----------------------------------------
            elseif p2.prec and p2.prec>prec or
                   p2.prec==prec and p2.assoc=="right" then
                local fli = e.lineinfo.first -- lx:lineinfo_right()
                local op = p2_func(lx)
                if not op then return false end
                local e2 = self:parse (lx, p2.prec)
                local e3 = p2.builder (e, op, e2)
                local lli = lx:lineinfo_left()
                return transform (transform (e3, p2, fli, lli), self, fli, lli)

            -----------------------------------------
            -- Check for non-associative operators, and complain if applicable.
            -----------------------------------------
            elseif p2.assoc=="none" and p2.prec==prec then
                M.parse_error (lx, "non-associative operator!")

            -----------------------------------------
            -- No infix operator suitable at that precedence
            -----------------------------------------
            else return false end

        end --</expr.parse.handle_infix>

        ------------------------------------------------------
        -- Look for a suffix sequence.
        -- Return the result of the suffix operator on [e],
        -- or false if no operator was found.
        ------------------------------------------------------
        local function handle_suffix (e)
            -- FIXME bad fli, must take e.lineinfo.first
            local p2_func, p2 = get_parser_info (self.suffix)
            if not p2 then return false end
            if not p2.prec or p2.prec>=prec then
                --local fli = lx:lineinfo_right()
                local fli = e.lineinfo.first
                local op = p2_func(lx)
                if not op then return false end
                local lli = lx:lineinfo_left()
                e = p2.builder (e, op)
                e = transform (transform (e, p2, fli, lli), self, fli, lli)
                return e
            end
            return false
        end --</expr.parse.handle_suffix>

        ------------------------------------------------------
        -- Parser body: read suffix and (infix+operand)
        -- extensions as long as we're able to fetch more at
        -- this precedence level.
        ------------------------------------------------------
        local e = handle_prefix()
        repeat
            local x = handle_suffix (e); e = x or e
            local y = handle_infix  (e); e = y or e
        until not (x or y)

        -- No transform: it already happened in operators handling
        return e
    end --</expr.parse>

    -------------------------------------------------------------------
    -- Construction
    -------------------------------------------------------------------
    if not p.primary then p.primary=p[1]; p[1]=nil end
    for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
        if not p[t] then p[t] = { } end
        if not M.is_parser(p[t]) then M.multisequence(p[t]) end
    end
    function p:add(...) return self.primary:add(...) end
    return p
end --</expr>
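
-- Usage sketch (number_parser and the precedences are illustrative): a tiny
-- arithmetic parser, where the infix builder receives (lhs, op_results, rhs):
--   local arith = gg.expr{
--       primary = number_parser,
--       infix = {
--           { "+", prec=60, builder = function(a, _, b) return { tag='Op', 'add', a, b } end },
--           { "*", prec=70, builder = function(a, _, b) return { tag='Op', 'mul', a, b } end } } }
-- Parsing "1+2*3" yields `Op{ 'add', `Number 1, `Op{ 'mul', `Number 2, `Number 3 } },
-- because "*" binds tighter (70 > 60).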


-------------------------------------------------------------------------------
--
-- List parser generator
--
-------------------------------------------------------------------------------
-- In [p], the following fields can be provided in input:
--
-- * [builder]: takes the list of subparser results, returns an AST
-- * [transformers]: as usual
-- * [name]: as usual
--
-- * [terminators]: list of strings representing the keywords which
--   might mark the end of the list. When non-empty, the list is
--   allowed to be empty. A string is treated as a single-element
--   table, whose element is that string, e.g. ["do"] is the same as
--   [{"do"}].
--
-- * [separators]: list of strings representing the keywords which can
--   separate elements of the list. When non-empty, one of these
--   keywords has to be found between each element. Lack of a separator
--   indicates the end of the list. A string is treated as a
--   single-element table, whose element is that string, e.g. ["do"]
--   is the same as [{"do"}]. If [terminators] is empty/nil, then
--   [separators] has to be non-empty.
--
-- After creation, the following fields are added:
-- * [parse], the parsing function lexer->AST
-- * [kind] == "list"
--
-------------------------------------------------------------------------------
function M.list (p)
    M.make_parser ("list", p)

    -------------------------------------------------------------------
    -- Parsing method
    -------------------------------------------------------------------
    function p :parse (lx)

        ------------------------------------------------------
        -- Used to quickly check whether there's a terminator
        -- or a separator immediately ahead
        ------------------------------------------------------
        local function peek_is_in (keywords)
            return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end

        local x = { }
        local fli = lx :lineinfo_right()

        -- if there's a terminator to start with, don't bother trying
        local is_empty_list = self.terminators and (peek_is_in (self.terminators) or lx:peek().tag=="Eof")
        if not is_empty_list then
            repeat
                local item = self.primary(lx)
                table.insert (x, item) -- read one element
            until
                -- There's a separator list specified, and the next token isn't in it.
                -- Otherwise, consume it with [lx:next()]
                self.separators and not(peek_is_in (self.separators) and lx:next()) or
                -- Terminator token ahead
                peek_is_in (self.terminators) or
                -- Last reason: end of file reached
                lx:peek().tag=="Eof"
        end

        local lli = lx:lineinfo_left()

        -- Apply the builder. It can be a string, or a callable value,
        -- or simply nothing.
        local b = self.builder
        if b then
            if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
            elseif type(b)=="function" then x=b(x)
            else
                local bmt = getmetatable(b)
                if bmt and bmt.__call then x=b(x) end
            end
        end
        return transform (x, self, fli, lli)
    end --</list.parse>

    -------------------------------------------------------------------
    -- Construction
    -------------------------------------------------------------------
    if not p.primary then p.primary = p[1]; p[1] = nil end
    if type(p.terminators) == "string" then p.terminators = { p.terminators }
    elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
    if type(p.separators) == "string" then p.separators = { p.separators }
    elseif p.separators and #p.separators == 0 then p.separators = nil end

    return p
end --</list>
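
-- Usage example: the table parser in this commit reads table contents as
--   gg.list{ primary = _table.element, separators = { ",", ";" },
--            terminators = "}", builder = "Table" }
-- i.e. a possibly empty, comma/semicolon-separated list, stopped by "}".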


-------------------------------------------------------------------------------
--
-- Keyword-conditioned parser generator
--
-------------------------------------------------------------------------------
--
-- Only apply a parser if a given keyword is found. The result of
-- the [gg.onkeyword] parser is the result of the subparser (modulo
-- [transformers] applications).
--
-- lineinfo: the keyword is *not* included in the boundaries of the
-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
-- implementation of metalua has shown that it was the appropriate choice
-- in every case.
--
-- Input fields:
--
-- * [name]: as usual
--
-- * [transformers]: as usual
--
-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
--   stream instead of being consumed.
--
-- * [primary]: the subparser.
--
-- * [keywords]: list of strings representing triggering keywords.
--
-- * Table-part entries can contain strings, and/or exactly one parser.
--   Strings are put in [keywords], and the parser is put in [primary].
--
-- After the call, the following fields will be set:
--
-- * [parse] the parsing method
-- * [kind] == "onkeyword"
-- * [primary]
-- * [keywords]
--
-------------------------------------------------------------------------------
function M.onkeyword (p)
    M.make_parser ("onkeyword", p)

    -------------------------------------------------------------------
    -- Parsing method
    -------------------------------------------------------------------
    function p :parse (lx)
        if lx :is_keyword (lx:peek(), unpack(self.keywords)) then
            local fli = lx:lineinfo_right()
            if not self.peek then lx:next() end
            local content = self.primary (lx)
            local lli = lx:lineinfo_left()
            local li = content.lineinfo or { }
            fli, lli = li.first or fli, li.last or lli
            return transform (content, p, fli, lli)
        else return false end
    end

    -------------------------------------------------------------------
    -- Construction
    -------------------------------------------------------------------
    if not p.keywords then p.keywords = { } end
    for _, x in ipairs(p) do
        if type(x)=="string" then table.insert (p.keywords, x)
        else assert (not p.primary and M.is_parser (x)); p.primary = x end
    end
    assert (next (p.keywords), "Missing trigger keyword in gg.onkeyword")
    assert (p.primary, 'no primary parser in gg.onkeyword')
    return p
end --</onkeyword>
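
-- Usage example: the statement parser in this commit reads an optional
-- "else" clause as gg.onkeyword{ "else", M.block }: it returns the parsed
-- block when the next token is "else", and false otherwise.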


-------------------------------------------------------------------------------
--
-- Optional keyword consumer pseudo-parser generator
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function. That function parses
-- one of the keywords passed as parameters, and returns it. It returns
-- [false] if no matching keyword is found.
--
-- Notice that tokens returned by the lexer already carry lineinfo, therefore
-- there's no need to add it, as is usually done through transform() calls.
-------------------------------------------------------------------------------
function M.optkeyword (...)
    local args = {...}
    if type (args[1]) == "table" then
        assert (#args == 1)
        args = args[1]
    end
    for _, v in ipairs(args) do assert (type(v)=="string") end
    return function (lx)
        local x = lx:is_keyword (lx:peek(), unpack (args))
        if x then lx:next(); return x
        else return false end
    end
end
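
-- Usage sketch: local semicolon = gg.optkeyword ';'
-- semicolon(lx) consumes and returns ";" when it is the next token,
-- and returns false (consuming nothing) otherwise.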


-------------------------------------------------------------------------------
--
-- Run a parser with a special lexer
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function.
-- The first argument is the lexer class to be used with the parser;
-- the second is the parser itself.
-- The resulting parser returns whatever the argument parser does.
--
-------------------------------------------------------------------------------
function M.with_lexer(new_lexer, parser)

    -------------------------------------------------------------------
    -- Most gg functions take their parameters in a table, so it's
    -- better to silently accept when with_lexer{ } is called with
    -- its arguments in a list:
    -------------------------------------------------------------------
    if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
        return M.with_lexer(unpack(new_lexer))
    end

    -------------------------------------------------------------------
    -- Save the current lexer, switch it for the new one, run the parser,
    -- restore the previous lexer, even if the parser caused an error.
    -------------------------------------------------------------------
    return function (lx)
        local old_lexer = getmetatable(lx)
        lx:sync()
        setmetatable(lx, new_lexer)
        local status, result = pcall(parser, lx)
        lx:sync()
        setmetatable(lx, old_lexer)
        if status then return result else error(result) end
    end
end
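
-- Usage sketch (q_lexer and q_parser are hypothetical names):
--   local parse_quotation = gg.with_lexer(q_lexer, q_parser)
-- parse_quotation(lx) runs q_parser while lx temporarily behaves as a
-- q_lexer stream, then restores the original lexer, even on error.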

--------------------------------------------------------------------------------
--
-- Wrap a parser to make sure that the list it returns is not empty.
--
--------------------------------------------------------------------------------
function M.nonempty(primary)
    local p = M.make_parser('non-empty list', { primary = primary, name=primary.name })
    function p :parse (lx)
        local fli = lx:lineinfo_right()
        local content = self.primary (lx)
        local lli = lx:lineinfo_left()
        local li = content.lineinfo or { }
        fli, lli = li.first or fli, li.last or lli
        if #content == 0 then
            M.parse_error (lx, "`%s' must not be empty.", self.name or "list")
        else
            return transform (content, self, fli, lli)
        end
    end
    return p
end

local FUTURE_MT = { }
function FUTURE_MT:__tostring() return "<Proxy parser module>" end
function FUTURE_MT:__newindex(key, value) error "don't write in futures" end
function FUTURE_MT :__index (parser_name)
    return function(...)
        local p, m = rawget(self, '__path'), self.__module
        if p then for _, name in ipairs(p) do
            m=rawget(m, name)
            if not m then error ("Submodule '"..name.."' undefined") end
        end end
        local f = rawget(m, parser_name)
        if not f then error ("Parser '"..parser_name.."' undefined") end
        return f(...)
    end
end

--------------------------------------------------------------------------------
-- [gg.future(module, ...)] returns a proxy: indexing it with a parser name
-- yields a function which, when called, looks that parser up in [module]
-- (following the optional submodule path given as extra arguments) and
-- applies it. This lets grammar modules reference parsers before they exist.
--------------------------------------------------------------------------------
function M.future(module, ...)
    checks('table')
    local path = ... and {...}
    if path then for _, x in ipairs(path) do
        assert(type(x)=='string', "Bad future arg")
    end end
    local self = { __module = module,
                   __path   = path }
    return setmetatable(self, FUTURE_MT)
end
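
-- Usage example: this commit's table parser declares
--   local _expr = gg.future(M).expr
-- so grammar rules can reference M.expr before it is defined; the statement
-- parser similarly uses gg.future(M, 'meta').splice_content.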

return M
@ -0,0 +1,678 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------

require 'checks'

local M = { }

local lexer = { alpha={ }, sym={ } }
lexer.__index=lexer
lexer.__type='lexer.stream'

M.lexer = lexer


local debugf = function() end
-- local debugf=printf

----------------------------------------------------------------------
-- Some locale settings produce bad results, e.g. the French locale
-- expects float numbers to use commas instead of periods.
-- TODO: change the number parser into something locale-independent;
-- locales are nasty.
----------------------------------------------------------------------
os.setlocale('C')

local MT = { }

M.metatables=MT

----------------------------------------------------------------------
-- Create a new metatable, for a new class of objects.
----------------------------------------------------------------------
local function new_metatable(name)
    local mt = { __type = 'lexer.'..name }
    mt.__index = mt
    MT[name] = mt
end


----------------------------------------------------------------------
-- Position: represents a point in a source file.
----------------------------------------------------------------------
new_metatable 'position'

local position_idx=1

function M.new_position(line, column, offset, source)
    checks('number', 'number', 'number', 'string')
    local id = position_idx; position_idx = position_idx+1
    return setmetatable({line=line, column=column, offset=offset,
                         source=source, id=id}, MT.position)
end

function MT.position :__tostring()
    return string.format("<%s%s|L%d|C%d|K%d>",
        self.comments and "C|" or "",
        self.source, self.line, self.column, self.offset)
end



----------------------------------------------------------------------
-- Position factory: converts offsets into line/column/offset positions.
----------------------------------------------------------------------
new_metatable 'position_factory'

function M.new_position_factory(src, src_name)
    -- assert(type(src)=='string')
    -- assert(type(src_name)=='string')
    local lines = { 1 }
    for offset in src :gmatch '\n()' do table.insert(lines, offset) end
    local max = #src+1
    table.insert(lines, max+1) -- +1 includes Eof
    return setmetatable({ src_name=src_name, line2offset=lines, max=max },
                        MT.position_factory)
end

function MT.position_factory :get_position (offset)
    -- assert(type(offset)=='number')
    assert(offset<=self.max)
    local line2offset = self.line2offset
    local left = self.last_left or 1
    if offset<line2offset[left] then left=1 end
    local right = left+1
    if line2offset[right]<=offset then right = right+1 end
    if line2offset[right]<=offset then right = #line2offset end
    -- Binary search for the line whose offset range contains [offset]:
    while true do
        -- print ("  trying lines "..left.."/"..right..", offsets "..line2offset[left]..
        --        "/"..line2offset[right].." for offset "..offset)
        -- assert(line2offset[left]<=offset)
        -- assert(offset<line2offset[right])
        -- assert(left<right)
        if left+1==right then break end
        local middle = math.floor((left+right)/2)
        if line2offset[middle]<=offset then left=middle else right=middle end
    end
    -- assert(left+1==right)
    -- printf("found that offset %d is between %d and %d, hence on line %d",
    --        offset, line2offset[left], line2offset[right], left)
    local line = left
    local column = offset - line2offset[line] + 1
    self.last_left = left
    return M.new_position(line, column, offset, self.src_name)
end
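
-- Example: with src = "ab\ncd", [M.new_position_factory(src, "f")] records
-- the line offsets {1, 4, 7}; :get_position(4) then returns line 2, column 1,
-- i.e. the 'c' character.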



----------------------------------------------------------------------
-- Lineinfo: represents a node's range in a source file;
-- embeds information about prefix and suffix comments.
----------------------------------------------------------------------
new_metatable 'lineinfo'

function M.new_lineinfo(first, last)
    checks('lexer.position', 'lexer.position')
    return setmetatable({first=first, last=last}, MT.lineinfo)
end

function MT.lineinfo :__tostring()
    local fli, lli = self.first, self.last
    local line   = fli.line;   if line~=lli.line     then line  =line  ..'-'..lli.line   end
    local column = fli.column; if column~=lli.column then column=column..'-'..lli.column end
    local offset = fli.offset; if offset~=lli.offset then offset=offset..'-'..lli.offset end
    return string.format("<%s%s|L%s|C%s|K%s%s>",
        fli.comments and "C|" or "",
        fli.source, line, column, offset,
        lli.comments and "|C" or "")
end

----------------------------------------------------------------------
-- Token: atomic Lua language element, with a category, a content,
-- and some lineinfo relating it to its original source.
----------------------------------------------------------------------
new_metatable 'token'

function M.new_token(tag, content, lineinfo)
    --printf("TOKEN `%s{ %q, lineinfo = %s} boundaries %d, %d",
    --       tag, content, tostring(lineinfo), lineinfo.first.id, lineinfo.last.id)
    return setmetatable({tag=tag, lineinfo=lineinfo, content}, MT.token)
end

function MT.token :__tostring()
    --return string.format("`%s{ %q, %s }", self.tag, self[1], tostring(self.lineinfo))
    return string.format("`%s %q", self.tag, self[1])
end


----------------------------------------------------------------------
-- Comment: series of comment blocks with associated lineinfo.
-- To be attached to the tokens just before and just after them.
----------------------------------------------------------------------
new_metatable 'comment'

function M.new_comment(lines)
    local first = lines[1].lineinfo.first
    local last = lines[#lines].lineinfo.last
    local lineinfo = M.new_lineinfo(first, last)
    return setmetatable({lineinfo=lineinfo, unpack(lines)}, MT.comment)
end

function MT.comment :text()
    local last_line = self[1].lineinfo.last.line
    local acc = { }
    for i, line in ipairs(self) do
        local nreturns = line.lineinfo.first.line - last_line
        table.insert(acc, ("\n"):rep(nreturns))
        table.insert(acc, line[1])
    end
    return table.concat(acc)
end

function M.new_comment_line(text, lineinfo, nequals)
    checks('string', 'lexer.lineinfo', '?number')
    return { lineinfo = lineinfo, text, nequals }
end



----------------------------------------------------------------------
-- Patterns used by [lexer :extract] to decompose the raw string into
-- correctly tagged tokens.
----------------------------------------------------------------------
lexer.patterns = {
    spaces              = "^[ \r\n\t]*()",
    short_comment       = "^%-%-([^\n]*)\n?()",
    --final_short_comment = "^%-%-([^\n]*)()$",
    long_comment        = "^%-%-%[(=*)%[\n?(.-)%]%1%]()",
    long_string         = "^%[(=*)%[\n?(.-)%]%1%]()",
    number_longint      = "^%d+[uU]?[lL][lL]()",
    number_longint_hex  = "^%x+[uU]?[lL][lL]()",
    number_mantissa     = { "^%d+%.?%d*()", "^%d*%.%d+()" },
    number_mantissa_hex = { "^%x+%.?%x*()", "^%x*%.%x+()" }, -- Lua 5.1 and Lua 5.2
    number_exponent     = "^[eE][%+%-]?%d+()",
    number_exponent_hex = "^[pP][%+%-]?%d+()", -- Lua 5.2
    number_hex          = "^0[xX]()",
    number_imaginary    = "^[iI]()",
    word                = "^([%a_][%w_]*)()",
}
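
-- Example: the long-string pattern captures the equals signs, the content,
-- and the position past the closing bracket:
--   ("[==[x]==]rest"):match(lexer.patterns.long_string)  -->  "==", "x", 10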

----------------------------------------------------------------------
-- Unescape a whole string, applying [unesc_digits] and
-- [unesc_letter] as many times as required.
----------------------------------------------------------------------
local function unescape_string (s)

    -- Turn the digits of an escape sequence into the corresponding
    -- character, e.g. [unesc_digits("123") == string.char(123)].
    local function unesc_digits (backslashes, digits)
        if #backslashes%2==0 then
            -- Even number of backslashes: they escape each other, not the digits.
            -- Return them so that unesc_letter() can treat them.
            return backslashes..digits
        else
            -- Remove the odd backslash, which escapes the number sequence.
            -- The rest will be returned and parsed by unesc_letter().
            backslashes = backslashes :sub (1,-2)
        end
        local k, j, i = digits :reverse() :byte(1, 3)
        local z = string.byte "0"
        local code = (k or z) + 10*(j or z) + 100*(i or z) - 111*z
        if code > 255 then
            error ("Illegal escape sequence '\\"..digits..
                   "' in string: ASCII codes must be in [0..255]")
        end
        local c = string.char (code)
        if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\092b" --> "\\b")
        return backslashes..c
    end

    -- Turn the hex digits of an escape sequence into the corresponding char.
    local function unesc_hex(backslashes, digits)
        if #backslashes%2==0 then
            return backslashes..'x'..digits
        else
            backslashes = backslashes :sub (1,-2)
        end
        local c = string.char(tonumber(digits,16))
        if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\x5cb" --> "\\b")
        return backslashes..c
    end

    -- Handle Lua 5.2 \z sequences
    local function unesc_z(backslashes, more)
        if #backslashes%2==0 then
            return backslashes..more
        else
            return backslashes :sub (1,-2)
        end
    end

    -- Take a letter [x] and return the character represented by the
    -- sequence ['\\'..x], e.g. [unesc_letter "n" == "\n"].
    local function unesc_letter(x)
        local t = {
            a = "\a", b = "\b", f = "\f",
            n = "\n", r = "\r", t = "\t", v = "\v",
            ["\\"] = "\\", ["'"] = "'", ['"'] = '"', ["\n"] = "\n" }
        return t[x] or x
    end

    s = s: gsub ("(\\+)(z%s*)", unesc_z) -- Lua 5.2
    s = s: gsub ("(\\+)([0-9][0-9]?[0-9]?)", unesc_digits)
    s = s: gsub ("(\\+)x([0-9a-fA-F][0-9a-fA-F])", unesc_hex) -- Lua 5.2
    s = s: gsub ("\\(%D)",unesc_letter)
    return s
end
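
-- Example: unescape_string [[a\65\x42\nc]] == "aAB\nc" -- decimal, hex and
-- letter escapes are decoded in that order, after \z handling.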

lexer.extractors = {
    "extract_long_comment", "extract_short_comment",
    "extract_short_string", "extract_word", "extract_number",
    "extract_long_string", "extract_symbol" }



----------------------------------------------------------------------
-- Really extract the next token from the raw string
-- (and update the index).
-- loc: offset of the position just after spaces and comments
-- previous_i: offset in src before extraction began
----------------------------------------------------------------------
function lexer :extract ()
    local attached_comments = { }
    local function gen_token(...)
        local token = M.new_token(...)
        if #attached_comments>0 then -- attach previous comments to token
            local comments = M.new_comment(attached_comments)
            token.lineinfo.first.comments = comments
            if self.lineinfo_last_extracted then
                self.lineinfo_last_extracted.comments = comments
            end
            attached_comments = { }
        end
        token.lineinfo.first.facing = self.lineinfo_last_extracted
        self.lineinfo_last_extracted.facing = assert(token.lineinfo.first)
        self.lineinfo_last_extracted = assert(token.lineinfo.last)
        return token
    end
    while true do -- loop until a non-comment token is found

        -- skip whitespaces
        self.i = self.src:match (self.patterns.spaces, self.i)
        if self.i>#self.src then
            local fli = self.posfact :get_position (#self.src+1)
            local lli = self.posfact :get_position (#self.src+1) -- ok?
            local tok = gen_token("Eof", "eof", M.new_lineinfo(fli, lli))
            tok.lineinfo.last.facing = lli
            return tok
        end
        local i_first = self.i -- loc = position after whitespaces

        -- try every extractor until a token is found
        for _, extractor in ipairs(self.extractors) do
            local tag, content, xtra = self [extractor] (self)
            if tag then
                local fli = self.posfact :get_position (i_first)
                local lli = self.posfact :get_position (self.i-1)
                local lineinfo = M.new_lineinfo(fli, lli)
                if tag=='Comment' then
                    local prev_comment = attached_comments[#attached_comments]
                    if not xtra -- new comment is short
                    and prev_comment and not prev_comment[2] -- prev comment is short
                    and prev_comment.lineinfo.last.line+1==fli.line then -- adjacent lines
                        -- concatenate with the previous comment
                        prev_comment[1] = prev_comment[1].."\n"..content -- TODO quadratic, BAD!
                        prev_comment.lineinfo.last = lli
                    else -- accumulate a new comment line
                        local comment = M.new_comment_line(content, lineinfo, xtra)
                        table.insert(attached_comments, comment)
                    end
                    break -- back to skipping spaces
                else -- not a comment: a real token, then
                    return gen_token(tag, content, lineinfo)
                end -- if token is a comment
            end -- if token found
        end -- for each extractor
    end -- while token is a comment
end -- :extract()




----------------------------------------------------------------------
-- Extract a short comment.
----------------------------------------------------------------------
function lexer :extract_short_comment()
    -- TODO: handle final_short_comment
    local content, j = self.src :match (self.patterns.short_comment, self.i)
    if content then self.i=j; return 'Comment', content, nil end
end

----------------------------------------------------------------------
-- Extract a long comment.
----------------------------------------------------------------------
function lexer :extract_long_comment()
    local equals, content, j = self.src:match (self.patterns.long_comment, self.i)
    if j then self.i = j; return "Comment", content, #equals end
end

----------------------------------------------------------------------
-- Extract a '...' or "..." short string.
----------------------------------------------------------------------
function lexer :extract_short_string()
    local k = self.src :sub (self.i,self.i) -- first char
    if k~=[[']] and k~=[["]] then return end -- no match
    local i = self.i + 1
    local j = i
    while true do
        -- next interesting char
        local x,y; x, j, y = self.src :match ("([\\\r\n"..k.."])()(.?)", j)
        if x == '\\' then
            if y == 'z' then -- Lua 5.2 \z
                j = self.src :match ("^%s*()", j+1)
            else
                j=j+1 -- escaped char
            end
        elseif x == k then break -- end of string
        else
            assert (not x or x=='\r' or x=='\n')
            return nil, 'Unterminated string'
        end
    end
    self.i = j

    return 'String', unescape_string (self.src :sub (i,j-2))
end

----------------------------------------------------------------------
-- Extract an Id or a Keyword.
----------------------------------------------------------------------
function lexer :extract_word()
    local word, j = self.src:match (self.patterns.word, self.i)
    if word then
        self.i = j
        return (self.alpha [word] and 'Keyword' or 'Id'), word
    end
end

----------------------------------------------------------------------
-- Extract a Number.
----------------------------------------------------------------------
function lexer :extract_number()
    local patt = self.patterns
    local s = self.src
    local j = s:match(patt.number_hex, self.i)
    local hex = j ~= nil
    local longint   = hex and patt.number_longint_hex     or patt.number_longint
    local mantissa1 = hex and patt.number_mantissa_hex[1] or patt.number_mantissa[1]
    local mantissa2 = hex and patt.number_mantissa_hex[2] or patt.number_mantissa[2]
    local exponent  = hex and patt.number_exponent_hex    or patt.number_exponent
    if not hex then j = self.i end

    local t = s:match(longint, j)
    if t then
        j = t
    else
        j = s:match(mantissa1, j) or s:match(mantissa2, j)
        if not j then return end
        j = s:match(exponent, j) or j
        j = s:match(patt.number_imaginary, j) or j
    end

    local str = self.src:sub (self.i, j-1)
    self.i = j
    -- A number was found: interpret it with tonumber() and return it.
    -- [str] is returned as a fallback when processing formats not
    -- supported by the current interpreter.
    return 'Number', (tonumber (str) or str)
end

----------------------------------------------------------------------
-- Extract a long string.
----------------------------------------------------------------------
function lexer :extract_long_string()
    local _, content, j = self.src :match (self.patterns.long_string, self.i)
    if j then self.i = j; return 'String', content end
end

----------------------------------------------------------------------
-- Extract a symbol.
----------------------------------------------------------------------
function lexer :extract_symbol()
    local k = self.src:sub (self.i,self.i)
    local symk = self.sym [k]  -- symbols starting with `k`
    if not symk then
        self.i = self.i + 1
        return 'Keyword', k
    end
    for _, sym in pairs (symk) do
        if sym == self.src:sub (self.i, self.i + #sym - 1) then
            self.i = self.i + #sym
            return 'Keyword', sym
        end
    end
    self.i = self.i+1
    return 'Keyword', k
end

----------------------------------------------------------------------
-- Add a keyword to the list of keywords recognized by the lexer.
----------------------------------------------------------------------
function lexer :add (w, ...)
    assert(not ..., "lexer :add() takes only one arg, although possibly a table")
    if type (w) == "table" then
        for _, x in ipairs (w) do self :add (x) end
    else
        if w:match (self.patterns.word .. "$") then self.alpha [w] = true
        elseif w:match "^%p%p+$" then
            local k = w:sub(1,1)
            local list = self.sym [k]
            if not list then list = { }; self.sym [k] = list end
            table.insert (list, w)
        elseif w:match "^%p$" then return
        else error "Invalid keyword" end
    end
end

----------------------------------------------------------------------
-- Return the [n]th next token, without consuming it.
-- [n] defaults to 1. If it goes past the end of the stream, an EOF
-- token is returned.
----------------------------------------------------------------------
function lexer :peek (n)
    if not n then n=1 end
    if n > #self.peeked then
        for i = #self.peeked+1, n do
            self.peeked [i] = self :extract()
        end
    end
    return self.peeked [n]
end

----------------------------------------------------------------------
-- Return the [n]th next token, removing it as well as the 0..n-1
-- previous tokens. [n] defaults to 1. If it goes past the end of the
-- stream, an EOF token is returned.
----------------------------------------------------------------------
function lexer :next (n)
    n = n or 1
    self :peek (n)
    local a
    for i=1,n do
        a = table.remove (self.peeked, 1)
        -- TODO: is this used anywhere? I think not. a.lineinfo.last may be nil.
        --self.lastline = a.lineinfo.last.line
    end
    self.lineinfo_last_consumed = a.lineinfo.last
    return a
end
|
||||
|
||||
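-- Illustrative usage (a sketch, not part of the original source): `peek`
-- looks ahead without consuming, `next` consumes. The token shapes shown
-- in the comments are assumptions about this lexer's output.
--
--   local lx = lexer :newstream ("x = 1", "@example")
--   lx :peek()       -- an `Id` token for "x"; the stream hasn't advanced
--   lx :peek(2)      -- the `Keyword` token for "="
--   lx :next()       -- consumes and returns the `Id` token for "x"
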
----------------------------------------------------------------------
-- Returns an object which saves the stream's current state.
----------------------------------------------------------------------
-- FIXME there are more fields than that to save
function lexer :save () return { self.i; {unpack(self.peeked) } } end

----------------------------------------------------------------------
-- Restore the stream's state, as saved by method [save].
----------------------------------------------------------------------
-- FIXME there are more fields than that to restore
function lexer :restore (s) self.i=s[1]; self.peeked=s[2] end

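-- Illustrative usage (a sketch, not part of the original source):
-- backtracking a failed parse attempt. `try_grammar_rule` is hypothetical.
--
--   local snapshot = lx :save()
--   local ok, ast = pcall (try_grammar_rule, lx)
--   if not ok then lx :restore (snapshot) end
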
----------------------------------------------------------------------
-- Resynchronize: cancel any token in self.peeked, by emptying the
-- list and resetting the indexes.
----------------------------------------------------------------------
function lexer :sync()
    local p1 = self.peeked[1]
    if p1 then
        local li_first = p1.lineinfo.first
        if li_first.comments then li_first = li_first.comments.lineinfo.first end
        self.i = li_first.offset
        self.column_offset = self.i - li_first.column
        self.peeked = { }
        self.attached_comments = p1.lineinfo.first.comments or { }
    end
end

----------------------------------------------------------------------
-- Take over the source and offset of an old lexer.
----------------------------------------------------------------------
function lexer :takeover(old)
    self :sync(); old :sync()
    for _, field in ipairs{ 'i', 'src', 'attached_comments', 'posfact' } do
        self[field] = old[field]
    end
    return self
end

----------------------------------------------------------------------
-- Return the current position in the sources. This position is between
-- two tokens, and can be within a space / comment area, and can therefore
-- have a non-null width. :lineinfo_left() returns the beginning of the
-- separation area, :lineinfo_right() returns the end of that area.
--
--        ____ last consumed token         ____ first unconsumed token
--       /                                /
--   XXXXX    <spaces and comments>   YYYYY
--        \____                            \____
--             :lineinfo_left()                 :lineinfo_right()
----------------------------------------------------------------------
function lexer :lineinfo_right()
    return self :peek(1).lineinfo.first
end

function lexer :lineinfo_left()
    return self.lineinfo_last_consumed
end

----------------------------------------------------------------------
-- Create a new lexstream.
----------------------------------------------------------------------
function lexer :newstream (src_or_stream, name)
    name = name or "?"
    if type(src_or_stream)=='table' then -- it's a stream
        return setmetatable ({ }, self) :takeover (src_or_stream)
    elseif type(src_or_stream)=='string' then -- it's a source string
        local src = src_or_stream
        local pos1 = M.new_position(1, 1, 1, name)
        local stream = {
            src_name = name;    -- Name of the file
            src      = src;     -- The source, as a single string
            peeked   = { };     -- Tokens already peeked, but not discarded yet
            i        = 1;       -- Character offset in src
            attached_comments = { };  -- Comments accumulator
            lineinfo_last_extracted = pos1;
            lineinfo_last_consumed  = pos1;
            posfact  = M.new_position_factory (src_or_stream, name)
        }
        setmetatable (stream, self)

        -- Skip an initial sharp-bang for Unix scripts
        -- FIXME: redundant with mlp.chunk()
        if src and src :match "^#!" then
            local endofline = src :find "\n"
            stream.i = endofline and (endofline + 1) or #src
        end
        return stream
    else
        assert(false, ":newstream() takes a source string or a stream, not a "..
                      type(src_or_stream))
    end
end

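-- Illustrative usage (a sketch, not part of the original source): building
-- a stream from a source string, then draining it token by token.
--
--   local lx = lexer :newstream ("local n = 42", "@example")
--   repeat
--       local tok = lx :next()
--       print (tok.tag, tok[1])
--   until tok.tag == 'Eof'
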
----------------------------------------------------------------------
-- If there are no ... args, return the token a's content if a is a
-- `Keyword{ }` (its truth value is true), or false otherwise. If there
-- are ... args, they must be strings: if the token a is a keyword
-- whose content is one of the ... args, return that content (its
-- truth value is true); if a is not a keyword, or its content is not
-- among the ... args, return false.
----------------------------------------------------------------------
function lexer :is_keyword (a, ...)
    if not a or a.tag ~= "Keyword" then return false end
    local words = {...}
    if #words == 0 then return a[1] end
    for _, w in ipairs (words) do
        if w == a[1] then return w end
    end
    return false
end

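-- Illustrative usage (a sketch, not part of the original source):
--
--   local tok = lx :peek()
--   if lx :is_keyword (tok, "then", "do") then
--       -- tok is a `Keyword` whose content is "then" or "do"
--   end
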
----------------------------------------------------------------------
-- Cause an error if the next token isn't a keyword whose content
-- is listed among the ... args (which must be strings).
----------------------------------------------------------------------
function lexer :check (...)
    local words = {...}
    local a = self :next()
    local function err ()
        error ("Got " .. tostring (a) ..
               ", expected one of these keywords: '" ..
               table.concat (words, "', '") .. "'")
    end
    if not a or a.tag ~= "Keyword" then err () end
    if #words == 0 then return a[1] end
    for _, w in ipairs (words) do
        if w == a[1] then return w end
    end
    err ()
end

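-- Illustrative usage (a sketch, not part of the original source): a parser
-- requiring a closing keyword.
--
--   lx :check ("end")   -- raises an error unless the next token is `end`
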
----------------------------------------------------------------------
-- Return a copy of this lexer, with independent keyword and symbol
-- tables, so that the copy can be modified without affecting the
-- original.
----------------------------------------------------------------------
function lexer :clone()
    local alpha_clone, sym_clone = { }, { }
    for word in pairs(self.alpha) do alpha_clone[word] = true end
    for letter, list in pairs(self.sym) do sym_clone[letter] = { unpack(list) } end
    local clone = { alpha = alpha_clone, sym = sym_clone }
    setmetatable(clone, self)
    clone.__index = clone
    return clone
end

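-- Illustrative usage (a sketch, not part of the original source): a clone
-- can be extended without affecting the lexer it was derived from.
--
--   local dialect = lexer :clone()
--   dialect :add "ternary"   -- recognized by `dialect` only
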
----------------------------------------------------------------------
-- Cancel everything left in a lexer; all subsequent attempts at
-- `:peek()` or `:next()` will return `Eof`.
----------------------------------------------------------------------
function lexer :kill()
    self.i = #self.src+1
    self.peeked = { }
    self.attached_comments = { }
    self.lineinfo_last = self.posfact :get_position (#self.src+1)
end

return M
@ -0,0 +1,295 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
----------------------------------------------------------------------

----------------------------------------------------------------------
----------------------------------------------------------------------
--
-- Lua objects pretty-printer
--
----------------------------------------------------------------------
----------------------------------------------------------------------

local M = { }

M.DEFAULT_CFG = {
    hide_hash      = false; -- If true, don't print the hash (non-array) part of tables
    metalua_tag    = true;  -- Use Metalua's backtick syntax sugar?
    fix_indent     = nil;   -- If a number, number of indentation spaces;
                            -- if false, indent to the previous brace.
    line_max       = nil;   -- If a number, try to avoid making lines with
                            -- more than this number of chars.
    initial_indent = 0;     -- If a number, start at this level of indentation
    keywords       = { };   -- Set of keywords which must not use Lua's field
                            -- shortcuts {["foo"]=...} -> {foo=...}
}

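-- Illustrative configuration (a sketch, not part of the original source).
-- Note that a cfg table passed to M.tostring replaces, rather than merges
-- with, M.DEFAULT_CFG.
--
--   local cfg = { line_max = 80, fix_indent = 2, metalua_tag = false }
--   print (M.tostring ({ 1, 2, x = 3 }, cfg))
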
local function valid_id(cfg, x)
    if type(x) ~= "string" then return false end
    if not x :match "^[a-zA-Z_][a-zA-Z0-9_]*$" then return false end
    if cfg.keywords and cfg.keywords[x] then return false end
    return true
end

local __tostring_cache = setmetatable({ }, {__mode='k'})

-- Retrieve the string produced by the `__tostring` metamethod if present,
-- return `false` otherwise. Results are cached in `__tostring_cache`.
local function __tostring(x)
    local the_string = __tostring_cache[x]
    if the_string~=nil then return the_string end
    local mt = getmetatable(x)
    if mt then
        local __tostring = mt.__tostring
        if __tostring then
            the_string = __tostring(x)
            __tostring_cache[x] = the_string
            return the_string
        end
    end
    if x~=nil then __tostring_cache[x] = false end -- nil is an illegal key
    return false
end

local xlen -- mutually recursive with `xlen_type`

local xlen_cache = setmetatable({ }, {__mode='k'})

-- Helpers for the `xlen` function
local xlen_type = {
    ["nil"] = function ( ) return 3 end;
    number  = function (x) return #tostring(x) end;
    boolean = function (x) return x and 4 or 5 end;
    string  = function (x) return #string.format("%q", x) end;
}

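-- Worked examples (illustrative, not part of the original source) of the
-- single-line widths computed above:
--
--   xlen_type["nil"] ()        --> 3  (#"nil")
--   xlen_type.boolean (false)  --> 5  (#"false")
--   xlen_type.string ('a"b')   --> 6  (%q renders it as "a\"b")
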
function xlen_type.table (adt, cfg, nested)
    local custom_string = __tostring(adt)
    if custom_string then return #custom_string end

    -- Circularly referenced objects are printed with the plain
    -- `tostring` function in nested positions.
    if nested [adt] then return #tostring(adt) end
    nested [adt] = true

    local has_tag  = cfg.metalua_tag and valid_id(cfg, adt.tag)
    local alen     = #adt
    local has_arr  = alen>0
    local has_hash = false
    local x = 0

    if not cfg.hide_hash then
        -- First pass: count the hash-part
        for k, v in pairs(adt) do
            if k=="tag" and has_tag then
                -- this is the tag -> do nothing!
            elseif type(k)=="number" and k<=alen and math.fmod(k,1)==0 and k>0 then
                -- array-part pair -> do nothing!
            else
                has_hash = true
                if valid_id(cfg, k) then x = x + #k
                else x = x + xlen (k, cfg, nested) + 2 end -- count surrounding brackets
                x = x + xlen (v, cfg, nested) + 5 -- count " = " and ", "
            end
        end
    end

    for i = 1, alen do x = x + xlen (adt[i], cfg, nested) + 2 end -- count ", "

    nested[adt] = false -- No more nested calls

    if not (has_tag or has_arr or has_hash) then return 3 end
    if has_tag then x = x + #adt.tag + 1 end
    if not (has_arr or has_hash) then return x end
    if not has_hash and alen==1 and type(adt[1])~="table" then
        return x-2 -- subtract the extraneous ", "
    end
    return x+2 -- count "{ " and " }", subtract the extraneous ", "
end


-- Compute the number of chars it would require to display the table
-- on a single line. Helps to decide whether some carriage returns are
-- required. Since the size of each sub-table is required many times,
-- it's cached in [xlen_cache].
xlen = function (x, cfg, nested)
    -- no need to compute length for 1-line prints
    if not cfg.line_max then return 0 end
    nested = nested or { }
    if x==nil then return #"nil" end
    local len = xlen_cache[x]
    if len then return len end
    local f = xlen_type[type(x)]
    if not f then return #tostring(x) end
    len = f (x, cfg, nested)
    xlen_cache[x] = len
    return len
end

local function consider_newline(p, len)
    if not p.cfg.line_max then return end
    if p.current_offset + len <= p.cfg.line_max then return end
    if p.indent < p.current_offset then
        p:acc "\n"; p:acc ((" "):rep(p.indent))
        p.current_offset = p.indent
    end
end

local acc_value

local acc_type = {
    ["nil"] = function(p) p:acc("nil") end;
    number  = function(p, adt) p:acc (tostring (adt)) end;
    string  = function(p, adt) p:acc ((string.format ("%q", adt) :gsub("\\\n", "\\n"))) end;
    boolean = function(p, adt) p:acc (adt and "true" or "false") end }

-- Indentation:
-- * if `cfg.fix_indent` is set to a number:
--   * add that number of spaces for each level of depth;
--   * return to the line as soon as it flushes things further left.
-- * if not, tabulate to one space after the opening brace;
--   * as a result, breaking the line before the first element never
--     saves horizontal space.

function acc_type.table(p, adt)
    if p.nested[adt] then p:acc(tostring(adt)); return end
    p.nested[adt] = true

    local has_tag  = p.cfg.metalua_tag and valid_id(p.cfg, adt.tag)
    local alen     = #adt
    local has_arr  = alen>0
    local has_hash = false

    local previous_indent = p.indent

    if has_tag then p:acc("`"); p:acc(adt.tag) end

    local function indent(p)
        if not p.cfg.fix_indent then p.indent = p.current_offset
        else p.indent = p.indent + p.cfg.fix_indent end
    end

    -- First pass: handle the hash-part
    if not p.cfg.hide_hash then
        for k, v in pairs(adt) do

            if has_tag and k=='tag' then -- skip the 'tag' field
            elseif type(k)=="number" and k<=alen and k>0 and math.fmod(k,1)==0 then
                -- skip array-part keys (consecutive positive ints up to `#adt`)
            else -- hash-part keys
                if has_hash then p:acc ", " else -- 1st hash-part pair ever found
                    p:acc "{ "; indent(p)
                end

                -- Determine whether a newline is required
                local is_id, expected_len = valid_id(p.cfg, k)
                if is_id then expected_len = #k + xlen(v, p.cfg, p.nested) + #" = , "
                else expected_len = xlen(k, p.cfg, p.nested) +
                                    xlen(v, p.cfg, p.nested) + #"[] = , " end
                consider_newline(p, expected_len)

                -- Print the key
                if is_id then p:acc(k); p:acc " = " else
                    p:acc "["; acc_value (p, k); p:acc "] = "
                end

                acc_value (p, v) -- Print the value
                has_hash = true
            end
        end
    end

    -- Now we know whether there's a hash-part, an array-part, and a tag.
    -- Tag and hash-part are already printed if they're present.
    if not has_tag and not has_hash and not has_arr then p:acc "{ }"
    elseif has_tag and not has_hash and not has_arr then -- nothing: the tag is already in acc
    else
        assert (has_hash or has_arr) -- the special case { } is already handled
        local no_brace = false
        if has_hash and has_arr then p:acc ", "
        elseif has_tag and not has_hash and alen==1 and type(adt[1])~="table" then
            -- No brace required; don't print "{", and remember not to print "}"
            p:acc (" "); acc_value (p, adt[1]) -- indent = indent+(cfg.fix_indent or 0)
            no_brace = true
        elseif not has_hash then
            -- Braces required, but not opened by the hash-part handler yet
            p:acc "{ "; indent(p)
        end

        -- Second pass: the array-part
        if not no_brace and has_arr then
            local expected_len = xlen(adt[1], p.cfg, p.nested)
            consider_newline(p, expected_len)
            acc_value(p, adt[1]) -- indent+(cfg.fix_indent or 0)
            for i=2, alen do
                p:acc ", "
                consider_newline(p, xlen(adt[i], p.cfg, p.nested))
                acc_value (p, adt[i]) -- indent+(cfg.fix_indent or 0)
            end
        end
        if not no_brace then p:acc " }" end
    end
    p.nested[adt] = false -- No more nested calls
    p.indent = previous_indent
end


function acc_value(p, v)
    local custom_string = __tostring(v)
    if custom_string then p:acc(custom_string) else
        local f = acc_type[type(v)]
        if f then f(p, v) else p:acc(tostring(v)) end
    end
end


-- FIXME: new_indent seems to be always nil?!
-- FIXME: the accumulator function should be configurable,
-- so that print() doesn't need to buffer the whole string
-- before starting to print.
function M.tostring(t, cfg)

    cfg = cfg or M.DEFAULT_CFG or { }

    local p = {
        cfg = cfg;
        indent = 0;
        current_offset = cfg.initial_indent or 0;
        buffer = { };
        nested = { };
        acc = function(self, str)
                  table.insert(self.buffer, str)
                  self.current_offset = self.current_offset + #str
              end;
    }
    acc_value(p, t)
    return table.concat(p.buffer)
end

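-- Illustrative usage (a sketch, not part of the original source); the
-- exact hash-part ordering follows pairs() and is therefore unspecified.
--
--   M.tostring ({ 1, 2, x = 3 })       --> '{ x = 3, 1, 2 }'
--   M.tostring ({ tag = 'Id', 'x' })   --> '`Id "x"' (metalua_tag sugar)
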
function M.print(...) return print(M.tostring(...)) end

function M.sprintf(fmt, ...)
    local args = {...}
    for i, v in pairs(args) do
        local t = type(v)
        if t=='table' then args[i] = M.tostring(v)
        elseif t=='nil' then args[i] = 'nil' end
    end
    return string.format(fmt, unpack(args))
end

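-- Illustrative usage (a sketch, not part of the original source): table
-- arguments are pretty-printed before being handed to string.format.
--
--   M.printf ("ast = %s", { tag = 'Id', 'x' })   -- prints: ast = `Id "x"
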
function M.printf(...) print(M.sprintf(...)) end

return M