-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
--     Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------

require 'checks'

local M = { }

local lexer = { alpha={ }, sym={ } }
lexer.__index=lexer
lexer.__type='lexer.stream'

M.lexer = lexer


local debugf = function() end
-- local debugf=printf

----------------------------------------------------------------------
-- Some locale settings produce bad results, e.g. the French locale
-- expects float numbers to use commas instead of periods.
-- TODO: change the number parser into something locale-independent;
-- locales are nasty.
----------------------------------------------------------------------
os.setlocale('C')

local MT = { }

M.metatables=MT

----------------------------------------------------------------------
-- Create a new metatable, for a new class of objects.
----------------------------------------------------------------------
local function new_metatable(name)
    local mt = { __type = 'lexer.'..name };
    mt.__index = mt
    MT[name] = mt
end


----------------------------------------------------------------------
-- Position: represent a point in a source file.
----------------------------------------------------------------------
new_metatable 'position'

local position_idx=1

function M.new_position(line, column, offset, source)
    checks('number', 'number', 'number', 'string')
    local id = position_idx; position_idx = position_idx+1
    return setmetatable({line=line, column=column, offset=offset,
                         source=source, id=id}, MT.position)
end

function MT.position :__tostring()
    return string.format("<%s%s|L%d|C%d|K%d>",
        self.comments and "C|" or "",
        self.source, self.line, self.column, self.offset)
end
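
-- Illustrative usage sketch (not part of the module): positions print with
-- their source name, line, column and byte offset ("K"). This assumes the
-- 'checks' library is installed, since M.new_position validates its args.
--   local pos = M.new_position(1, 1, 1, "example.lua")
--   print(pos) --> <example.lua|L1|C1|K1>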


----------------------------------------------------------------------
-- Position factory: convert offsets into line/column/offset positions.
----------------------------------------------------------------------
new_metatable 'position_factory'

function M.new_position_factory(src, src_name)
    -- assert(type(src)=='string')
    -- assert(type(src_name)=='string')
    local lines = { 1 }
    for offset in src :gmatch '\n()' do table.insert(lines, offset) end
    local max = #src+1
    table.insert(lines, max+1) -- +1 includes Eof
    return setmetatable({ src_name=src_name, line2offset=lines, max=max },
                        MT.position_factory)
end

function MT.position_factory :get_position (offset)
    -- assert(type(offset)=='number')
    assert(offset<=self.max)
    local line2offset = self.line2offset
    local left = self.last_left or 1
    if offset<line2offset[left] then left=1 end
    local right = left+1
    if line2offset[right]<=offset then right = right+1 end
    if line2offset[right]<=offset then right = #line2offset end
    while true do
        -- print (" trying lines "..left.."/"..right..", offsets "..line2offset[left]..
        --        "/"..line2offset[right].." for offset "..offset)
        -- assert(line2offset[left]<=offset)
        -- assert(offset<line2offset[right])
        -- assert(left<right)
        if left+1==right then break end
        local middle = math.floor((left+right)/2)
        if line2offset[middle]<=offset then left=middle else right=middle end
    end
    -- assert(left+1==right)
    -- printf("found that offset %d is between %d and %d, hence on line %d",
    --        offset, line2offset[left], line2offset[right], left)
    local line = left
    local column = offset - line2offset[line] + 1
    self.last_left = left
    return M.new_position(line, column, offset, self.src_name)
end
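
-- Illustrative sketch (not part of the module): the factory records the
-- offset of each line start, then binary-searches them (with a cache of the
-- last hit) to map a byte offset back to a line/column pair.
--   local pf = M.new_position_factory("ab\ncd\n", "example.lua")
--   print(pf :get_position(5)) --> <example.lua|L2|C2|K5>  ('d' is line 2, column 2)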


----------------------------------------------------------------------
-- Lineinfo: represent a node's range in a source file;
-- embed information about prefix and suffix comments.
----------------------------------------------------------------------
new_metatable 'lineinfo'

function M.new_lineinfo(first, last)
    checks('lexer.position', 'lexer.position')
    return setmetatable({first=first, last=last}, MT.lineinfo)
end

function MT.lineinfo :__tostring()
    local fli, lli = self.first, self.last
    local line   = fli.line;   if line~=lli.line     then line  =line  ..'-'..lli.line   end
    local column = fli.column; if column~=lli.column then column=column..'-'..lli.column end
    local offset = fli.offset; if offset~=lli.offset then offset=offset..'-'..lli.offset end
    return string.format("<%s%s|L%s|C%s|K%s%s>",
        fli.comments and "C|" or "",
        fli.source, line, column, offset,
        lli.comments and "|C" or "")
end
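
-- Illustrative sketch (not part of the module): a lineinfo spanning two
-- positions prints both ends of each field that differs.
--   local li = M.new_lineinfo(M.new_position(1,1,1,"f"), M.new_position(2,3,7,"f"))
--   print(li) --> <f|L1-2|C1-3|K1-7>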


----------------------------------------------------------------------
-- Token: atomic Lua language element, with a category, a content,
-- and some lineinfo relating it to its original source.
----------------------------------------------------------------------
new_metatable 'token'

function M.new_token(tag, content, lineinfo)
    --printf("TOKEN `%s{ %q, lineinfo = %s} boundaries %d, %d",
    --       tag, content, tostring(lineinfo), lineinfo.first.id, lineinfo.last.id)
    return setmetatable({tag=tag, lineinfo=lineinfo, content}, MT.token)
end

function MT.token :__tostring()
    --return string.format("`%s{ %q, %s }", self.tag, self[1], tostring(self.lineinfo))
    return string.format("`%s %q", self.tag, self[1])
end


----------------------------------------------------------------------
-- Comment: series of comment blocks with associated lineinfo.
-- To be attached to the tokens just before and just after them.
----------------------------------------------------------------------
new_metatable 'comment'

function M.new_comment(lines)
    local first = lines[1].lineinfo.first
    local last = lines[#lines].lineinfo.last
    local lineinfo = M.new_lineinfo(first, last)
    return setmetatable({lineinfo=lineinfo, unpack(lines)}, MT.comment)
end

function MT.comment :text()
    local last_line = self[1].lineinfo.last.line
    local acc = { }
    for i, line in ipairs(self) do
        local nreturns = line.lineinfo.first.line - last_line
        table.insert(acc, ("\n"):rep(nreturns))
        table.insert(acc, line[1])
    end
    return table.concat(acc)
end

function M.new_comment_line(text, lineinfo, nequals)
    checks('string', 'lexer.lineinfo', '?number')
    return { lineinfo = lineinfo, text, nequals }
end


----------------------------------------------------------------------
-- Patterns used by [lexer :extract] to decompose the raw string into
-- correctly tagged tokens.
----------------------------------------------------------------------
lexer.patterns = {
    spaces              = "^[ \r\n\t]*()",
    short_comment       = "^%-%-([^\n]*)\n?()",
    --final_short_comment = "^%-%-([^\n]*)()$",
    long_comment        = "^%-%-%[(=*)%[\n?(.-)%]%1%]()",
    long_string         = "^%[(=*)%[\n?(.-)%]%1%]()",
    number_longint      = "^%d+[uU]?[lL][lL]()",
    number_longint_hex  = "^%x+[uU]?[lL][lL]()",
    number_mantissa     = { "^%d+%.?%d*()", "^%d*%.%d+()" },
    number_mantissa_hex = { "^%x+%.?%x*()", "^%x*%.%x+()" }, -- Lua 5.1 and Lua 5.2
    number_exponent     = "^[eE][%+%-]?%d+()",
    number_exponent_hex = "^[pP][%+%-]?%d+()", -- Lua 5.2
    number_hex          = "^0[xX]()",
    number_imaginary    = "^[iI]()",
    word                = "^([%a_][%w_]*)()",
}
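
-- Illustrative sketch (not part of the module): each pattern ends with an
-- empty capture "()", which makes string.match return the offset just past
-- the match, so the lexer can advance self.i without recomputing lengths.
--   ("   hello"):match "^[ \r\n\t]*()"      -- returns 4
--   ("--hi\nx"):match "^%-%-([^\n]*)\n?()"  -- returns "hi", 6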


----------------------------------------------------------------------
-- Unescape a whole string, applying [unesc_digits] and
-- [unesc_letter] as many times as required.
----------------------------------------------------------------------
local function unescape_string (s)

    -- Turn the digits of an escape sequence into the corresponding
    -- character, e.g. [unesc_digits("123") == string.char(123)].
    local function unesc_digits (backslashes, digits)
        if #backslashes%2==0 then
            -- Even number of backslashes: they escape each other, not the digits.
            -- Return them so that unesc_letter() can treat them.
            return backslashes..digits
        else
            -- Remove the odd backslash, which escapes the number sequence.
            -- The rest will be returned and parsed by unesc_letter().
            backslashes = backslashes :sub (1,-2)
        end
        local k, j, i = digits :reverse() :byte(1, 3)
        local z = string.byte "0"
        local code = (k or z) + 10*(j or z) + 100*(i or z) - 111*z
        if code > 255 then
            error ("Illegal escape sequence '\\"..digits..
                   "' in string: ASCII codes must be in [0..255]")
        end
        local c = string.char (code)
        if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\092b" --> "\\b")
        return backslashes..c
    end

    -- Turn the hex digits of an escape sequence into the corresponding char.
    local function unesc_hex(backslashes, digits)
        if #backslashes%2==0 then
            return backslashes..'x'..digits
        else
            backslashes = backslashes :sub (1,-2)
        end
        local c = string.char(tonumber(digits,16))
        if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\x5cb" --> "\\b")
        return backslashes..c
    end

    -- Handle Lua 5.2 \z sequences.
    local function unesc_z(backslashes, more)
        if #backslashes%2==0 then
            return backslashes..more
        else
            return backslashes :sub (1,-2)
        end
    end

    -- Take a letter [x] and return the character represented by the
    -- sequence ['\\'..x], e.g. [unesc_letter "n" == "\n"].
    local function unesc_letter(x)
        local t = {
            a = "\a", b = "\b", f = "\f",
            n = "\n", r = "\r", t = "\t", v = "\v",
            ["\\"] = "\\", ["'"] = "'", ['"'] = '"', ["\n"] = "\n" }
        return t[x] or x
    end

    s = s: gsub ("(\\+)(z%s*)", unesc_z) -- Lua 5.2
    s = s: gsub ("(\\+)([0-9][0-9]?[0-9]?)", unesc_digits)
    s = s: gsub ("(\\+)x([0-9a-fA-F][0-9a-fA-F])", unesc_hex) -- Lua 5.2
    s = s: gsub ("\\(%D)",unesc_letter)
    return s
end
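
-- Illustrative sketch (not part of the module): unescape_string is local,
-- so this only documents expected behaviour on raw (long-bracket) inputs.
--   unescape_string [[\65\66\67]] --> "ABC"   (decimal escapes)
--   unescape_string [[\x41]]      --> "A"     (Lua 5.2 hex escape)
--   unescape_string [[a\nb]]      --> "a", "b" separated by a real newline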


lexer.extractors = {
    "extract_long_comment", "extract_short_comment",
    "extract_short_string", "extract_word", "extract_number",
    "extract_long_string", "extract_symbol" }


----------------------------------------------------------------------
-- Really extract the next token from the raw string, and update the
-- index. [i_first] is the offset in src just after the leading spaces
-- and comments, i.e. where the token's own text begins.
----------------------------------------------------------------------
function lexer :extract ()
    local attached_comments = { }
    local function gen_token(...)
        local token = M.new_token(...)
        if #attached_comments>0 then -- attach previous comments to token
            local comments = M.new_comment(attached_comments)
            token.lineinfo.first.comments = comments
            if self.lineinfo_last_extracted then
                self.lineinfo_last_extracted.comments = comments
            end
            attached_comments = { }
        end
        token.lineinfo.first.facing = self.lineinfo_last_extracted
        self.lineinfo_last_extracted.facing = assert(token.lineinfo.first)
        self.lineinfo_last_extracted = assert(token.lineinfo.last)
        return token
    end
    while true do -- loop until a non-comment token is found

        -- skip whitespaces
        self.i = self.src:match (self.patterns.spaces, self.i)
        if self.i>#self.src then
            local fli = self.posfact :get_position (#self.src+1)
            local lli = self.posfact :get_position (#self.src+1) -- ok?
            local tok = gen_token("Eof", "eof", M.new_lineinfo(fli, lli))
            tok.lineinfo.last.facing = lli
            return tok
        end
        local i_first = self.i -- position after whitespaces

        -- try every extractor until a token is found
        for _, extractor in ipairs(self.extractors) do
            local tag, content, xtra = self [extractor] (self)
            if tag then
                local fli = self.posfact :get_position (i_first)
                local lli = self.posfact :get_position (self.i-1)
                local lineinfo = M.new_lineinfo(fli, lli)
                if tag=='Comment' then
                    local prev_comment = attached_comments[#attached_comments]
                    if not xtra -- new comment is short
                    and prev_comment and not prev_comment[2] -- prev comment is short
                    and prev_comment.lineinfo.last.line+1==fli.line then -- adjacent lines
                        -- concat with previous comment
                        prev_comment[1] = prev_comment[1].."\n"..content -- TODO quadratic, BAD!
                        prev_comment.lineinfo.last = lli
                    else -- accumulate comment
                        local comment = M.new_comment_line(content, lineinfo, xtra)
                        table.insert(attached_comments, comment)
                    end
                    break -- back to skipping spaces
                else -- not a comment: real token, then
                    return gen_token(tag, content, lineinfo)
                end -- if token is a comment
            end -- if token found
        end -- for each extractor
    end -- while token is a comment
end -- :extract()
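
-- Note (descriptive, not part of the original source): comments are never
-- returned as tokens. They accumulate in attached_comments, then gen_token
-- hangs them on the surrounding tokens' lineinfo: as .comments on the next
-- token's first position and on the previous token's last position.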


----------------------------------------------------------------------
-- Extract a short comment.
----------------------------------------------------------------------
function lexer :extract_short_comment()
    -- TODO: handle final_short_comment
    local content, j = self.src :match (self.patterns.short_comment, self.i)
    if content then self.i=j; return 'Comment', content, nil end
end

----------------------------------------------------------------------
-- Extract a long comment.
----------------------------------------------------------------------
function lexer :extract_long_comment()
    local equals, content, j = self.src:match (self.patterns.long_comment, self.i)
    if j then self.i = j; return "Comment", content, #equals end
end

----------------------------------------------------------------------
-- Extract a '...' or "..." short string.
----------------------------------------------------------------------
function lexer :extract_short_string()
    local k = self.src :sub (self.i,self.i) -- first char
    if k~=[[']] and k~=[["]] then return end -- no match
    local i = self.i + 1
    local j = i
    while true do
        local x,y; x, j, y = self.src :match ("([\\\r\n"..k.."])()(.?)", j) -- next interesting char
        if x == '\\' then
            if y == 'z' then -- Lua 5.2 \z
                j = self.src :match ("^%s*()", j+1)
            else
                j=j+1 -- escaped char
            end
        elseif x == k then break -- end of string
        else
            assert (not x or x=='\r' or x=='\n')
            return nil, 'Unterminated string'
        end
    end
    self.i = j

    return 'String', unescape_string (self.src :sub (i,j-2))
end

----------------------------------------------------------------------
-- Extract Id or Keyword.
----------------------------------------------------------------------
function lexer :extract_word()
    local word, j = self.src:match (self.patterns.word, self.i)
    if word then
        self.i = j
        return (self.alpha [word] and 'Keyword' or 'Id'), word
    end
end

----------------------------------------------------------------------
-- Extract Number.
----------------------------------------------------------------------
function lexer :extract_number()
    local patt = self.patterns
    local s = self.src
    local j = s:match(patt.number_hex, self.i)
    local hex = j ~= nil
    local longint   = hex and patt.number_longint_hex     or patt.number_longint
    local mantissa1 = hex and patt.number_mantissa_hex[1] or patt.number_mantissa[1]
    local mantissa2 = hex and patt.number_mantissa_hex[2] or patt.number_mantissa[2]
    local exponent  = hex and patt.number_exponent_hex    or patt.number_exponent
    if not hex then j = self.i end

    local t = s:match(longint, j)
    if t then
        j = t
    else
        j = s:match(mantissa1, j) or s:match(mantissa2, j)
        if not j then return end
        j = s:match(exponent, j) or j
        j = s:match(patt.number_imaginary, j) or j
    end

    local str = self.src:sub (self.i, j-1)
    self.i = j
    -- Number found: interpret it with tonumber() and return the result.
    -- Fall back to the raw string for formats not supported by the
    -- current interpreter.
    return 'Number', (tonumber (str) or str)
end

----------------------------------------------------------------------
-- Extract long string.
----------------------------------------------------------------------
function lexer :extract_long_string()
    local _, content, j = self.src :match (self.patterns.long_string, self.i)
    if j then self.i = j; return 'String', content end
end

----------------------------------------------------------------------
-- Extract symbol.
----------------------------------------------------------------------
function lexer :extract_symbol()
    local k = self.src:sub (self.i,self.i)
    local symk = self.sym [k] -- symbols starting with `k`
    if not symk then
        self.i = self.i + 1
        return 'Keyword', k
    end
    for _, sym in pairs (symk) do
        if sym == self.src:sub (self.i, self.i + #sym - 1) then
            self.i = self.i + #sym
            return 'Keyword', sym
        end
    end
    self.i = self.i+1
    return 'Keyword', k
end

----------------------------------------------------------------------
-- Add a keyword to the list of keywords recognized by the lexer.
----------------------------------------------------------------------
function lexer :add (w, ...)
    assert(not ..., "lexer :add() takes only one arg, although possibly a table")
    if type (w) == "table" then
        for _, x in ipairs (w) do self :add (x) end
    else
        if w:match (self.patterns.word .. "$") then self.alpha [w] = true
        elseif w:match "^%p%p+$" then
            local k = w:sub(1,1)
            local list = self.sym [k]
            if not list then list = { }; self.sym [k] = list end
            table.insert (list, w)
        elseif w:match "^%p$" then return
        else error "Invalid keyword" end
    end
end
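
-- Illustrative sketch (not part of the module): alphanumeric keywords go
-- into self.alpha; multi-char symbols are indexed by their first char in
-- self.sym; single-char symbols need no registration, since extract_symbol
-- already returns any lone punctuation as a Keyword.
--   local lx = lexer :clone()
--   lx :add{ 'function', 'end', '==', '<=' }
--   -- lx.alpha['function'] == true; lx.sym['='] now contains '=='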

----------------------------------------------------------------------
-- Return the [n]th next token, without consuming it.
-- [n] defaults to 1. If it goes past the end of the stream, an EOF
-- token is returned.
----------------------------------------------------------------------
function lexer :peek (n)
    if not n then n=1 end
    if n > #self.peeked then
        for i = #self.peeked+1, n do
            self.peeked [i] = self :extract()
        end
    end
    return self.peeked [n]
end

----------------------------------------------------------------------
-- Return the [n]th next token, removing it as well as the 0..n-1
-- previous tokens. [n] defaults to 1. If it goes past the end of the
-- stream, an EOF token is returned.
----------------------------------------------------------------------
function lexer :next (n)
    n = n or 1
    self :peek (n)
    local a
    for i=1,n do
        a = table.remove (self.peeked, 1)
        -- TODO: is this used anywhere? I think not. a.lineinfo.last may be nil.
        --self.lastline = a.lineinfo.last.line
    end
    self.lineinfo_last_consumed = a.lineinfo.last
    return a
end
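
-- Illustrative sketch (not part of the module): peek() is non-destructive,
-- next() consumes. Assumes a stream built with :newstream() (see below).
--   local tok = stream :peek()  -- look ahead without consuming
--   if stream :is_keyword (tok, 'local') then stream :next() end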

----------------------------------------------------------------------
-- Returns an object which saves the stream's current state.
----------------------------------------------------------------------
-- FIXME there are more fields than that to save
function lexer :save () return { self.i; {unpack(self.peeked) } } end

----------------------------------------------------------------------
-- Restore the stream's state, as saved by method [save].
----------------------------------------------------------------------
-- FIXME there are more fields than that to restore
function lexer :restore (s) self.i=s[1]; self.peeked=s[2] end

----------------------------------------------------------------------
-- Resynchronize: cancel any token in self.peeked, by emptying the
-- list and resetting the indexes.
----------------------------------------------------------------------
function lexer :sync()
    local p1 = self.peeked[1]
    if p1 then
        local li_first = p1.lineinfo.first
        if li_first.comments then li_first=li_first.comments.lineinfo.first end
        self.i = li_first.offset
        self.column_offset = self.i - li_first.column
        self.peeked = { }
        self.attached_comments = p1.lineinfo.first.comments or { }
    end
end

----------------------------------------------------------------------
-- Take the source and offset of an old lexer.
----------------------------------------------------------------------
function lexer :takeover(old)
    self :sync(); old :sync()
    for _, field in ipairs{ 'i', 'src', 'attached_comments', 'posfact' } do
        self[field] = old[field]
    end
    return self
end

----------------------------------------------------------------------
-- Return the current position in the sources. This position is between
-- two tokens, and can be within a space / comment area, and therefore
-- have a non-null width. :lineinfo_left() returns the beginning of the
-- separation area, :lineinfo_right() returns the end of that area.
--
--     ____ last consumed token   ____ first unconsumed token
--    /                          /
--   XXXXX <spaces and comments> YYYYY
--        \____                  \____
--             :lineinfo_left()       :lineinfo_right()
----------------------------------------------------------------------
function lexer :lineinfo_right()
    return self :peek(1).lineinfo.first
end

function lexer :lineinfo_left()
    return self.lineinfo_last_consumed
end

----------------------------------------------------------------------
-- Create a new lexstream.
----------------------------------------------------------------------
function lexer :newstream (src_or_stream, name)
    name = name or "?"
    if type(src_or_stream)=='table' then -- it's a stream
        return setmetatable ({ }, self) :takeover (src_or_stream)
    elseif type(src_or_stream)=='string' then -- it's a source string
        local src = src_or_stream
        local pos1 = M.new_position(1, 1, 1, name)
        local stream = {
            src_name = name;   -- Name of the file
            src      = src;    -- The source, as a single string
            peeked   = { };    -- Already peeked, but not discarded yet, tokens
            i        = 1;      -- Character offset in src
            attached_comments = { },  -- Comments accumulator
            lineinfo_last_extracted = pos1,
            lineinfo_last_consumed  = pos1,
            posfact  = M.new_position_factory (src_or_stream, name)
        }
        setmetatable (stream, self)

        -- Skip initial sharp-bang for Unix scripts
        -- FIXME: redundant with mlp.chunk()
        if src and src :match "^#!" then
            local endofline = src :find "\n"
            stream.i = endofline and (endofline + 1) or #src
        end
        return stream
    else
        assert(false, ":newstream() takes a source string or a stream, not a "..
                      type(src_or_stream))
    end
end
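
-- Illustrative usage sketch (not part of the module), assuming this file is
-- loaded as e.g. `local M = require 'lexer'` (the module path is a guess):
--   local lx = M.lexer :clone()  -- don't mutate the shared generic lexer
--   lx :add{ 'local', 'function', 'end' }
--   local stream = lx :newstream ("local x = 42 -- a comment", "demo")
--   print(stream :next())  --> `Keyword "local"
--   print(stream :next())  --> `Id "x"
--   print(stream :next())  --> `Keyword "="
--   print(stream :next())  --> `Number "42"
--   print(stream :next())  --> `Eof "eof"  (the comment rides on its lineinfo)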

----------------------------------------------------------------------
-- If there are no ... args, return the content of token [a] if it is
-- a `Keyword{ }, false otherwise. The ... args, if any, must be
-- strings: if [a] is a keyword whose content equals one of them,
-- return that content (a true value); otherwise return false.
----------------------------------------------------------------------
function lexer :is_keyword (a, ...)
    if not a or a.tag ~= "Keyword" then return false end
    local words = {...}
    if #words == 0 then return a[1] end
    for _, w in ipairs (words) do
        if w == a[1] then return w end
    end
    return false
end

----------------------------------------------------------------------
-- Cause an error if the next token isn't a keyword whose content
-- is listed among ... args (which have to be strings).
----------------------------------------------------------------------
function lexer :check (...)
    local words = {...}
    local a = self :next()
    local function err ()
        error ("Got " .. tostring (a) ..
               ", expected one of these keywords : '" ..
               table.concat (words,"', '") .. "'") end
    if not a or a.tag ~= "Keyword" then err () end
    if #words == 0 then return a[1] end
    for _, w in ipairs (words) do
        if w == a[1] then return w end
    end
    err ()
end

----------------------------------------------------------------------
-- Create an independent copy of the lexer, with its own keyword and
-- symbol tables, so it can be extended without affecting this one.
----------------------------------------------------------------------
function lexer :clone()
    local alpha_clone, sym_clone = { }, { }
    for word in pairs(self.alpha) do alpha_clone[word]=true end
    for letter, list in pairs(self.sym) do sym_clone[letter] = { unpack(list) } end
    local clone = { alpha=alpha_clone, sym=sym_clone }
    setmetatable(clone, self)
    clone.__index = clone
    return clone
end

----------------------------------------------------------------------
-- Cancel everything left in a lexer, so that all subsequent attempts
-- at `:peek()` or `:next()` will return `Eof`.
----------------------------------------------------------------------
function lexer :kill()
    self.i = #self.src+1
    self.peeked = { }
    self.attached_comments = { }
    self.lineinfo_last = self.posfact :get_position (#self.src+1)
end

return M