local io = io

local core = require 'git.core'

local assert, pcall, print, select, setmetatable, string, type, unpack =
  assert, pcall, print, select, setmetatable, string, type, unpack

local ord = string.byte
local fmt = string.format
local concat, insert = table.concat, table.insert

local band = core.band
local rshift, lshift = core.rshift, core.lshift

local util = require 'git.util'

local to_hex = util.to_hex
local from_hex = util.from_hex
local object_sha = util.object_sha
local binary_sha = util.binary_sha
local readable_sha = util.readable_sha
local tmpfile = util.tmpfile
local reader = util.reader

module(...)

-- read git/Documentation/technical/pack-format.txt for some inspiration

-- object types: 1 = commit, 2 = tree, ...; type 5 is reserved in the pack format
local types = {'commit', 'tree', 'blob', 'tag', '???', 'ofs_delta', 'ref_delta'}

-- read a 4 byte unsigned integer stored in network byte order
local function read_int(f)
  local s = f:read(4)
  local a, b, c, d = s:byte(1, 4)
  return a*256^3 + b*256^2 + c*256 + d
end

-- read the object type and its inflated length from the variable-length header
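-- (e.g. the header bytes 0x95 0x0b decode to type 1 = commit and
-- length 0x05 + (0x0b << 4) = 0xb5, i.e. a 181-byte commit)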
local function read_object_header(f)
  local b = ord(f:read(1))
  local type = band(rshift(b, 4), 0x7)
  local len = band(b, 0xF)
  local ofs = 0
  while band(b, 0x80) ~= 0 do
    b = ord(f:read(1))
    len = len + lshift(band(b, 0x7F), ofs * 7 + 4)
    ofs = ofs + 1
  end
  return len, type
end

-- reads the ofs_delta header and returns how far back from the current
-- object's header the base object is stored
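-- (big-endian base-128 with an extra +1 per continuation byte, so e.g.
-- the bytes 0x81 0x00 decode to (0x01 + 1)*128 + 0 = 256)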
local function read_delta_header(f)
  local b = ord(f:read(1))
  local offset = band(b, 0x7F)
  while band(b, 0x80) ~= 0 do
    offset = offset + 1
    b = ord(f:read(1))
    offset = lshift(offset, 7) + band(b, 0x7F)
  end
  return offset
end

-- inflate one whole zlib stream starting at the current position in `f`;
-- `size` is the expected inflated length (currently not used to bound the read)
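-- (as used below, the stream function returned by core.inflate() yields the
-- inflated chunk, an end-of-stream flag, and the running total of compressed
-- bytes consumed)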
local function uncompress_by_len(f, size)
  local z = core.inflate()
  local chunks = {}
  local CHUNK_SIZE = 1024
  local curr_pos = f:seek()
  local inflated, eof, total
  -- read until the end of the zlib-compressed stream
  while not eof do
    local data = f:read(CHUNK_SIZE)
    inflated, eof, total = z(data)
    insert(chunks, inflated)
  end
  -- restore the file position to just past the compressed data
  f:seek('set', curr_pos + total)
  return concat(chunks)
end

-- uncompress the object from the current location in `f`
local function unpack_object(f, len, type)
  local data = uncompress_by_len(f, len)
  return data, len, type
end

-- reads a size value encoded in delta data as a little-endian base-128 number
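-- (e.g. the bytes 0x91 0x2e decode to 0x11 + (0x2e << 7) = 5905)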
local function delta_size(f)
  local size = 0
  local i = 0
  repeat
    local b = ord(f:read(1))
    size = size + lshift(band(b, 0x7F), i)
    i = i + 7
  until band(b, 0x80) == 0
  return size
end

-- returns a patched object built from string `base` according to `delta` data
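-- a delta is a stream of commands: a command byte with the high bit set
-- copies a slice of `base` (its low bits say which offset/size bytes follow),
-- while a command byte below 0x80 inserts that many literal bytes;
-- e.g. the copy command 0x91 0x05 0x0a copies 10 bytes from base offset 5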
local function patch_object(base, delta, base_type)
  -- insert delta codes into a temporary file
  local df = reader(delta)

  -- retrieve original and result size (for checks)
  local orig_size = delta_size(df)
  assert(#base == orig_size, fmt('#base(%d) ~= orig_size(%d)', #base, orig_size))

  local result_size = delta_size(df)
  local size = result_size

  local result = {}

  -- process the delta codes
  local cmd = df:read(1)
  while cmd do
    cmd = ord(cmd)
    if cmd == 0 then
      error('unexpected delta code 0')
    elseif band(cmd, 0x80) ~= 0 then -- copy a selected part of base data
      local cp_off, cp_size = 0, 0
      -- retrieve offset
      if band(cmd, 0x01) ~= 0 then cp_off = ord(df:read(1)) end
      if band(cmd, 0x02) ~= 0 then cp_off = cp_off + ord(df:read(1))*256 end
      if band(cmd, 0x04) ~= 0 then cp_off = cp_off + ord(df:read(1))*256^2 end
      if band(cmd, 0x08) ~= 0 then cp_off = cp_off + ord(df:read(1))*256^3 end
      -- retrieve size
      if band(cmd, 0x10) ~= 0 then cp_size = ord(df:read(1)) end
      if band(cmd, 0x20) ~= 0 then cp_size = cp_size + ord(df:read(1))*256 end
      if band(cmd, 0x40) ~= 0 then cp_size = cp_size + ord(df:read(1))*256^2 end
      if cp_size == 0 then cp_size = 0x10000 end
      if cp_off + cp_size > #base or cp_size > size then break end
      -- get the data and append it to result
      local data = base:sub(cp_off + 1, cp_off + cp_size)
      insert(result, data)
      size = size - cp_size
    else -- insert new data
      if cmd > size then break end
      local data = df:read(cmd)
      insert(result, data)
      size = size - cmd
    end
    cmd = df:read(1)
  end

  df:close()

  result = concat(result)
  assert(#result == result_size, fmt('#result(%d) ~= result_size(%d)', #result, result_size))
  return result, result_size, base_type
end

Pack = {}
Pack.__index = Pack

-- read an object from the current location in the pack, or from `offset`
-- if specified
function Pack:read_object(offset, ignore_data)
  local f = self.pack_file
  if offset then
    f:seek('set', offset)
  end
  local curr_pos = f:seek()

  local len, type = read_object_header(f)
  if type < 5 then -- commit, tree, blob, tag
    return unpack_object(f, len, type)
  elseif type == 6 then -- ofs_delta
    local offset = read_delta_header(f)
    local delta_data = uncompress_by_len(f, len)
    if not ignore_data then
      -- the base is stored `offset` bytes before this object's header
      local base, base_len, base_type = self:read_object(curr_pos - offset)
      return patch_object(base, delta_data, base_type)
    end
  elseif type == 7 then -- ref_delta
    local sha = f:read(20)
    local delta_data = uncompress_by_len(f, len)
    if not ignore_data then
      -- look the base object up in this pack by its sha
      -- FIXME: maybe lookup in repo/other packs
      local base_offset = self.index[binary_sha(sha)]
      local base, base_len, base_type = self:read_object(base_offset)
      return patch_object(base, delta_data, base_type)
    end
  else
    error('unknown object type: '..type)
  end
end

-- returns true if this pack contains the given object
function Pack:has_object(sha)
  return self.index[binary_sha(sha)] ~= nil
end

-- if the object name `sha` exists in the pack, returns a temporary file with
-- the object content, its length and type, otherwise returns nil
function Pack:get_object(sha)
  local offset = self.index[binary_sha(sha)]
  if not offset then
    print('!!! Failed to find object', readable_sha(sha))
    return nil
  end

  local data, len, type = self:read_object(offset)
  local f = tmpfile()
  f:write(data)
  f:seek('set', 0)

  return f, len, types[type]
end

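-- reads every object in the pack and stores it into the repository's
-- object database via repo:store_object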
function Pack:unpack(repo)
  for i = 1, self.nobjects do
    local offset = self.offsets[i]
    local data, len, type = self:read_object(offset)
    repo:store_object(data, len, types[type])
  end
end

-- parses a version 2 pack index (.idx) file
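-- (only the parts needed here are read: the magic, version, 256-entry
-- fanout table, sorted object names, CRCs and 32-bit offsets)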
function Pack:parse_index(index_file)
  local f = index_file

  local head = f:read(4)
  assert(head == '\255tOc', "Incorrect header: " .. head)
  local version = read_int(f)
  assert(version == 2, "Incorrect version: " .. version)

  -- first the fanout table (entry i counts the objects whose first sha
  -- byte is lower than or equal to i)
  local fanout = {}
  for i = 0, 255 do
    local nobjs = read_int(f)
    fanout[i] = nobjs
  end

  -- the last fanout entry is the total number of objects in the index
  local count = fanout[255]

  -- then come the sorted object names (20-byte sha hashes)
  local tmp = {}
  for i = 1, count do
    local sha = f:read(20)
    tmp[i] = { sha = sha }
  end

  -- then the CRCs (assume they are ok, skip them)
  for i = 1, count do
    local crc = f:read(4)
  end

  -- then come the offsets; only the 32-bit ones are read, so packs
  -- larger than 2G are not handled
  for i = 1, count do
    local offset = read_int(f)
    tmp[i].offset = offset
  end

  -- construct the lookup table
  local lookup = {}
  for i = 1, count do
    lookup[tmp[i].sha] = tmp[i].offset
  end
  self.index = lookup
end

-- constructs the index by reading and hashing every object in the pack;
-- used when the .idx file is missing
function Pack:construct_index(path)
  local index = {}
  for i = 1, self.nobjects do
    local offset = self.offsets[i]
    local data, len, type = self:read_object(offset)
    local sha = object_sha(data, len, types[type])
    index[binary_sha(sha)] = offset
  end
  self.index = index
end

function Pack:close()
  self.pack_file:close()
end

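-- opens the pack file at `path`, records the offset of every object in it,
-- then loads the matching .idx file or reconstructs the index from the
-- pack itself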
function Pack.open(path)
  local fp = assert(io.open(path, 'rb')) -- stays open

  -- read the pack header
  local head = fp:read(4)
  assert(head == 'PACK', "Incorrect header: " .. head)
  local version = read_int(fp)
  assert(version == 2, "Incorrect version: " .. version)
  local nobj = read_int(fp)

  local pack = setmetatable({
    offsets = {},
    nobjects = nobj,
    pack_file = fp,
  }, Pack)

  -- fill the offsets by traversing through the pack
  for i = 1, nobj do
    pack.offsets[i] = fp:seek()
    -- ignore the object data, we only need the offset in the pack
    pack:read_object(nil, true)
  end

  -- read the index
  local fi = io.open((path:gsub('%.pack$', '.idx')), 'rb')
  if fi then
    pack:parse_index(fi)
    fi:close()
  else
    pack:construct_index(path)
  end

  return pack
end

return Pack |
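
-- A usage sketch (the pack filename below is hypothetical; assumes Lua 5.1,
-- where `require 'git.pack'` yields the Pack table returned above):
--
--   local Pack = require 'git.pack'
--   local pack = Pack.open('.git/objects/pack/pack-<sha>.pack')
--   if pack:has_object(sha) then
--     local f, len, typ = pack:get_object(sha) -- temp file, length, type name
--     -- ... read the object content from `f` ...
--     f:close()
--   end
--   pack:close()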