summary refs log tree commit diff
path: root/LibGit/GitPack.cs
diff options
context:
space:
mode:
Diffstat (limited to 'LibGit/GitPack.cs')
-rw-r--r--LibGit/GitPack.cs242
1 files changed, 182 insertions, 60 deletions
diff --git a/LibGit/GitPack.cs b/LibGit/GitPack.cs

index 58c6edc..bbb53d1 100644 --- a/LibGit/GitPack.cs +++ b/LibGit/GitPack.cs
@@ -1,40 +1,59 @@ +using System.Diagnostics.CodeAnalysis; +using System.IO.Compression; using LibGit.Extensions; namespace LibGit; +// https://shafiul.github.io//gitbook/7_the_packfile.html - easier to digest than the git documentation public class GitPack { public string PackId { get; set; } public GitRepo Repo { get; set; } - + public int Version { get; set; } public int ObjectCount { get; set; } - public List<GitPackObject> Objects { get; set; } = new List<GitPackObject>(); - - public GitPack Read(Stream stream) + public GitPackIndex Index { get; set; } + public List<GitPackObject> Objects { get; set; } = new(); + + public GitPack Read(Stream packStream, Stream idxStream) { - stream.Peek(12).HexDump(16); - - Console.Write(" Header: "); stream.Peek(04).ToArray()[0..].HexDump(4); - Console.Write("Version: "); stream.Peek(08).ToArray()[4..].HexDump(4); - Console.Write(" ObjCnt: "); stream.Peek(12).ToArray()[8..].HexDump(4); - - if(!stream.StartsWith("PACK")) + Console.Write(" Header: "); + packStream.Peek(04).ToArray()[0..].HexDump(4); + Console.Write("Version: "); + packStream.Peek(08).ToArray()[4..].HexDump(4); + Console.Write(" ObjCnt: "); + packStream.Peek(12).ToArray()[8..].HexDump(4); + + if (!packStream.StartsWith("PACK")) throw new Exception("Invalid pack file header"); - stream.Skip(4); - - Version = stream.ReadInt32BE(); - ObjectCount = stream.ReadInt32BE(); + packStream.Skip(4); + + Version = packStream.ReadInt32BE(); + ObjectCount = packStream.ReadInt32BE(); Console.WriteLine($"Got git v{Version} pack with {ObjectCount} objects"); + Console.WriteLine("Reading index..."); + Index = new GitPackIndex().Read(idxStream); + // Console.WriteLine("Pack index entries:"); + // foreach (var entry in Index.Entries.OrderByDescending(x => x.Offset)) + // { + // Console.WriteLine($" - {entry.Sha.AsHexString().Replace(" ", "").ToLower()} @ {entry.Offset}"); + // } + + // Console.WriteLine(string.Join("\n - ", Index.Entries.OrderByDescending(x => x.Offset).Select(x => $"{x.Sha.AsHexString().Replace(" ", "").ToLower()} @ {x.Offset}"))); + Console.WriteLine("Reading pack objects..."); + + var ordered = Index.Entries.OrderBy(x => x.Offset).ToArray(); for (int i = 0; i < ObjectCount; i++) { - Objects.Add(new GitPackObject().Read(stream)); + // Console.WriteLine("Reading object " + (i + 1) + " of " + ObjectCount); + Objects.Add(new GitPackObject().Read(packStream, ordered[i].Offset)); } + return this; } - + public GitPack(string packId, GitRepo repo) { PackId = packId; @@ -46,81 +65,184 @@ public class GitPackIndex { public int Version { get; set; } public int[] fanOutTable = new int[256]; + public List<IndexEntry> Entries { get; set; } = new List<IndexEntry>(); + public Byte[] PackSHA { get; set; } = null!; + public Byte[] IndexSHA { get; set; } = null!; + + public struct IndexEntry + { + public byte[] Sha { get; set; } + public uint Crc32 { get; set; } + public ulong Offset { get; set; } + } + public GitPackIndex Read(Stream stream) { - if(!stream.StartsWith(new byte[]{0xff,0x74,0x4f,0x63})) + if (!stream.StartsWith([0xff, 0x74, 0x4f, 0x63])) throw new Exception("Invalid pack index file header or pack is v1"); stream.Skip(4); Version = stream.ReadInt32BE(); Console.WriteLine($"Got git v{Version} pack index"); - + //fan-out table - for (int i = 0; i < 256; i++) + for (int i = 0; i < 255; i++) { fanOutTable[i] = stream.ReadInt32BE(); } - - - + var size = stream.ReadInt32BE(); // aka "fanout[255]" + Console.WriteLine($"Index contains {size} objects"); + + // Console.WriteLine("Fan-out table:"); + // var tableWidth = 8; + // if (Console.WindowWidth >= 320) tableWidth = 12; + // else if (Console.WindowWidth >= 240) tableWidth = 10; + // else if (Console.WindowWidth >= 160) tableWidth = 8; + // else if (Console.WindowWidth >= 80) tableWidth = 4; + // Console.WriteLine($"TW: {tableWidth}, CW: {Console.WindowWidth}"); + // for (int i = 0; i < 256; i++) + // { + // Console.Write($"[{i:X2}] {fanOutTable[i]:X8} ({fanOutTable[i].ToString(),8}) "); + // if ((i + 1) % tableWidth == 0) + // Console.WriteLine(); + // } + + // Console.WriteLine($"\t\t END OF TABLE @ {stream.Position}"); + + for (int i = 0; i < size; i++) + { + // sha list + var sha = stream.ReadBytes(20).ToArray(); + // Console.WriteLine($"OBJ {i:X4}: {sha.AsHexString()}"); + Entries.Add(new IndexEntry + { + Sha = sha + }); + } + + for (int i = 0; i < size; i++) + { + // crc32 list + var crc = stream.ReadUInt32BE(); + // Console.WriteLine($"CRC {i:X4}: {crc:X8}"); + Entries[i] = new IndexEntry + { + Sha = Entries[i].Sha, + Crc32 = crc + }; + } + + for (int i = 0; i < size; i++) + { + // offset list + var offset = stream.ReadInt32BE(); + // Console.WriteLine($"OFF {i:X4}: {offset}"); + Entries[i] = new IndexEntry + { + Sha = Entries[i].Sha, + Crc32 = Entries[i].Crc32, + Offset = (uint)offset + }; + } + + // for (int i = 0; i < size; i++) + // { + // Console.WriteLine($"ENTRY {i:X4}: {Entries[i].Sha.AsHexString()} | CRC32: {Entries[i].Crc32:X8} | OFF: {Entries[i].Offset}"); + // } + + if (stream.Remaining() > 20) + for (int i = 0; i < size; i++) + { + var entry = Entries[i]; + if ((entry.Offset & 0x80000000) == 0) continue; + + var largeOffset = stream.ReadUInt64BE(); + Console.WriteLine($"LARGE OFF {i:X4}: {largeOffset} (idx: {i})"); + Entries[i] = entry with { Offset = largeOffset }; + // Thread.Sleep(10); + } + + PackSHA = stream.ReadBytes(20).ToArray(); + Console.WriteLine($"Pack SHA: {PackSHA.AsHexString()}"); + IndexSHA = stream.ReadBytes(20).ToArray(); + Console.WriteLine($"Index SHA: {IndexSHA.AsHexString()}"); + return this; } } - public class GitPackObject { - private const bool _debug = true; - public GitPackObject Read(Stream stream) + private const bool _debug = false; + + public GitPackObject Read(Stream stream, ulong offset) { - stream.Peek(64).HexDump(32); - var header = stream.ReadBytes(4).ToArray(); - ObjType = (GitObjectType)((header[0] & 0b0111_0000) >> 4); - if(ObjType == 0 || (int)ObjType == 5 || (int)ObjType > 7) - throw new Exception($"Invalid object type: {(int)ObjType}"); - Size = header[0] & 0b0000_1111; + stream.Seek((long)offset, SeekOrigin.Begin); + if (_debug) Console.WriteLine($"Reading pack object at offset {offset}, stream position {stream.Position}"); + var headerPos = stream.Position; + var data = stream.ReadBytes(1).First(); + if (_debug) Console.WriteLine($"data: {data:X8} ({data}/{data:b8})"); + + //format: 1 bit continue, 3 bits type, 4 bits size (A), continued by up to 3 more bytes of size (B, C and D), A is least significant + ObjType = (GitObjectType)((data >> 4) & 0b0000_0111); + var sizeBits = data & 0b0000_1111; // Lower 4 bits are the initial size + var restOfSize = (data & 0b1000_0000) != 0 ? stream.ReadVLQ() : 0; + UncompressedSize = (restOfSize << 4) | sizeBits; - Offset = 0; - for (int i = 1; i < 4; i++) + // handle delta objects + if (ObjType == GitObjectType.RefDelta) { - Offset <<= 8; - Offset |= header[i]; + RefDeltaBaseObjectId = stream.ReadBytes(20).ToArray(); + if (_debug) Console.WriteLine($"Ref delta base object id: {RefDeltaBaseObjectId.AsHexString()}"); + } + else if (ObjType == GitObjectType.OffsDelta) + { + OffsDeltaBaseOffset = stream.ReadGitPackOffsetModifiedVLQ(); + if (_debug) Console.WriteLine($"Offset delta base offset: {OffsDeltaBaseOffset}"); } - if ((Size & 0b0000_1000) != 0) + var dataPos = stream.Position; + if (_debug) Console.WriteLine($"pack objType: {ObjType} ({(int)ObjType}), uncompressed size: {UncompressedSize}, position: HDR={headerPos}, DATA={dataPos}"); + // stream.Peek(Size).Take(16).ToArray().HexDump(16); + // stream.Skip(Size); + + try { - Size <<= 4; - Size |= stream.ReadVLQ(); + using var zlibStream = new ZLibStream(stream, CompressionMode.Decompress, true); + var decompressedData = new byte[UncompressedSize]; + int totalRead = 0; + while (totalRead < UncompressedSize) + { + int bytesRead = zlibStream.Read(decompressedData, totalRead, UncompressedSize - totalRead); + if (bytesRead == 0) + throw new Exception("Unexpected end of zlib stream"); + totalRead += bytesRead; + } + } + catch (Exception ex) + { + Console.WriteLine($"Error during zlib decompression: {ex.Message}"); + stream.Seek(headerPos - 5, SeekOrigin.Begin); + stream.Peek(32).HexDump(); + throw; } - // ObjType = Type switch - // { - // 1 => GitObjectType.Commit, - // 2 => GitObjectType.Tree, - // 3 => GitObjectType.Blob, - // 4 => GitObjectType.Tag, - // 5 => GitObjectType.Invalid, - // 6 => GitObjectType.OffsDelta, - // 7 => GitObjectType.RefDelta, - // _ => throw new Exception($"Invalid object type {Type}") - // }; - - if(_debug) Console.WriteLine($"pack obj type: {ObjType} ({(int)ObjType}), size: {Size}, offset: {Offset}, sizeBytes: {SizeBytes}"); - Console.WriteLine("Data: "); - stream.Peek(Size).Take(16).ToArray().HexDump(16); - stream.ReadBytes(Size).ZlibDecompress().Take(16).HexDump(16); - + var endPos = stream.Position; + if (_debug) Console.WriteLine($"Decompressed data ({UncompressedSize} bytes/{endPos - dataPos} compressed, stream @ {endPos}):"); + // if (_debug) decompressedData.ToArray().HexDump(32); + + // Environment.Exit(1); return this; } public GitObjectType ObjType { get; set; } - public int SizeBytes { get; set; } - - public int Size { get; set; } + public int UncompressedSize { get; set; } - public int Offset { get; set; } + public byte[]? RefDeltaBaseObjectId { get; set; } + + public int? OffsDeltaBaseOffset { get; set; } } public enum GitObjectType