summary refs log tree commit diff
path: root/LibGit/GitPack.cs
diff options
context:
space:
mode:
Diffstat (limited to 'LibGit/GitPack.cs')
-rw-r--r--LibGit/GitPack.cs229
1 files changed, 114 insertions, 115 deletions
diff --git a/LibGit/GitPack.cs b/LibGit/GitPack.cs

index bbb53d1..9e574ee 100644 --- a/LibGit/GitPack.cs +++ b/LibGit/GitPack.cs
@@ -1,5 +1,7 @@ +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO.Compression; +using System.Runtime.CompilerServices; using LibGit.Extensions; namespace LibGit; @@ -7,6 +9,7 @@ namespace LibGit; // https://shafiul.github.io//gitbook/7_the_packfile.html - easier to digest than the git documentation public class GitPack { + private const bool _log = false; public string PackId { get; set; } public GitRepo Repo { get; set; } @@ -15,14 +18,17 @@ public class GitPack public GitPackIndex Index { get; set; } public List<GitPackObject> Objects { get; set; } = new(); - public GitPack Read(Stream packStream, Stream idxStream) + public GitPack Read(Stream packStream, Stream? idxStream = null, GitPackIndex? index = null) { - Console.Write(" Header: "); - packStream.Peek(04).ToArray()[0..].HexDump(4); - Console.Write("Version: "); - packStream.Peek(08).ToArray()[4..].HexDump(4); - Console.Write(" ObjCnt: "); - packStream.Peek(12).ToArray()[8..].HexDump(4); + if (_log) + { + Console.Write(" Header: "); + packStream.Peek(04).ToArray()[0..].HexDump(4); + Console.Write("Version: "); + packStream.Peek(08).ToArray()[4..].HexDump(4); + Console.Write(" ObjCnt: "); + packStream.Peek(12).ToArray()[8..].HexDump(4); + } if (!packStream.StartsWith("PACK")) throw new Exception("Invalid pack file header"); @@ -31,25 +37,37 @@ public class GitPack Version = packStream.ReadInt32BE(); ObjectCount = packStream.ReadInt32BE(); - Console.WriteLine($"Got git v{Version} pack with {ObjectCount} objects"); - Console.WriteLine("Reading index..."); + if (_log) + { + Console.WriteLine($"Got git v{Version} pack with {ObjectCount} objects"); + Console.WriteLine("Reading index..."); + } + Index = new GitPackIndex().Read(idxStream); - // Console.WriteLine("Pack index entries:"); - // foreach (var entry in Index.Entries.OrderByDescending(x => x.Offset)) - // { - // Console.WriteLine($" - {entry.Sha.AsHexString().Replace(" ", "").ToLower()} @ {entry.Offset}"); - // } - - // Console.WriteLine(string.Join("\n - ", Index.Entries.OrderByDescending(x => x.Offset).Select(x => $"{x.Sha.AsHexString().Replace(" ", "").ToLower()} @ {x.Offset}"))); - Console.WriteLine("Reading pack objects..."); + if (_log) Console.WriteLine("Reading pack objects..."); var ordered = Index.Entries.OrderBy(x => x.Offset).ToArray(); + // prevent spamming the console + var sw = Stopwatch.StartNew(); + var tsw = Stopwatch.StartNew(); for (int i = 0; i < ObjectCount; i++) { - // Console.WriteLine("Reading object " + (i + 1) + " of " + ObjectCount); - Objects.Add(new GitPackObject().Read(packStream, ordered[i].Offset)); + var obj = ordered[i]; + if (sw.ElapsedMilliseconds >= 50 || i == ObjectCount - 1) + { + Console.Write($"\r[{tsw.Elapsed}] Reading object {i + 1}/{ObjectCount} ({ordered[i].Sha}) @ {obj.Offset}"); + sw.Restart(); + } + + Objects.Add(new GitPackObject().ReadHeader(packStream, obj.Offset)); } + Console.WriteLine(); + + foreach (var group in Objects.GroupBy(x => x.ObjType)) + { + Console.WriteLine($" - {group.Key}: {group.Count()} objects"); + } return this; } @@ -63,17 +81,30 @@ public class GitPack public class GitPackIndex { + private const bool _log = false; public int Version { get; set; } public int[] fanOutTable = new int[256]; - public List<IndexEntry> Entries { get; set; } = new List<IndexEntry>(); - public Byte[] PackSHA { get; set; } = null!; - public Byte[] IndexSHA { get; set; } = null!; + public List<IndexEntry> Entries { get; set; } = []; + public Sha1Value PackSHA { get; set; } + public Sha1Value IndexSHA { get; set; } + + [InlineArray(20)] + public struct Sha1Value + { + private byte _e0; + + public override string ToString() + { + ReadOnlySpan<byte> bytes = this; + return Convert.ToHexStringLower(bytes); + } + } public struct IndexEntry { - public byte[] Sha { get; set; } - public uint Crc32 { get; set; } - public ulong Offset { get; set; } + public Sha1Value Sha; + public uint Crc32; + public ulong Offset; } public GitPackIndex Read(Stream stream) @@ -83,91 +114,48 @@ public class GitPackIndex stream.Skip(4); Version = stream.ReadInt32BE(); - Console.WriteLine($"Got git v{Version} pack index"); + if (_log) Console.WriteLine($"Got git v{Version} pack index"); - //fan-out table - for (int i = 0; i < 255; i++) - { - fanOutTable[i] = stream.ReadInt32BE(); - } + fanOutTable = stream.MultiReadInt32BE(255); var size = stream.ReadInt32BE(); // aka "fanout[255]" - Console.WriteLine($"Index contains {size} objects"); - - // Console.WriteLine("Fan-out table:"); - // var tableWidth = 8; - // if (Console.WindowWidth >= 320) tableWidth = 12; - // else if (Console.WindowWidth >= 240) tableWidth = 10; - // else if (Console.WindowWidth >= 160) tableWidth = 8; - // else if (Console.WindowWidth >= 80) tableWidth = 4; - // Console.WriteLine($"TW: {tableWidth}, CW: {Console.WindowWidth}"); - // for (int i = 0; i < 256; i++) - // { - // Console.Write($"[{i:X2}] {fanOutTable[i]:X8} ({fanOutTable[i].ToString(),8}) "); - // if ((i + 1) % tableWidth == 0) - // Console.WriteLine(); - // } - - // Console.WriteLine($"\t\t END OF TABLE @ {stream.Position}"); - - for (int i = 0; i < size; i++) - { - // sha list - var sha = stream.ReadBytes(20).ToArray(); - // Console.WriteLine($"OBJ {i:X4}: {sha.AsHexString()}"); - Entries.Add(new IndexEntry - { - Sha = sha - }); - } + if (_log) Console.WriteLine($"Index contains {size} objects"); + Entries = new List<IndexEntry>(new IndexEntry[size]); - for (int i = 0; i < size; i++) - { - // crc32 list - var crc = stream.ReadUInt32BE(); - // Console.WriteLine($"CRC {i:X4}: {crc:X8}"); - Entries[i] = new IndexEntry - { - Sha = Entries[i].Sha, - Crc32 = crc - }; - } + if (_log) Console.Write("Reading SHA1..."); + var sha1Values = stream.MultiReadInlineArray<Sha1Value>(size); + + if (_log) Console.Write(" CRC..."); + var crcValues = stream.MultiReadUInt32BE(size); + + if (_log) Console.Write(" OFS..."); + var raw32 = stream.MultiReadUInt32BE(size); // uint[] + var raw64 = stream.MultiReadUInt64BE((int)((stream.Remaining() - 40) / sizeof(ulong))); - for (int i = 0; i < size; i++) + if (_log) Console.WriteLine(" Hashes..."); + var h = new Sha1Value(); + stream.ReadBlock(20).CopyTo(h); + PackSHA = h; + if (_log) Console.WriteLine($"Pack SHA: {PackSHA}"); + + h = new Sha1Value(); + stream.ReadBlock(20).CopyTo(h); + IndexSHA = h; + if (_log) Console.WriteLine($"Index SHA: {IndexSHA}"); + + if (_log) Console.WriteLine("Constructing entries..."); + for (var i = 0; i < size; i++) { - // offset list - var offset = stream.ReadInt32BE(); - // Console.WriteLine($"OFF {i:X4}: {offset}"); Entries[i] = new IndexEntry { - Sha = Entries[i].Sha, - Crc32 = Entries[i].Crc32, - Offset = (uint)offset + Sha = sha1Values[i], + Crc32 = crcValues[i], + Offset = (raw32[i] & 0x80000000) == 0 + ? raw32[i] + : raw64[raw32[i] & 0x7FFFFFFF] }; } - // for (int i = 0; i < size; i++) - // { - // Console.WriteLine($"ENTRY {i:X4}: {Entries[i].Sha.AsHexString()} | CRC32: {Entries[i].Crc32:X8} | OFF: {Entries[i].Offset}"); - // } - - if (stream.Remaining() > 20) - for (int i = 0; i < size; i++) - { - var entry = Entries[i]; - if ((entry.Offset & 0x80000000) == 0) continue; - - var largeOffset = stream.ReadUInt64BE(); - Console.WriteLine($"LARGE OFF {i:X4}: {largeOffset} (idx: {i})"); - Entries[i] = entry with { Offset = largeOffset }; - // Thread.Sleep(10); - } - - PackSHA = stream.ReadBytes(20).ToArray(); - Console.WriteLine($"Pack SHA: {PackSHA.AsHexString()}"); - IndexSHA = stream.ReadBytes(20).ToArray(); - Console.WriteLine($"Index SHA: {IndexSHA.AsHexString()}"); - return this; } } @@ -176,24 +164,25 @@ public class GitPackObject { private const bool _debug = false; - public GitPackObject Read(Stream stream, ulong offset) + public GitPackObject ReadHeader(Stream stream, ulong offset) { + Offset = offset; stream.Seek((long)offset, SeekOrigin.Begin); if (_debug) Console.WriteLine($"Reading pack object at offset {offset}, stream position {stream.Position}"); var headerPos = stream.Position; - var data = stream.ReadBytes(1).First(); + var data = stream.ReadByte(); if (_debug) Console.WriteLine($"data: {data:X8} ({data}/{data:b8})"); - + //format: 1 bit continue, 3 bits type, 4 bits size (A), continued by up to 3 more bytes of size (B, C and D), A is least significant ObjType = (GitObjectType)((data >> 4) & 0b0000_0111); var sizeBits = data & 0b0000_1111; // Lower 4 bits are the initial size var restOfSize = (data & 0b1000_0000) != 0 ? stream.ReadVLQ() : 0; UncompressedSize = (restOfSize << 4) | sizeBits; - + // handle delta objects if (ObjType == GitObjectType.RefDelta) { - RefDeltaBaseObjectId = stream.ReadBytes(20).ToArray(); + RefDeltaBaseObjectId = stream.ReadBlock(20); if (_debug) Console.WriteLine($"Ref delta base object id: {RefDeltaBaseObjectId.AsHexString()}"); } else if (ObjType == GitObjectType.OffsDelta) @@ -201,20 +190,27 @@ public class GitPackObject OffsDeltaBaseOffset = stream.ReadGitPackOffsetModifiedVLQ(); if (_debug) Console.WriteLine($"Offset delta base offset: {OffsDeltaBaseOffset}"); } - - var dataPos = stream.Position; - if (_debug) Console.WriteLine($"pack objType: {ObjType} ({(int)ObjType}), uncompressed size: {UncompressedSize}, position: HDR={headerPos}, DATA={dataPos}"); - // stream.Peek(Size).Take(16).ToArray().HexDump(16); - // stream.Skip(Size); + + // var dataPos = stream.Position; + // if (_debug) Console.WriteLine($"pack objType: {ObjType} ({(int)ObjType}), uncompressed size: {UncompressedSize}, position: HDR={headerPos}, DATA={dataPos}"); + + DataOffset = (ulong) stream.Position; + + return this; + } + + public GitPackObject Read(Stream stream, ulong offset) + { + ReadHeader(stream, offset); try { using var zlibStream = new ZLibStream(stream, CompressionMode.Decompress, true); - var decompressedData = new byte[UncompressedSize]; + var decompressedData = new Span<byte>(new byte[UncompressedSize]); int totalRead = 0; while (totalRead < UncompressedSize) { - int bytesRead = zlibStream.Read(decompressedData, totalRead, UncompressedSize - totalRead); + int bytesRead = zlibStream.Read(decompressedData); //, totalRead, UncompressedSize - totalRead); if (bytesRead == 0) throw new Exception("Unexpected end of zlib stream"); totalRead += bytesRead; @@ -223,25 +219,28 @@ public class GitPackObject catch (Exception ex) { Console.WriteLine($"Error during zlib decompression: {ex.Message}"); - stream.Seek(headerPos - 5, SeekOrigin.Begin); + stream.Seek((long)Offset, SeekOrigin.Begin); stream.Peek(32).HexDump(); throw; } - var endPos = stream.Position; - if (_debug) Console.WriteLine($"Decompressed data ({UncompressedSize} bytes/{endPos - dataPos} compressed, stream @ {endPos}):"); + // var endPos = stream.Position; + // if (_debug) Console.WriteLine($"Decompressed data ({UncompressedSize} bytes/{endPos - dataPos} compressed, stream @ {endPos}):"); // if (_debug) decompressedData.ToArray().HexDump(32); // Environment.Exit(1); return this; } + public ulong Offset { get; set; } + public ulong DataOffset { get; set; } + public GitObjectType ObjType { get; set; } public int UncompressedSize { get; set; } public byte[]? RefDeltaBaseObjectId { get; set; } - + public int? OffsDeltaBaseOffset { get; set; } }