summary refs log tree commit diff
path: root/LibGit
diff options
context:
space:
mode:
author Rory& <root@rory.gay> 2025-12-11 18:54:56 +0100
committer Rory& <root@rory.gay> 2025-12-11 18:54:56 +0100
commit 6e2f48e24b0c003b4c8547ff88fc1a7f890a1c1e (patch)
tree 7aac58a3a1045bde51e989cb0a1ab1c26c28e68f /LibGit
parent net10 (diff)
download GitTools-6e2f48e24b0c003b4c8547ff88fc1a7f890a1c1e.tar.xz
read indexes and packs correctly
Diffstat (limited to 'LibGit')
-rw-r--r-- LibGit/Extensions/IEnumerableExtensions.cs 2
-rw-r--r-- LibGit/Extensions/StreamExtensions.cs 80
-rw-r--r-- LibGit/GitPack.cs 242
-rw-r--r-- LibGit/GitRepo.cs 13
-rw-r--r-- LibGit/Interfaces/IRepoSource.cs 2
5 files changed, 263 insertions, 76 deletions
diff --git a/LibGit/Extensions/IEnumerableExtensions.cs b/LibGit/Extensions/IEnumerableExtensions.cs

index d8fc54d..b325538 100644 --- a/LibGit/Extensions/IEnumerableExtensions.cs +++ b/LibGit/Extensions/IEnumerableExtensions.cs
@@ -23,6 +23,8 @@ public static class IEnumerableExtensions .Replace('\n', '.') .Replace('\r', '.') .Replace('\0', '.') + .Replace('\t', '.') + .Replace('\b', '.') ); } } diff --git a/LibGit/Extensions/StreamExtensions.cs b/LibGit/Extensions/StreamExtensions.cs
index 6555783..41e4b4f 100644 --- a/LibGit/Extensions/StreamExtensions.cs +++ b/LibGit/Extensions/StreamExtensions.cs
@@ -38,13 +38,14 @@ public static class StreamExtensions int peek = stream.ReadByte(); if (peek == -1) { - if(_debug) Console.WriteLine($"Can't peek {count} bytes, only {i} bytes remaining"); + if (_debug) Console.WriteLine($"Can't peek {count} bytes, only {i} bytes remaining"); stream.Seek(-i, SeekOrigin.Current); yield break; } yield return (byte)peek; } + stream.Seek(-i, SeekOrigin.Current); } @@ -76,7 +77,7 @@ public static class StreamExtensions Console.WriteLine($"Expected: {sequence.AsHexString()} ({sequence.AsString()})"); Console.WriteLine($"Actual: {stream.Peek(sequence.Count()).AsHexString()} ({stream.Peek(sequence.Count()).AsString()})"); } - + int readCount = 0; foreach (int b in sequence) { @@ -87,7 +88,7 @@ public static class StreamExtensions stream.Seek(-readCount, SeekOrigin.Current); return false; } - + if (read != b) { if (_debug) @@ -101,8 +102,9 @@ public static class StreamExtensions return false; } } + stream.Seek(-readCount, SeekOrigin.Current); - + return true; } @@ -119,8 +121,12 @@ public static class StreamExtensions return stream; } - public static IEnumerable<byte> ReadNullTerminatedField(this Stream stream, IEnumerable<byte>? binaryPrefix = null, string? asciiPrefix = null) => ReadTerminatedField(stream: stream, terminator: 0x00, binaryPrefix: binaryPrefix, asciiPrefix: asciiPrefix); - public static IEnumerable<byte> ReadSpaceTerminatedField(this Stream stream, IEnumerable<byte>? binaryPrefix = null, string? asciiPrefix = null) => ReadTerminatedField(stream: stream, terminator: 0x20, binaryPrefix: binaryPrefix, asciiPrefix: asciiPrefix); + public static IEnumerable<byte> ReadNullTerminatedField(this Stream stream, IEnumerable<byte>? binaryPrefix = null, string? asciiPrefix = null) => + ReadTerminatedField(stream: stream, terminator: 0x00, binaryPrefix: binaryPrefix, asciiPrefix: asciiPrefix); + + public static IEnumerable<byte> ReadSpaceTerminatedField(this Stream stream, IEnumerable<byte>? binaryPrefix = null, string? 
asciiPrefix = null) => + ReadTerminatedField(stream: stream, terminator: 0x20, binaryPrefix: binaryPrefix, asciiPrefix: asciiPrefix); + public static IEnumerable<byte> ReadTerminatedField(this Stream stream, byte terminator, IEnumerable<byte>? binaryPrefix = null, string? asciiPrefix = null) { if (!stream.CanRead) @@ -131,7 +137,8 @@ public static class StreamExtensions else stream.Skip(binaryPrefix.Count()); else if (asciiPrefix != null) if (!stream.StartsWith(asciiPrefix)) - throw new InvalidDataException($"Text prefix {stream.Peek(asciiPrefix.Length).AsHexString()} ({stream.Peek(asciiPrefix.Length).AsString()}) does not match expected value of {asciiPrefix.AsBytes().AsHexString()} ({asciiPrefix})!"); + throw new InvalidDataException( + $"Text prefix {stream.Peek(asciiPrefix.Length).AsHexString()} ({stream.Peek(asciiPrefix.Length).AsString()}) does not match expected value of {asciiPrefix.AsBytes().AsHexString()} ({asciiPrefix})!"); else stream.Skip(asciiPrefix.Length); var read = 0; @@ -150,7 +157,7 @@ public static class StreamExtensions if (stream.Peek() == terminator) stream.Skip(); } - + public static IEnumerable<byte> ReadToEnd(this Stream stream) { if (!stream.CanRead) @@ -159,21 +166,49 @@ public static class StreamExtensions while (stream.Peek() != -1) yield return (byte)stream.ReadByte(); } - + public static int ReadInt32BE(this Stream stream) { if (!stream.CanRead) throw new InvalidOperationException("Can't read a non-readable stream"); var bytes = stream.ReadBytes(4).ToArray(); - + if (BitConverter.IsLittleEndian) Array.Reverse(bytes); - - Console.WriteLine("ReadInt32BE: " + bytes.AsHexString() + " => " + BitConverter.ToInt32(bytes)); + + // Console.WriteLine("ReadInt32BE: " + bytes.AsHexString() + " => " + BitConverter.ToInt32(bytes)); return BitConverter.ToInt32(bytes); } - + + public static uint ReadUInt32BE(this Stream stream) + { + if (!stream.CanRead) + throw new InvalidOperationException("Can't read a non-readable stream"); + + var bytes = 
stream.ReadBytes(4).ToArray(); + + if (BitConverter.IsLittleEndian) + Array.Reverse(bytes); + + // Console.WriteLine("ReadUInt32BE: " + bytes.AsHexString() + " => " + BitConverter.ToUInt32(bytes)); + return BitConverter.ToUInt32(bytes); + } + + public static ulong ReadUInt64BE(this Stream stream) + { + if (!stream.CanRead) + throw new InvalidOperationException("Can't read a non-readable stream"); + + var bytes = stream.ReadBytes(8).ToArray(); + + if (BitConverter.IsLittleEndian) + Array.Reverse(bytes); + + // Console.WriteLine("ReadUInt64BE: " + bytes.AsHexString() + " => " + BitConverter.ToUInt64(bytes)); + return BitConverter.ToUInt64(bytes); + } + //read variable length number public static int ReadVLQ(this Stream stream) { @@ -192,6 +227,7 @@ public static class StreamExtensions return result; } + public static int ReadVLQBigEndian(this Stream stream) { if (!stream.CanRead) @@ -208,4 +244,22 @@ public static class StreamExtensions return result; } + + // for some reason this is special... + public static int ReadGitPackOffsetModifiedVLQ(this Stream stream) + { + if (!stream.CanRead) + throw new InvalidOperationException("Can't read a non-readable stream"); + + int b = stream.ReadByte(); + int result = b & 0x7F; + + while ((b & 0x80) != 0) + { + b = stream.ReadByte(); + result = ((result + 1) << 7) | (b & 0x7F); + } + + return result; + } } \ No newline at end of file diff --git a/LibGit/GitPack.cs b/LibGit/GitPack.cs
index 58c6edc..bbb53d1 100644 --- a/LibGit/GitPack.cs +++ b/LibGit/GitPack.cs
@@ -1,40 +1,59 @@ +using System.Diagnostics.CodeAnalysis; +using System.IO.Compression; using LibGit.Extensions; namespace LibGit; +// https://shafiul.github.io//gitbook/7_the_packfile.html - easier to digest than the git documentation public class GitPack { public string PackId { get; set; } public GitRepo Repo { get; set; } - + public int Version { get; set; } public int ObjectCount { get; set; } - public List<GitPackObject> Objects { get; set; } = new List<GitPackObject>(); - - public GitPack Read(Stream stream) + public GitPackIndex Index { get; set; } + public List<GitPackObject> Objects { get; set; } = new(); + + public GitPack Read(Stream packStream, Stream idxStream) { - stream.Peek(12).HexDump(16); - - Console.Write(" Header: "); stream.Peek(04).ToArray()[0..].HexDump(4); - Console.Write("Version: "); stream.Peek(08).ToArray()[4..].HexDump(4); - Console.Write(" ObjCnt: "); stream.Peek(12).ToArray()[8..].HexDump(4); - - if(!stream.StartsWith("PACK")) + Console.Write(" Header: "); + packStream.Peek(04).ToArray()[0..].HexDump(4); + Console.Write("Version: "); + packStream.Peek(08).ToArray()[4..].HexDump(4); + Console.Write(" ObjCnt: "); + packStream.Peek(12).ToArray()[8..].HexDump(4); + + if (!packStream.StartsWith("PACK")) throw new Exception("Invalid pack file header"); - stream.Skip(4); - - Version = stream.ReadInt32BE(); - ObjectCount = stream.ReadInt32BE(); + packStream.Skip(4); + + Version = packStream.ReadInt32BE(); + ObjectCount = packStream.ReadInt32BE(); Console.WriteLine($"Got git v{Version} pack with {ObjectCount} objects"); + Console.WriteLine("Reading index..."); + Index = new GitPackIndex().Read(idxStream); + // Console.WriteLine("Pack index entries:"); + // foreach (var entry in Index.Entries.OrderByDescending(x => x.Offset)) + // { + // Console.WriteLine($" - {entry.Sha.AsHexString().Replace(" ", "").ToLower()} @ {entry.Offset}"); + // } + + // Console.WriteLine(string.Join("\n - ", Index.Entries.OrderByDescending(x => x.Offset).Select(x => 
$"{x.Sha.AsHexString().Replace(" ", "").ToLower()} @ {x.Offset}"))); + Console.WriteLine("Reading pack objects..."); + + var ordered = Index.Entries.OrderBy(x => x.Offset).ToArray(); for (int i = 0; i < ObjectCount; i++) { - Objects.Add(new GitPackObject().Read(stream)); + // Console.WriteLine("Reading object " + (i + 1) + " of " + ObjectCount); + Objects.Add(new GitPackObject().Read(packStream, ordered[i].Offset)); } + return this; } - + public GitPack(string packId, GitRepo repo) { PackId = packId; @@ -46,81 +65,184 @@ public class GitPackIndex { public int Version { get; set; } public int[] fanOutTable = new int[256]; + public List<IndexEntry> Entries { get; set; } = new List<IndexEntry>(); + public Byte[] PackSHA { get; set; } = null!; + public Byte[] IndexSHA { get; set; } = null!; + + public struct IndexEntry + { + public byte[] Sha { get; set; } + public uint Crc32 { get; set; } + public ulong Offset { get; set; } + } + public GitPackIndex Read(Stream stream) { - if(!stream.StartsWith(new byte[]{0xff,0x74,0x4f,0x63})) + if (!stream.StartsWith([0xff, 0x74, 0x4f, 0x63])) throw new Exception("Invalid pack index file header or pack is v1"); stream.Skip(4); Version = stream.ReadInt32BE(); Console.WriteLine($"Got git v{Version} pack index"); - + //fan-out table - for (int i = 0; i < 256; i++) + for (int i = 0; i < 255; i++) { fanOutTable[i] = stream.ReadInt32BE(); } - - - + var size = stream.ReadInt32BE(); // aka "fanout[255]" + Console.WriteLine($"Index contains {size} objects"); + + // Console.WriteLine("Fan-out table:"); + // var tableWidth = 8; + // if (Console.WindowWidth >= 320) tableWidth = 12; + // else if (Console.WindowWidth >= 240) tableWidth = 10; + // else if (Console.WindowWidth >= 160) tableWidth = 8; + // else if (Console.WindowWidth >= 80) tableWidth = 4; + // Console.WriteLine($"TW: {tableWidth}, CW: {Console.WindowWidth}"); + // for (int i = 0; i < 256; i++) + // { + // Console.Write($"[{i:X2}] {fanOutTable[i]:X8} ({fanOutTable[i].ToString(),8}) 
"); + // if ((i + 1) % tableWidth == 0) + // Console.WriteLine(); + // } + + // Console.WriteLine($"\t\t END OF TABLE @ {stream.Position}"); + + for (int i = 0; i < size; i++) + { + // sha list + var sha = stream.ReadBytes(20).ToArray(); + // Console.WriteLine($"OBJ {i:X4}: {sha.AsHexString()}"); + Entries.Add(new IndexEntry + { + Sha = sha + }); + } + + for (int i = 0; i < size; i++) + { + // crc32 list + var crc = stream.ReadUInt32BE(); + // Console.WriteLine($"CRC {i:X4}: {crc:X8}"); + Entries[i] = new IndexEntry + { + Sha = Entries[i].Sha, + Crc32 = crc + }; + } + + for (int i = 0; i < size; i++) + { + // offset list + var offset = stream.ReadInt32BE(); + // Console.WriteLine($"OFF {i:X4}: {offset}"); + Entries[i] = new IndexEntry + { + Sha = Entries[i].Sha, + Crc32 = Entries[i].Crc32, + Offset = (uint)offset + }; + } + + // for (int i = 0; i < size; i++) + // { + // Console.WriteLine($"ENTRY {i:X4}: {Entries[i].Sha.AsHexString()} | CRC32: {Entries[i].Crc32:X8} | OFF: {Entries[i].Offset}"); + // } + + if (stream.Remaining() > 20) + for (int i = 0; i < size; i++) + { + var entry = Entries[i]; + if ((entry.Offset & 0x80000000) == 0) continue; + + var largeOffset = stream.ReadUInt64BE(); + Console.WriteLine($"LARGE OFF {i:X4}: {largeOffset} (idx: {i})"); + Entries[i] = entry with { Offset = largeOffset }; + // Thread.Sleep(10); + } + + PackSHA = stream.ReadBytes(20).ToArray(); + Console.WriteLine($"Pack SHA: {PackSHA.AsHexString()}"); + IndexSHA = stream.ReadBytes(20).ToArray(); + Console.WriteLine($"Index SHA: {IndexSHA.AsHexString()}"); + return this; } } - public class GitPackObject { - private const bool _debug = true; - public GitPackObject Read(Stream stream) + private const bool _debug = false; + + public GitPackObject Read(Stream stream, ulong offset) { - stream.Peek(64).HexDump(32); - var header = stream.ReadBytes(4).ToArray(); - ObjType = (GitObjectType)((header[0] & 0b0111_0000) >> 4); - if(ObjType == 0 || (int)ObjType == 5 || (int)ObjType > 7) - throw 
new Exception($"Invalid object type: {(int)ObjType}"); - Size = header[0] & 0b0000_1111; + stream.Seek((long)offset, SeekOrigin.Begin); + if (_debug) Console.WriteLine($"Reading pack object at offset {offset}, stream position {stream.Position}"); + var headerPos = stream.Position; + var data = stream.ReadBytes(1).First(); + if (_debug) Console.WriteLine($"data: {data:X8} ({data}/{data:b8})"); + + //format: 1 bit continue, 3 bits type, 4 bits size (A), continued by up to 3 more bytes of size (B, C and D), A is least significant + ObjType = (GitObjectType)((data >> 4) & 0b0000_0111); + var sizeBits = data & 0b0000_1111; // Lower 4 bits are the initial size + var restOfSize = (data & 0b1000_0000) != 0 ? stream.ReadVLQ() : 0; + UncompressedSize = (restOfSize << 4) | sizeBits; - Offset = 0; - for (int i = 1; i < 4; i++) + // handle delta objects + if (ObjType == GitObjectType.RefDelta) { - Offset <<= 8; - Offset |= header[i]; + RefDeltaBaseObjectId = stream.ReadBytes(20).ToArray(); + if (_debug) Console.WriteLine($"Ref delta base object id: {RefDeltaBaseObjectId.AsHexString()}"); + } + else if (ObjType == GitObjectType.OffsDelta) + { + OffsDeltaBaseOffset = stream.ReadGitPackOffsetModifiedVLQ(); + if (_debug) Console.WriteLine($"Offset delta base offset: {OffsDeltaBaseOffset}"); } - if ((Size & 0b0000_1000) != 0) + var dataPos = stream.Position; + if (_debug) Console.WriteLine($"pack objType: {ObjType} ({(int)ObjType}), uncompressed size: {UncompressedSize}, position: HDR={headerPos}, DATA={dataPos}"); + // stream.Peek(Size).Take(16).ToArray().HexDump(16); + // stream.Skip(Size); + + try { - Size <<= 4; - Size |= stream.ReadVLQ(); + using var zlibStream = new ZLibStream(stream, CompressionMode.Decompress, true); + var decompressedData = new byte[UncompressedSize]; + int totalRead = 0; + while (totalRead < UncompressedSize) + { + int bytesRead = zlibStream.Read(decompressedData, totalRead, UncompressedSize - totalRead); + if (bytesRead == 0) + throw new 
Exception("Unexpected end of zlib stream"); + totalRead += bytesRead; + } + } + catch (Exception ex) + { + Console.WriteLine($"Error during zlib decompression: {ex.Message}"); + stream.Seek(headerPos - 5, SeekOrigin.Begin); + stream.Peek(32).HexDump(); + throw; } - // ObjType = Type switch - // { - // 1 => GitObjectType.Commit, - // 2 => GitObjectType.Tree, - // 3 => GitObjectType.Blob, - // 4 => GitObjectType.Tag, - // 5 => GitObjectType.Invalid, - // 6 => GitObjectType.OffsDelta, - // 7 => GitObjectType.RefDelta, - // _ => throw new Exception($"Invalid object type {Type}") - // }; - - if(_debug) Console.WriteLine($"pack obj type: {ObjType} ({(int)ObjType}), size: {Size}, offset: {Offset}, sizeBytes: {SizeBytes}"); - Console.WriteLine("Data: "); - stream.Peek(Size).Take(16).ToArray().HexDump(16); - stream.ReadBytes(Size).ZlibDecompress().Take(16).HexDump(16); - + var endPos = stream.Position; + if (_debug) Console.WriteLine($"Decompressed data ({UncompressedSize} bytes/{endPos - dataPos} compressed, stream @ {endPos}):"); + // if (_debug) decompressedData.ToArray().HexDump(32); + + // Environment.Exit(1); return this; } public GitObjectType ObjType { get; set; } - public int SizeBytes { get; set; } - - public int Size { get; set; } + public int UncompressedSize { get; set; } - public int Offset { get; set; } + public byte[]? RefDeltaBaseObjectId { get; set; } + + public int? OffsDeltaBaseOffset { get; set; } } public enum GitObjectType diff --git a/LibGit/GitRepo.cs b/LibGit/GitRepo.cs
index a58bc38..045df6c 100644 --- a/LibGit/GitRepo.cs +++ b/LibGit/GitRepo.cs
@@ -54,6 +54,12 @@ public class GitRepo public async IAsyncEnumerable<GitPack> GetPacks() { + if (!await RepoSource.FileExists("objects/info/packs")) + { + Console.WriteLine("No pack index found."); + yield break; + } + var fs = await RepoSource.GetFileStream("objects/info/packs"); Console.WriteLine("Found packs file:"); fs.Peek(32).HexDump(32); @@ -62,17 +68,20 @@ public class GitRepo Console.WriteLine("WARNING: No packs found!"); yield break; } + while (fs.Remaining() > 0 && fs.Peek() != 0x0A) { //example: P pack-24bd1c46d657f74f40629503d8e5083a9ad36a67.pack var line = fs.ReadTerminatedField((byte)'\n').AsString(); if (line.StartsWith("P ")) { - new GitPackIndex().Read(await RepoSource.GetFileStream($"objects/pack/{line[2..].Replace(".pack", ".idx")}")); + Console.WriteLine($"Reading pack: {RepoSource.BasePath}/objects/pack/{line[2..]}"); + var packStream = await RepoSource.GetFileStream($"objects/pack/{line[2..]}"); + var idxStream = await RepoSource.GetFileStream($"objects/pack/{line[2..].Replace(".pack", ".idx")}"); yield return new GitPack( packId: line[2..], repo: this - ).Read(await RepoSource.GetFileStream($"objects/pack/{line[2..]}")); + ).Read(packStream, idxStream); } else { diff --git a/LibGit/Interfaces/IRepoSource.cs b/LibGit/Interfaces/IRepoSource.cs
index e276f60..2a29feb 100644 --- a/LibGit/Interfaces/IRepoSource.cs +++ b/LibGit/Interfaces/IRepoSource.cs
@@ -3,7 +3,7 @@ namespace LibGit.Interfaces; public interface IRepoSource { public string BasePath { get; set; } + public Task<bool> FileExists(string path); public Task<Stream> GetFileStream(string path); - public Task<Stream> GetObjectStreamById(string objectId) => GetFileStream(Path.Join("objects", objectId[..2], objectId[2..])); } \ No newline at end of file