/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ /* * This package is based on the work done by Keiron Liddle, Aftex Software * to whom the Ant project is very grateful for his * great code. */ using System; using System.IO; using Org.BouncyCastle.Utilities; using Org.BouncyCastle.Utilities.IO; namespace Org.BouncyCastle.Apache.Bzip2 { /** * An input stream that decompresses from the BZip2 format (with the file * header chars) to be read as any other stream. * * @author Keiron Liddle * * NB: note this class has been modified to read the leading BZ from the * start of the BZIP2 stream to make it compatible with other PGP programs. */ public class CBZip2InputStream : BaseInputStream { private static void Cadvise() { //System.out.Println("CRC Error"); //throw new CCoruptionError(); } // private static void BadBGLengths() { // Cadvise(); // } // // private static void BitStreamEOF() { // Cadvise(); // } private static void CompressedStreamEOF() { Cadvise(); } private void MakeMaps() { nInUse = 0; for (int i = 0; i < 256; i++) { if (inUse[i]) { seqToUnseq[nInUse] = (char)i; unseqToSeq[i] = (char)nInUse; nInUse++; } } } /* index of the last char in the block, so the block size == last + 1. */ private int last; /* index in zptr[] of original string after sorting. */ private int origPtr; /* always: in the range 0 .. 9. The current block size is 100000 * this number. */ private int blockSize100k; private bool blockRandomised; private int bsBuff; private int bsLive; private CRC mCrc = new CRC(); private bool[] inUse = new bool[256]; private int nInUse; private char[] seqToUnseq = new char[256]; private char[] unseqToSeq = new char[256]; private char[] selector = new char[BZip2Constants.MAX_SELECTORS]; private char[] selectorMtf = new char[BZip2Constants.MAX_SELECTORS]; private int[] tt; private char[] ll8; /* freq table collected to save a pass over the data during decompression. */ private int[] unzftab = new int[256]; private int[][] limit = InitIntArray(BZip2Constants.N_GROUPS, BZip2Constants.MAX_ALPHA_SIZE); private int[][] basev = InitIntArray(BZip2Constants.N_GROUPS, BZip2Constants.MAX_ALPHA_SIZE); private int[][] perm = InitIntArray(BZip2Constants.N_GROUPS, BZip2Constants.MAX_ALPHA_SIZE); private int[] minLens = new int[BZip2Constants.N_GROUPS]; private Stream bsStream; private bool streamEnd = false; private int currentChar = -1; private const int START_BLOCK_STATE = 1; private const int RAND_PART_A_STATE = 2; private const int RAND_PART_B_STATE = 3; private const int RAND_PART_C_STATE = 4; private const int NO_RAND_PART_A_STATE = 5; private const int NO_RAND_PART_B_STATE = 6; private const int NO_RAND_PART_C_STATE = 7; private int currentState = START_BLOCK_STATE; private int storedBlockCRC, storedCombinedCRC; private int computedBlockCRC, computedCombinedCRC; int i2, count, chPrev, ch2; int i, tPos; int rNToGo = 0; int rTPos = 0; int j2; char z; public CBZip2InputStream(Stream zStream) { ll8 = null; tt = null; BsSetStream(zStream); Initialize(); InitBlock(); SetupBlock(); } internal static int[][] InitIntArray(int n1, int n2) { int[][] a = new int[n1][]; for (int k = 0; k < n1; ++k) { a[k] = new int[n2]; } return a; } internal static byte[][] InitByteArray(int n1, int n2) { byte[][] a = new byte[n1][]; for (int k = 0; k < n1; ++k) { a[k] = new byte[n2]; } return a; } public override int Read(byte[] buffer, int offset, int count) { Streams.ValidateBufferArguments(buffer, offset, count); /* * TODO The base class implementation allows to return partial data if/when ReadByte throws. That would be * be preferable here too (so don't override), but it would require that exceptions cause this instance to * permanently fail, and that needs review. */ int pos = 0; while (pos < count) { int b = ReadByte(); if (b < 0) break; buffer[offset + pos++] = (byte)b; } return pos; } public override int ReadByte() { if (streamEnd) return -1; int retChar = currentChar; switch (currentState) { case START_BLOCK_STATE: break; case RAND_PART_A_STATE: break; case RAND_PART_B_STATE: SetupRandPartB(); break; case RAND_PART_C_STATE: SetupRandPartC(); break; case NO_RAND_PART_A_STATE: break; case NO_RAND_PART_B_STATE: SetupNoRandPartB(); break; case NO_RAND_PART_C_STATE: SetupNoRandPartC(); break; default: break; } return retChar; } private void Initialize() { char magic3, magic4; magic3 = BsGetUChar(); magic4 = BsGetUChar(); if (magic3 != 'B' && magic4 != 'Z') { throw new IOException("Not a BZIP2 marked stream"); } magic3 = BsGetUChar(); magic4 = BsGetUChar(); if (magic3 != 'h' || magic4 < '1' || magic4 > '9') { BsFinishedWithStream(); streamEnd = true; return; } SetDecompressStructureSizes(magic4 - '0'); computedCombinedCRC = 0; } private void InitBlock() { char magic1, magic2, magic3, magic4; char magic5, magic6; magic1 = BsGetUChar(); magic2 = BsGetUChar(); magic3 = BsGetUChar(); magic4 = BsGetUChar(); magic5 = BsGetUChar(); magic6 = BsGetUChar(); if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) { Complete(); return; } if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) { BadBlockHeader(); streamEnd = true; return; } storedBlockCRC = BsGetInt32(); blockRandomised = BsR(1) == 1; GetAndMoveToFrontDecode(); mCrc.InitialiseCRC(); currentState = START_BLOCK_STATE; } private void EndBlock() { computedBlockCRC = mCrc.GetFinalCRC(); /* A bad CRC is considered a fatal error. */ if (storedBlockCRC != computedBlockCRC) { CrcError(); } computedCombinedCRC = Integers.RotateLeft(computedCombinedCRC, 1) ^ computedBlockCRC; } private void Complete() { storedCombinedCRC = BsGetInt32(); if (storedCombinedCRC != computedCombinedCRC) { CrcError(); } BsFinishedWithStream(); streamEnd = true; } private static void BlockOverrun() { Cadvise(); } private static void BadBlockHeader() { Cadvise(); } private static void CrcError() { Cadvise(); } private void BsFinishedWithStream() { try { if (this.bsStream != null) { Platform.Dispose(this.bsStream); this.bsStream = null; } } catch { //ignore } } private void BsSetStream(Stream f) { bsStream = f; bsLive = 0; bsBuff = 0; } private int BsR(int n) { int v; while (bsLive < n) { int zzi; char thech = '\0'; try { thech = (char)bsStream.ReadByte(); } catch (IOException) { CompressedStreamEOF(); } if (thech == '\uffff') { CompressedStreamEOF(); } zzi = thech; bsBuff = (bsBuff << 8) | (zzi & 0xff); bsLive += 8; } v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1); bsLive -= n; return v; } private char BsGetUChar() { return (char)BsR(8); } private int BsGetint() { //int u = 0; //u = (u << 8) | BsR(8); //u = (u << 8) | BsR(8); //u = (u << 8) | BsR(8); //u = (u << 8) | BsR(8); //return u; int u = BsR(16) << 16; return u | BsR(16); } private int BsGetIntVS(int numBits) { return BsR(numBits); } private int BsGetInt32() { return BsGetint(); } private void HbCreateDecodeTables(int[] limit, int[] basev, int[] perm, byte[] length, int minLen, int maxLen, int alphaSize) { int i, j, vec; int pp = 0; for (i = minLen; i <= maxLen; i++) { for (j = 0; j < alphaSize; j++) { if (length[j] == i) { perm[pp] = j; pp++; } } } for (i = 0; i < BZip2Constants.MAX_CODE_LEN; i++) { basev[i] = 0; } for (i = 0; i < alphaSize; i++) { basev[length[i] + 1]++; } for (i = 1; i < BZip2Constants.MAX_CODE_LEN; i++) { basev[i] += basev[i - 1]; } for (i = 0; i < BZip2Constants.MAX_CODE_LEN; i++) { limit[i] = 0; } vec = 0; for (i = minLen; i <= maxLen; i++) { vec += (basev[i + 1] - basev[i]); limit[i] = vec - 1; vec <<= 1; } for (i = minLen + 1; i <= maxLen; i++) { basev[i] = ((limit[i - 1] + 1) << 1) - basev[i]; } } private void RecvDecodingTables() { byte[][] len = InitByteArray(BZip2Constants.N_GROUPS, BZip2Constants.MAX_ALPHA_SIZE); int i, j, t, nGroups, nSelectors, alphaSize; int minLen, maxLen; bool[] inUse16 = new bool[16]; /* Receive the mapping table */ for (i = 0; i < 16; i++) { inUse16[i] = BsR(1) == 1; } for (i = 0; i < 16; i++) { int i16 = i * 16; if (inUse16[i]) { for (j = 0; j < 16; j++) { inUse[i16 + j] = BsR(1) == 1; } } else { for (j = 0; j < 16; j++) { inUse[i16 + j] = false; } } } MakeMaps(); alphaSize = nInUse + 2; /* Now the selectors */ nGroups = BsR(3); nSelectors = BsR(15); for (i = 0; i < nSelectors; i++) { j = 0; while (BsR(1) == 1) { j++; } selectorMtf[i] = (char)j; } /* Undo the MTF values for the selectors. */ { char[] pos = new char[BZip2Constants.N_GROUPS]; char tmp, v; for (v = '\0'; v < nGroups; v++) { pos[v] = v; } for (i = 0; i < nSelectors; i++) { v = selectorMtf[i]; tmp = pos[v]; while (v > 0) { pos[v] = pos[v - 1]; v--; } pos[0] = tmp; selector[i] = tmp; } } /* Now the coding tables */ for (t = 0; t < nGroups; t++) { byte[] len_t = len[t]; int curr = BsR(5); for (i = 0; i < alphaSize; i++) { while (BsR(1) == 1) { if (BsR(1) == 0) { curr++; } else { curr--; } } len_t[i] = (byte)curr; } } /* Create the Huffman decoding tables */ for (t = 0; t < nGroups; t++) { minLen = 32; maxLen = 0; byte[] len_t = len[t]; for (i = 0; i < alphaSize; i++) { int lti = len_t[i]; if (lti > maxLen) { maxLen = lti; } if (lti < minLen) { minLen = lti; } } HbCreateDecodeTables(limit[t], basev[t], perm[t], len_t, minLen, maxLen, alphaSize); minLens[t] = minLen; } } private void GetAndMoveToFrontDecode() { char[] yy = new char[256]; int i, j, nextSym, limitLast; int EOB, groupNo, groupPos; limitLast = BZip2Constants.baseBlockSize * blockSize100k; origPtr = BsGetIntVS(24); RecvDecodingTables(); EOB = nInUse + 1; groupNo = -1; groupPos = 0; /* Setting up the unzftab entries here is not strictly necessary, but it does save having to do it later in a separate pass, and so saves a block's worth of cache misses. */ for (i = 0; i <= 255; i++) { unzftab[i] = 0; } for (i = 0; i <= 255; i++) { yy[i] = (char)i; } last = -1; { int zt, zn, zvec, zj; if (groupPos == 0) { groupNo++; groupPos = BZip2Constants.G_SIZE; } groupPos--; zt = selector[groupNo]; zn = minLens[zt]; zvec = BsR(zn); while (zvec > limit[zt][zn]) { zn++; { { while (bsLive < 1) { int zzi; char thech = '\0'; try { thech = (char)bsStream.ReadByte(); } catch (IOException) { CompressedStreamEOF(); } if (thech == '\uffff') { CompressedStreamEOF(); } zzi = thech; bsBuff = (bsBuff << 8) | (zzi & 0xff); bsLive += 8; } } zj = (bsBuff >> (bsLive - 1)) & 1; bsLive--; } zvec = (zvec << 1) | zj; } nextSym = perm[zt][zvec - basev[zt][zn]]; } while (nextSym != EOB) { if (nextSym == BZip2Constants.RUNA || nextSym == BZip2Constants.RUNB) { char ch; int s = -1; int N = 1; do { if (nextSym == BZip2Constants.RUNA) { s += (0 + 1) * N; } else if (nextSym == BZip2Constants.RUNB) { s += (1 + 1) * N; } N = N * 2; { int zt, zn, zvec, zj; if (groupPos == 0) { groupNo++; groupPos = BZip2Constants.G_SIZE; } groupPos--; zt = selector[groupNo]; zn = minLens[zt]; zvec = BsR(zn); while (zvec > limit[zt][zn]) { zn++; { { while (bsLive < 1) { int zzi; char thech = '\0'; try { thech = (char)bsStream.ReadByte(); } catch (IOException) { CompressedStreamEOF(); } if (thech == '\uffff') { CompressedStreamEOF(); } zzi = thech; bsBuff = (bsBuff << 8) | (zzi & 0xff); bsLive += 8; } } zj = (bsBuff >> (bsLive - 1)) & 1; bsLive--; } zvec = (zvec << 1) | zj; } nextSym = perm[zt][zvec - basev[zt][zn]]; } } while (nextSym == BZip2Constants.RUNA || nextSym == BZip2Constants.RUNB); s++; ch = seqToUnseq[yy[0]]; unzftab[ch] += s; while (s > 0) { last++; ll8[last] = ch; s--; } if (last >= limitLast) { BlockOverrun(); } continue; } else { if (++last >= limitLast) { BlockOverrun(); } char tmp = yy[nextSym - 1]; unzftab[seqToUnseq[tmp]]++; ll8[last] = seqToUnseq[tmp]; /* * This loop is hammered during decompression, hence avoid * native method call overhead of Array.Copy for very * small ranges to copy. */ if (nextSym <= 16) { for (j = nextSym - 1; j > 0; --j) { yy[j] = yy[j - 1]; } } else { Array.Copy(yy, 0, yy, 1, nextSym - 1); } yy[0] = tmp; { int zt, zn, zvec, zj; if (groupPos == 0) { groupNo++; groupPos = BZip2Constants.G_SIZE; } groupPos--; zt = selector[groupNo]; zn = minLens[zt]; zvec = BsR(zn); while (zvec > limit[zt][zn]) { zn++; { { while (bsLive < 1) { int zzi; char thech = '\0'; try { thech = (char)bsStream.ReadByte(); } catch (IOException) { CompressedStreamEOF(); } zzi = thech; bsBuff = (bsBuff << 8) | (zzi & 0xff); bsLive += 8; } } zj = (bsBuff >> (bsLive - 1)) & 1; bsLive--; } zvec = (zvec << 1) | zj; } nextSym = perm[zt][zvec - basev[zt][zn]]; } continue; } } } private void SetupBlock() { int[] cftab = new int[257]; char ch; cftab[0] = 0; for (i = 1; i <= 256; i++) { cftab[i] = unzftab[i - 1]; } for (i = 1; i <= 256; i++) { cftab[i] += cftab[i - 1]; } for (i = 0; i <= last; i++) { ch = ll8[i]; tt[cftab[ch]] = i; cftab[ch]++; } cftab = null; tPos = tt[origPtr]; count = 0; i2 = 0; ch2 = 256; /* not a char and not EOF */ if (blockRandomised) { rNToGo = 0; rTPos = 0; SetupRandPartA(); } else { SetupNoRandPartA(); } } private void SetupRandPartA() { if (i2 <= last) { chPrev = ch2; ch2 = ll8[tPos]; tPos = tt[tPos]; if (rNToGo == 0) { rNToGo = BZip2Constants.rNums[rTPos]; rTPos++; if (rTPos == 512) { rTPos = 0; } } rNToGo--; ch2 ^= (int) ((rNToGo == 1) ? 1 : 0); i2++; currentChar = ch2; currentState = RAND_PART_B_STATE; mCrc.UpdateCRC(ch2); } else { EndBlock(); InitBlock(); SetupBlock(); } } private void SetupNoRandPartA() { if (i2 <= last) { chPrev = ch2; ch2 = ll8[tPos]; tPos = tt[tPos]; i2++; currentChar = ch2; currentState = NO_RAND_PART_B_STATE; mCrc.UpdateCRC(ch2); } else { EndBlock(); InitBlock(); SetupBlock(); } } private void SetupRandPartB() { if (ch2 != chPrev) { currentState = RAND_PART_A_STATE; count = 1; SetupRandPartA(); } else { count++; if (count >= 4) { z = ll8[tPos]; tPos = tt[tPos]; if (rNToGo == 0) { rNToGo = BZip2Constants.rNums[rTPos]; rTPos++; if (rTPos == 512) { rTPos = 0; } } rNToGo--; z ^= (char)((rNToGo == 1) ? 1 : 0); j2 = 0; currentState = RAND_PART_C_STATE; SetupRandPartC(); } else { currentState = RAND_PART_A_STATE; SetupRandPartA(); } } } private void SetupRandPartC() { if (j2 < (int) z) { currentChar = ch2; mCrc.UpdateCRC(ch2); j2++; } else { currentState = RAND_PART_A_STATE; i2++; count = 0; SetupRandPartA(); } } private void SetupNoRandPartB() { if (ch2 != chPrev) { currentState = NO_RAND_PART_A_STATE; count = 1; SetupNoRandPartA(); } else { count++; if (count >= 4) { z = ll8[tPos]; tPos = tt[tPos]; currentState = NO_RAND_PART_C_STATE; j2 = 0; SetupNoRandPartC(); } else { currentState = NO_RAND_PART_A_STATE; SetupNoRandPartA(); } } } private void SetupNoRandPartC() { if (j2 < (int) z) { currentChar = ch2; mCrc.UpdateCRC(ch2); j2++; } else { currentState = NO_RAND_PART_A_STATE; i2++; count = 0; SetupNoRandPartA(); } } private void SetDecompressStructureSizes(int newSize100k) { if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k && blockSize100k <= 9)) { // throw new IOException("Invalid block size"); } blockSize100k = newSize100k; if (newSize100k == 0) { return; } int n = BZip2Constants.baseBlockSize * newSize100k; ll8 = new char[n]; tt = new int[n]; } } }