decompress(); */

/**
 * Experimental block splitter.
 *
 * Reads the compressed file one bit at a time, shifts the bits through a
 * two-word window (buff_hi / buff_lo) and watches for the 48-bit block-header
 * or end-of-stream magic. The bytes collected for one block are wrapped with
 * the 4-byte stream header and an end marker and handed to bzdecompress().
 *
 * NOTE(review): this class looks unfinished (see the TODO in
 * decompress_block()); its logic is preserved as found.
 */
class MicroBzip2Hack
{
    // The 48-bit magic numbers, split into a high 16-bit and a low 32-bit part.
    const BLOCK_HEADER_HI = 0x00003141;
    const BLOCK_HEADER_LO = 0x59265359;
    const BLOCK_ENDMARK_HI = 0x00001772;
    const BLOCK_ENDMARK_LO = 0x45385090;

    // BITMASK[n] has the n lowest bits set (n = 0..8).
    public static $BITMASK = array(0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF);

    public $filename = null;
    public $fh = null;
    public $is_eof = true;
    public $header = '';
    public $blocksize = 0;
    // Number of bytes consumed from the file so far; __wakeup() seeks here.
    public $offset = 0;

    // For reading bits from a file.
    public $bytes = '';
    public $bytes_offset = 0;
    public $byte = 0;
    public $bit = 0;
    public $bits_left = 0;

    // For keeping blocks in memory.
    public $data = '';
    public $buff_hi = 0;
    public $buff_lo = 0;
    public $buffered_bits = 0;

    /**
     * Opens the file and validates the 4-byte stream header.
     *
     * @param string $filename Path of the bzip2 file.
     * @throws MBZ2Exception When the file cannot be opened or the header is invalid.
     */
    public function __construct($filename)
    {
        $this->filename = $filename;
        $this->fh = fopen($filename, 'rb');
        if ($this->fh === false) {
            throw new MBZ2Exception('Unable to open file');
        }
        $this->is_eof = false;
        $this->blocksize = $this->read_header();
    }

    /**
     * Re-opens the file after unserialization and seeks to the stored offset.
     */
    public function __wakeup()
    {
        if (!$this->is_eof) {
            $this->fh = fopen($this->filename, 'rb');
            fseek($this->fh, $this->offset);
        }
    }

    /**
     * @return bool The current value of the is_eof flag.
     */
    public function is_eof()
    {
        return $this->is_eof;
    }

    /**
     * Closes the underlying file handle.
     */
    public function close()
    {
        fclose($this->fh);
    }

    /**
     * Reads the 4-byte stream header ("BZh" followed by a digit 1-9).
     *
     * @return int The block size digit.
     * @throws MBZ2Exception On a wrong magic number or block size.
     */
    public function read_header()
    {
        $this->header = fread($this->fh, 4);
        // These bytes bypass bits(), so account for them here to keep
        // $offset equal to the real file position.
        $this->offset += strlen($this->header);
        if (substr($this->header, 0, 3) != 'BZh') {
            throw new MBZ2Exception('Bad magic number');
        }
        $blocksize = intval($this->header[3]);
        if ($blocksize < 1 || $blocksize > 9) {
            throw new MBZ2Exception("Invalid block size");
        }
        return $blocksize;
    }

    /**
     * Collects the bits of one block and decompresses them with bzdecompress().
     *
     * @return mixed The result of bzdecompress(), or null when no block
     *               boundary was found before the input ran out.
     */
    public function decompress_block()
    {
        $bits_read = $this->buffered_bits;
        $end = 0;
        // Stays 0 when the 96-bit mark is never hit exactly.
        $block_CRC = 0;

        while (true) {
            $bit = $this->bits(1);
            $bits_read += 1;
            if ($bit == 2) {
                // bits() returns 2 when the file is exhausted.
                break;
            }
            $this->buffer_bit($bit);

            if ($bits_read == 96) {
                // Captured right after 48 magic bits + 48 more bits.
                $block_CRC = (($this->buff_hi << 16) & 0xffff0000)
                    | (($this->buff_lo >> 16) & 0xffff);
            }

            $window_hi = $this->buff_hi & 0xffff;
            $is_header = $window_hi == self::BLOCK_HEADER_HI
                && $this->buff_lo == self::BLOCK_HEADER_LO;
            $is_endmark = $window_hi == self::BLOCK_ENDMARK_HI
                && $this->buff_lo == self::BLOCK_ENDMARK_LO;

            if ($is_header || $is_endmark) {
                if ($bits_read > 49) {
                    $end = $bits_read - 49;
                }
                if ($end >= 130) {
                    break;
                }
            }
        }

        if ($end > 0) {
            // TODO: need to do something with beginning of next block, which
            // is already there. Also need to clear out anything left in the
            // buffer.
            $old_hi = $this->buff_hi;
            $old_lo = $this->buff_lo;
            $this->buff_hi = 0x1772;
            $this->buff_lo = 0x45385090;
            $this->buffer_int($block_CRC);
            $this->buffer_char($old_hi >> 16);
            $this->buffer_char($old_hi & 0xffff);
            $this->buffer_int($old_lo);
            $this->clear_data_buffer();

            $data = $this->header . $this->data;
            // NOTE(review): looks like a leftover debug dump; it writes a file
            // into the current working directory for every block.
            file_put_contents('block_at_'.time().'.txt', $data);
            $this->data = '';
            return bzdecompress($data);
        }
    }

    /**
     * Shifts one bit into the window; once 56 bits are buffered the oldest
     * byte is appended to $data.
     *
     * @param int $bit Only the lowest bit is used.
     */
    public function buffer_bit($bit)
    {
        $this->buff_hi = ($this->buff_hi << 1) | ($this->buff_lo >> 31);
        $this->buff_lo = (($this->buff_lo << 1) | ($bit & 0x1)) & 0xffffffff;
        $this->buffered_bits += 1;
        if ($this->buffered_bits == 56) {
            $this->data .= pack('C', ($this->buff_hi >> 16) & 0xff);
            $this->buffered_bits = 48;
        }
    }

    /**
     * Buffers the 8 low bits of $c, most significant first.
     *
     * @param int $c
     */
    public function buffer_char($c)
    {
        for ($i = 7; $i >= 0; $i--) {
            $this->buffer_bit(($c >> $i) & 0x1);
        }
    }

    /**
     * Buffers the 32 low bits of $c, most significant first.
     *
     * @param int $c
     */
    public function buffer_int($c)
    {
        for ($i = 31; $i >= 0; $i--) {
            $this->buffer_bit(($c >> $i) & 0x1);
        }
    }

    /**
     * Appends one more byte taken from the high word to $data and trims
     * buff_hi to its low 16 bits.
     */
    public function clear_data_buffer()
    {
        $bits_left = 56 - $this->buffered_bits;
        $byte = ($this->buff_hi >> (16 + $bits_left)) & 0xff;
        $this->data .= pack('C', $byte);
        $this->buff_hi &= 0xffff;
    }

    /**
     * Alternative single-bit reader based on $bytes_offset.
     *
     * NOTE(review): not called anywhere in this class; it treats $byte as a
     * byte value while bits() treats it as an index, so the two must not be
     * mixed on one instance.
     *
     * @return int 0 or 1, or 2 when the file is exhausted.
     */
    public function get_bit()
    {
        if ($this->bits_left == 0) {
            if (feof($this->fh)) {
                return 2;
            }
            $bytes = fread($this->fh, 8192);
            $bytes_read = strlen($bytes);
            $this->bytes = substr($this->bytes, $this->bytes_offset) . $bytes;
            $this->bytes_offset = 0;
            $this->byte = ord($this->bytes[$this->bytes_offset]);
            $this->offset += $bytes_read;
            $this->bits_left += $bytes_read * 8;
        }

        if ($this->bit > 0) {
            $this->bit -= 1;
        } else {
            $this->bytes_offset += 1;
            $this->byte = ord($this->bytes[$this->bytes_offset]);
            $this->bit = 7;
        }
        $this->bits_left -= 1;

        return ($this->byte >> $this->bit) & 0x1;
    }

    /**
     * Reads $bits_to_read bits, most significant bit first.
     *
     * @param int $bits_to_read
     * @return int The value read, or 2 when more input is needed but the file
     *             is exhausted.
     */
    public function bits($bits_to_read)
    {
        if ($bits_to_read > $this->bits_left) {
            if (feof($this->fh)) {
                return 2;
            }
            $bytes = fread($this->fh, 8192);
            $bytes_read = strlen($bytes);
            // Drop the fully consumed bytes, keep the partially read one.
            $this->bytes = substr($this->bytes, $this->byte) . $bytes;
            $this->byte = 0;
            $this->offset += $bytes_read;
            $this->bits_left += $bytes_read * 8;
        }

        $result = 0;
        $n = $bits_to_read;
        while ($n > 0) {
            $left = 8 - $this->bit;
            if ($n >= $left) {
                // Take everything that remains of the current byte.
                $result <<= $left;
                $result |= (self::$BITMASK[$left] & ord($this->bytes[$this->byte++]));
                $this->bit = 0;
                $n -= $left;
            } else {
                // Take $n bits from the middle of the current byte.
                $shift = 8 - $n - $this->bit;
                $result <<= $n;
                $result |= ((ord($this->bytes[$this->byte]) & (self::$BITMASK[$n] << $shift)) >> $shift);
                $this->bit += $n;
                $n = 0;
            }
        }
        $this->bits_left -= $bits_to_read;

        return $result;
    }
}

/**
 * Pure-PHP bzip2 decoder that reads a file block by block.
 *
 * Block and stream CRCs are read but not verified.
 */
class MicroBzip2
{
    // BITMASK[n] has the n lowest bits set (n = 0..8).
    public static $BITMASK = array(0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF);

    public $filename = '';
    public $fh = null;
    public $is_eof = true;
    // Number of bytes consumed from the file so far; __wakeup() seeks here.
    public $offset = 0;
    // Read buffer, index of the current byte in it, and bit position (0-7)
    // inside that byte.
    public $bytes = '';
    public $byte = 0;
    public $bits_left = 0;
    public $bit = 0;
    public $blocksize = 0;

    const MAX_HUFCODE_BITS = 20;
    const MAX_SYMBOLS = 258;
    const SYMBOL_RUNA = 0;
    const SYMBOL_RUNB = 1;
    const GROUP_SIZE = 50;

    /**
     * Opens the file and validates the stream header.
     *
     * @param string $filename Path of the bzip2 file.
     * @throws MBZ2Exception When the file cannot be opened or the header is invalid.
     */
    public function __construct($filename)
    {
        $this->filename = $filename;
        $this->fh = fopen($filename, 'rb');
        if ($this->fh === false) {
            throw new MBZ2Exception('Unable to open file');
        }
        $this->is_eof = false;
        $this->blocksize = $this->read_header();
    }

    /**
     * Re-opens the file after unserialization and seeks to the stored offset.
     */
    public function __wakeup()
    {
        if (!$this->is_eof) {
            $this->fh = fopen($this->filename, 'rb');
            fseek($this->fh, $this->offset);
        }
    }

    /**
     * @return bool True once the end-of-stream marker has been read.
     */
    public function is_eof()
    {
        return $this->is_eof;
    }

    /**
     * Closes the underlying file handle.
     */
    public function close()
    {
        fclose($this->fh);
    }

    /**
     * Decompresses every remaining block and returns the concatenated output.
     *
     * @return string
     * @throws MBZ2Exception On malformed input.
     */
    public function decompress()
    {
        $all = '';
        while (($chunk = $this->decompress_block()) !== false) {
            $all .= $chunk;
        }
        return $all;
    }

    /**
     * Reads the stream header: the bytes "BZh" followed by a digit 1-9.
     *
     * @return int The block size digit.
     * @throws MBZ2Exception On a wrong magic number or block size.
     */
    public function read_header()
    {
        // 4348520 == 0x425A68 == "BZh"
        if ($this->bits(8 * 3) != 4348520) {
            throw new MBZ2Exception('Bad magic number');
        }
        // 48 is the ASCII code of '0'.
        $blocksize = $this->bits(8) - 48;
        if ($blocksize < 1 || $blocksize > 9) {
            throw new MBZ2Exception("Invalid block size");
        }
        return $blocksize;
    }

    /**
     * Decodes the next block.
     *
     * @param int $max_length When greater than zero, decoding stops and the
     *                        output is returned as soon as it holds that many
     *                        bytes. Zero (the default) or a negative value
     *                        returns the whole block.
     * @return string|false The decoded bytes, or false when the end-of-stream
     *                      marker was read instead of a block.
     * @throws MBZ2Exception On malformed or truncated input.
     */
    public function decompress_block($max_length = 0)
    {
        $bufsize = 100000 * $this->blocksize;

        // 48-bit magic, rendered as 12 zero-padded hex digits so that every
        // byte sequence maps to exactly one string.
        $header = '';
        for ($i = 0; $i < 6; $i++) {
            $header .= sprintf('%02X', $this->bits(8));
        }
        if ($header === "177245385090") {
            $this->is_eof = true;
            return false; // Last block
        }
        if ($header !== "314159265359") {
            throw new MBZ2Exception('Invalid block header');
        }

        $this->bits(32); // Ignore CRC codes
        if ($this->bits(1)) {
            throw new MBZ2Exception('Obsolete version');
        }
        $origPtr = $this->bits(24);
        if ($origPtr > $bufsize) {
            throw new MBZ2Exception('Initial position larger than buffer size');
        }

        // Symbol map: a 16-bit mask selects which 16-value ranges carry their
        // own 16-bit mask of used byte values.
        $t = $this->bits(16);
        $symToByte = new SplFixedArray(256);
        $symTotal = 0;
        for ($i = 0; $i < 16; $i++) {
            if ($t & (1 << (15 - $i))) {
                $k = $this->bits(16);
                for ($j = 0; $j < 16; $j++) {
                    if ($k & (1 << (15 - $j))) {
                        $symToByte[$symTotal++] = (16 * $i) + $j;
                    }
                }
            }
        }

        $groupCount = $this->bits(3);
        if ($groupCount < 2 || $groupCount > 6) {
            throw new MBZ2Exception();
        }
        $nSelectors = $this->bits(15);
        if ($nSelectors == 0) {
            throw new MBZ2Exception();
        }

        // Selectors are unary-coded positions in a move-to-front list of
        // group numbers.
        $mtfSymbol = array();
        for ($i = 0; $i < $groupCount; $i++) {
            $mtfSymbol[$i] = $i;
        }
        $selectors = new SplFixedArray(32768);
        for ($i = 0; $i < $nSelectors; $i++) {
            for ($j = 0; $this->bits(1); $j++) {
                if ($j >= $groupCount) {
                    throw new MBZ2Exception();
                }
            }
            // The loop above can still leave $j == $groupCount, which is not
            // a valid position in the list.
            if ($j >= $groupCount) {
                throw new MBZ2Exception();
            }
            $uc = $mtfSymbol[$j];
            array_splice($mtfSymbol, $j, 1);
            array_unshift($mtfSymbol, $uc);
            $selectors[$i] = $uc;
        }

        // Two extra symbols: the second run symbol and the end-of-block symbol.
        $symCount = $symTotal + 2;
        $groups = array();
        for ($j = 0; $j < $groupCount; $j++) {
            $length = new SplFixedArray(self::MAX_SYMBOLS);
            $temp = new SplFixedArray(self::MAX_HUFCODE_BITS + 1);

            // Code lengths are delta-coded starting from a 5-bit value.
            $t = $this->bits(5); // lengths
            for ($i = 0; $i < $symCount; $i++) {
                while (true) {
                    if ($t < 1 || $t > self::MAX_HUFCODE_BITS) {
                        throw new MBZ2Exception();
                    }
                    if (!$this->bits(1)) {
                        break;
                    }
                    if (!$this->bits(1)) {
                        $t++;
                    } else {
                        $t--;
                    }
                }
                $length[$i] = $t;
            }

            $minLen = $length[0];
            $maxLen = $length[0];
            for ($i = 1; $i < $symCount; $i++) {
                if ($length[$i] > $maxLen) {
                    $maxLen = $length[$i];
                } else if ($length[$i] < $minLen) {
                    $minLen = $length[$i];
                }
            }

            $groups[$j] = array();
            $hufGroup =& $groups[$j];
            $hufGroup['permute'] = new SplFixedArray(self::MAX_SYMBOLS);
            // limit/base are indexed by (code length + 1), so they need one
            // slot beyond MAX_HUFCODE_BITS + 1.
            $hufGroup['limit'] = new SplFixedArray(self::MAX_HUFCODE_BITS + 2);
            $hufGroup['base'] = new SplFixedArray(self::MAX_HUFCODE_BITS + 2);
            $hufGroup['minLen'] = $minLen;
            $hufGroup['maxLen'] = $maxLen;

            // permute: symbols ordered by code length.
            $pp = 0;
            for ($i = $minLen; $i <= $maxLen; $i++) {
                for ($t = 0; $t < $symCount; $t++) {
                    if ($length[$t] == $i) {
                        $hufGroup['permute'][$pp++] = $t;
                    }
                }
            }

            // temp: number of symbols per code length.
            for ($i = $minLen; $i <= $maxLen; $i++) {
                $temp[$i] = $hufGroup['limit'][$i + 1] = 0;
            }
            for ($i = 0; $i < $symCount; $i++) {
                $temp[$length[$i]] += 1;
            }

            // limit: largest code value of each length; base: amount to
            // subtract from a code to obtain its index in permute.
            $pp = $t = 0;
            for ($i = $minLen; $i < $maxLen; $i++) {
                $pp += $temp[$i];
                $hufGroup['limit'][$i + 1] = $pp - 1;
                $pp <<= 1;
                $hufGroup['base'][$i + 2] = $pp - ($t += $temp[$i]);
            }
            $hufGroup['limit'][$maxLen + 1] = $pp + $temp[$maxLen] - 1;
            $hufGroup['base'][$minLen + 1] = 0;
        }

        // Decode the symbols, undoing run-length and move-to-front coding.
        $byteCount = new SplFixedArray(256);
        for ($i = 0; $i < 256; $i++) {
            $mtfSymbol[$i] = $i;
        }
        $runPos = $count = $symCount = $selector = 0;
        $buf = new SplFixedArray($bufsize);
        while (true) {
            // Switch to the next selector's table every GROUP_SIZE symbols.
            if (!($symCount--)) {
                $symCount = self::GROUP_SIZE - 1;
                if ($selector >= $nSelectors) {
                    throw new MBZ2Exception();
                }
                $hufGroup =& $groups[$selectors[$selector++]];
            }

            // Read the shortest possible code, then extend it bit by bit.
            $i = $hufGroup['minLen'];
            $j = $this->bits($i);
            while (true) {
                if ($i > $hufGroup['maxLen']) {
                    throw new MBZ2Exception();
                }
                if ($j <= $hufGroup['limit'][$i + 1]) {
                    break;
                }
                $i++;
                $j = ($j << 1) | $this->bits(1);
            }
            $j -= $hufGroup['base'][$i + 1];
            if ($j < 0 || $j >= self::MAX_SYMBOLS) {
                throw new MBZ2Exception();
            }
            $nextSym = $hufGroup['permute'][$j];

            if ($nextSym == self::SYMBOL_RUNA || $nextSym == self::SYMBOL_RUNB) {
                // Run length in bijective base 2: RUNA adds 1 * weight,
                // RUNB adds 2 * weight, and the weight doubles each symbol.
                if (!$runPos) {
                    $runPos = 1;
                    $t = 0;
                }
                if ($nextSym == self::SYMBOL_RUNA) {
                    $t += $runPos;
                } else {
                    $t += 2 * $runPos;
                }
                $runPos <<= 1;
                continue;
            }

            if ($runPos) {
                // A run just ended: emit $t copies of the front symbol.
                $runPos = 0;
                if ($count + $t >= $bufsize) {
                    throw new MBZ2Exception();
                }
                $uc = $symToByte[$mtfSymbol[0]];
                $byteCount[$uc] = $byteCount[$uc] + $t;
                while ($t--) {
                    $buf[$count++] = $uc;
                }
            }

            if ($nextSym > $symTotal) {
                // End-of-block symbol.
                break;
            }
            if ($count >= $bufsize) {
                throw new MBZ2Exception();
            }

            // Literal: move-to-front position $nextSym - 1.
            $i = $nextSym - 1;
            $uc = $mtfSymbol[$i];
            array_splice($mtfSymbol, $i, 1);
            array_unshift($mtfSymbol, $uc);
            $uc = $symToByte[$uc];
            $byteCount[$uc] = $byteCount[$uc] + 1;
            $buf[$count++] = $uc;
        }

        if ($origPtr < 0 || $origPtr >= $count) {
            throw new MBZ2Exception();
        }

        // Turn per-byte counts into starting offsets.
        $j = 0;
        for ($i = 0; $i < 256; $i++) {
            $k = $j + $byteCount[$i];
            $byteCount[$i] = $j;
            $j = $k;
        }
        // Store each entry's successor index in the bits above the byte value.
        for ($i = 0; $i < $count; $i++) {
            $uc = $buf[$i] & 0xff;
            $buf[$byteCount[$uc]] |= ($i << 8);
            $byteCount[$uc] += 1;
        }

        $pos = $current = $run = 0;
        if ($count) {
            $pos = $buf[$origPtr];
            $current = ($pos & 0xff);
            $pos >>= 8;
            $run = -1;
        }

        // Follow the chain and expand the outer run-length coding: after four
        // equal bytes the next value is a repeat count, not a literal.
        $output = '';
        $copies = $previous = $outbyte = 0;
        while ($count) {
            $count--;
            $previous = $current;
            $pos = $buf[$pos];
            $current = $pos & 0xff;
            $pos >>= 8;
            if ($run++ == 3) {
                $copies = $current;
                $outbyte = $previous;
                $current = -1;
            } else {
                $copies = 1;
                $outbyte = $current;
            }
            while ($copies--) {
                $output .= chr($outbyte);
                // With the default of 0 the counter drops to -1 on the first
                // byte and the limit is never applied again.
                if ($max_length > -1 && --$max_length == 0) {
                    return $output;
                }
            }
            if ($current != $previous) {
                $run = 0;
            }
        }

        return $output;
    }

    /**
     * Reads $bits_to_read bits, most significant bit first.
     *
     * @param int $bits_to_read
     * @return int
     * @throws MBZ2Exception When the file does not hold enough bits.
     */
    public function bits($bits_to_read)
    {
        if ($bits_to_read > $this->bits_left) {
            if (feof($this->fh)) {
                throw new MBZ2Exception('Out of bits');
            }
            $bytes = fread($this->fh, 8192);
            $bytes_read = strlen($bytes);
            // Drop the fully consumed bytes, keep the partially read one.
            $this->bytes = substr($this->bytes, $this->byte) . $bytes;
            $this->byte = 0;
            $this->offset += $bytes_read;
            $this->bits_left += $bytes_read * 8;
            if ($bits_to_read > $this->bits_left) {
                // Short read: do not index past the end of the buffer.
                throw new MBZ2Exception('Out of bits');
            }
        }

        $result = 0;
        $n = $bits_to_read;
        while ($n > 0) {
            $left = 8 - $this->bit;
            if ($n >= $left) {
                // Take everything that remains of the current byte.
                $result <<= $left;
                $result |= (self::$BITMASK[$left] & ord($this->bytes[$this->byte++]));
                $this->bit = 0;
                $n -= $left;
            } else {
                // Take $n bits from the middle of the current byte.
                $shift = 8 - $n - $this->bit;
                $result <<= $n;
                $result |= ((ord($this->bytes[$this->byte]) & (self::$BITMASK[$n] << $shift)) >> $shift);
                $this->bit += $n;
                $n = 0;
            }
        }
        $this->bits_left -= $bits_to_read;

        return $result;
    }
}

/**
 * Raised by MicroBzip2 and MicroBzip2Hack for unreadable or malformed input.
 */
class MBZ2Exception extends Exception
{
}
?>