001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.bzip2;
020
021import java.io.IOException;
022import java.io.OutputStream;
023
024import org.apache.commons.compress.compressors.CompressorOutputStream;
025
026/**
027 * An output stream that compresses into the BZip2 format into another stream.
028 *
029 * <p>
030 * The compression requires large amounts of memory. Thus you should call the
031 * {@link #close() close()} method as soon as possible, to force
032 * {@code BZip2CompressorOutputStream} to release the allocated memory.
033 * </p>
034 *
035 * <p> You can shrink the amount of allocated memory and maybe raise
036 * the compression speed by choosing a lower blocksize, which in turn
037 * may cause a lower compression ratio. You can avoid unnecessary
038 * memory allocation by avoiding using a blocksize which is bigger
039 * than the size of the input.  </p>
040 *
041 * <p> You can compute the memory usage for compressing by the
042 * following formula: </p>
043 *
044 * <pre>
045 * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
046 * </pre>
047 *
048 * <p> To get the memory required for decompression by {@link
049 * BZip2CompressorInputStream} use </p>
050 *
051 * <pre>
052 * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
053 * </pre>
054 *
055 * <table style="width:100%" border="1">
056 * <caption>Memory usage by blocksize</caption>
057 * <tr>
058 * <th colspan="3">Memory usage by blocksize</th>
059 * </tr>
060 * <tr>
061 * <th style="text-align: right">Blocksize</th> <th style="text-align: right">Compression<br>
062 * memory usage</th> <th style="text-align: right">Decompression<br>
063 * memory usage</th>
064 * </tr>
065 * <tr>
066 * <td style="text-align: right">100k</td>
067 * <td style="text-align: right">1300k</td>
068 * <td style="text-align: right">565k</td>
069 * </tr>
070 * <tr>
071 * <td style="text-align: right">200k</td>
072 * <td style="text-align: right">2200k</td>
073 * <td style="text-align: right">1065k</td>
074 * </tr>
075 * <tr>
076 * <td style="text-align: right">300k</td>
077 * <td style="text-align: right">3100k</td>
078 * <td style="text-align: right">1565k</td>
079 * </tr>
080 * <tr>
081 * <td style="text-align: right">400k</td>
082 * <td style="text-align: right">4000k</td>
083 * <td style="text-align: right">2065k</td>
084 * </tr>
085 * <tr>
086 * <td style="text-align: right">500k</td>
087 * <td style="text-align: right">4900k</td>
088 * <td style="text-align: right">2565k</td>
089 * </tr>
090 * <tr>
091 * <td style="text-align: right">600k</td>
092 * <td style="text-align: right">5800k</td>
093 * <td style="text-align: right">3065k</td>
094 * </tr>
095 * <tr>
096 * <td style="text-align: right">700k</td>
097 * <td style="text-align: right">6700k</td>
098 * <td style="text-align: right">3565k</td>
099 * </tr>
100 * <tr>
101 * <td style="text-align: right">800k</td>
102 * <td style="text-align: right">7600k</td>
103 * <td style="text-align: right">4065k</td>
104 * </tr>
105 * <tr>
106 * <td style="text-align: right">900k</td>
107 * <td style="text-align: right">8500k</td>
108 * <td style="text-align: right">4565k</td>
109 * </tr>
110 * </table>
111 *
112 * <p>
113 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the
114 * bzipped input is smaller than one block.
115 * </p>
116 *
117 * <p>
118 * Instances of this class are not threadsafe.
119 * </p>
120 *
121 * <p>
122 * TODO: Update to BZip2 1.0.1
123 * </p>
124 * @NotThreadSafe
125 */
126public class BZip2CompressorOutputStream extends CompressorOutputStream
127    implements BZip2Constants {
128
129    /**
130     * The minimum supported blocksize {@code  == 1}.
131     */
132    public static final int MIN_BLOCKSIZE = 1;
133
134    /**
135     * The maximum supported blocksize {@code  == 9}.
136     */
137    public static final int MAX_BLOCKSIZE = 9;
138
139    private static final int GREATER_ICOST = 15;
140    private static final int LESSER_ICOST = 0;
141
142    private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
143                                          final Data dat, final int alphaSize,
144                                          final int maxLen) {
145        /*
146         * Nodes and heap entries run from 1. Entry 0 for both the heap and
147         * nodes is a sentinel.
148         */
149        final int[] heap = dat.heap;
150        final int[] weight = dat.weight;
151        final int[] parent = dat.parent;
152
153        for (int i = alphaSize; --i >= 0;) {
154            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
155        }
156
157        for (boolean tooLong = true; tooLong;) {
158            tooLong = false;
159
160            int nNodes = alphaSize;
161            int nHeap = 0;
162            heap[0] = 0;
163            weight[0] = 0;
164            parent[0] = -2;
165
166            for (int i = 1; i <= alphaSize; i++) {
167                parent[i] = -1;
168                nHeap++;
169                heap[nHeap] = i;
170
171                int zz = nHeap;
172                final int tmp = heap[zz];
173                while (weight[tmp] < weight[heap[zz >> 1]]) {
174                    heap[zz] = heap[zz >> 1];
175                    zz >>= 1;
176                }
177                heap[zz] = tmp;
178            }
179
180            while (nHeap > 1) {
181                final int n1 = heap[1];
182                heap[1] = heap[nHeap];
183                nHeap--;
184
185                int yy = 0;
186                int zz = 1;
187                int tmp = heap[1];
188
189                while (true) {
190                    yy = zz << 1;
191
192                    if (yy > nHeap) {
193                        break;
194                    }
195
196                    if ((yy < nHeap)
197                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
198                        yy++;
199                    }
200
201                    if (weight[tmp] < weight[heap[yy]]) {
202                        break;
203                    }
204
205                    heap[zz] = heap[yy];
206                    zz = yy;
207                }
208
209                heap[zz] = tmp;
210
211                final int n2 = heap[1];
212                heap[1] = heap[nHeap];
213                nHeap--;
214
215                yy = 0;
216                zz = 1;
217                tmp = heap[1];
218
219                while (true) {
220                    yy = zz << 1;
221
222                    if (yy > nHeap) {
223                        break;
224                    }
225
226                    if ((yy < nHeap)
227                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
228                        yy++;
229                    }
230
231                    if (weight[tmp] < weight[heap[yy]]) {
232                        break;
233                    }
234
235                    heap[zz] = heap[yy];
236                    zz = yy;
237                }
238
239                heap[zz] = tmp;
240                nNodes++;
241                parent[n1] = parent[n2] = nNodes;
242
243                final int weight_n1 = weight[n1];
244                final int weight_n2 = weight[n2];
245                weight[nNodes] = ((weight_n1 & 0xffffff00)
246                                  + (weight_n2 & 0xffffff00))
247                    | (1 + (((weight_n1 & 0x000000ff)
248                             > (weight_n2 & 0x000000ff))
249                            ? (weight_n1 & 0x000000ff)
250                            : (weight_n2 & 0x000000ff)));
251
252                parent[nNodes] = -1;
253                nHeap++;
254                heap[nHeap] = nNodes;
255
256                tmp = 0;
257                zz = nHeap;
258                tmp = heap[zz];
259                final int weight_tmp = weight[tmp];
260                while (weight_tmp < weight[heap[zz >> 1]]) {
261                    heap[zz] = heap[zz >> 1];
262                    zz >>= 1;
263                }
264                heap[zz] = tmp;
265
266            }
267
268            for (int i = 1; i <= alphaSize; i++) {
269                int j = 0;
270                int k = i;
271
272                for (int parent_k; (parent_k = parent[k]) >= 0;) {
273                    k = parent_k;
274                    j++;
275                }
276
277                len[i - 1] = (byte) j;
278                if (j > maxLen) {
279                    tooLong = true;
280                }
281            }
282
283            if (tooLong) {
284                for (int i = 1; i < alphaSize; i++) {
285                    int j = weight[i] >> 8;
286                    j = 1 + (j >> 1);
287                    weight[i] = j << 8;
288                }
289            }
290        }
291    }
292
293    /**
294     * Index of the last char in the block, so the block size == last + 1.
295     */
296    private int last;
297
298    /**
299     * Always: in the range 0 .. 9. The current block size is 100000 * this
300     * number.
301     */
302    private final int blockSize100k;
303
304    private int bsBuff;
305    private int bsLive;
306    private final CRC crc = new CRC();
307
308    private int nInUse;
309
310    private int nMTF;
311
312    private int currentChar = -1;
313    private int runLength;
314
315    private int blockCRC;
316    private int combinedCRC;
317    private final int allowableBlockSize;
318
319    /**
320     * All memory intensive stuff.
321     */
322    private Data data;
323    private BlockSort blockSorter;
324
325    private OutputStream out;
326    private volatile boolean closed;
327
328    /**
329     * Chooses a blocksize based on the given length of the data to compress.
330     *
331     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
332     *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
333     *         {@code inputLength} this method returns {@code MAX_BLOCKSIZE}
334     *         always.
335     *
336     * @param inputLength
337     *            The length of the data which will be compressed by
338     *            {@code BZip2CompressorOutputStream}.
339     */
340    public static int chooseBlockSize(final long inputLength) {
341        return (inputLength > 0) ? (int) Math
342            .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
343    }
344
345    /**
346     * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
347     *
348     * @param out
349     *            the destination stream.
350     *
351     * @throws IOException
352     *             if an I/O error occurs in the specified stream.
353     * @throws NullPointerException
354     *             if <code>out == null</code>.
355     */
356    public BZip2CompressorOutputStream(final OutputStream out)
357        throws IOException {
358        this(out, MAX_BLOCKSIZE);
359    }
360
361    /**
362     * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
363     *
364     * @param out
365     *            the destination stream.
366     * @param blockSize
367     *            the blockSize as 100k units.
368     *
369     * @throws IOException
370     *             if an I/O error occurs in the specified stream.
371     * @throws IllegalArgumentException
372     *             if <code>(blockSize &lt; 1) || (blockSize &gt; 9)</code>.
373     * @throws NullPointerException
374     *             if <code>out == null</code>.
375     *
376     * @see #MIN_BLOCKSIZE
377     * @see #MAX_BLOCKSIZE
378     */
379    public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
380        if (blockSize < 1) {
381            throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
382        }
383        if (blockSize > 9) {
384            throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
385        }
386
387        this.blockSize100k = blockSize;
388        this.out = out;
389
390        /* 20 is just a paranoia constant */
391        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
392        init();
393    }
394
395    @Override
396    public void write(final int b) throws IOException {
397        if (closed) {
398            throw new IOException("Closed");
399        }
400        write0(b);
401    }
402
403    /**
404     * Writes the current byte to the buffer, run-length encoding it
405     * if it has been repeated at least four times (the first step
406     * RLEs sequences of four identical bytes).
407     *
408     * <p>Flushes the current block before writing data if it is
409     * full.</p>
410     *
411     * <p>"write to the buffer" means adding to data.buffer starting
412     * two steps "after" this.last - initially starting at index 1
413     * (not 0) - and updating this.last to point to the last index
414     * written minus 1.</p>
415     */
416    private void writeRun() throws IOException {
417        final int lastShadow = this.last;
418
419        if (lastShadow < this.allowableBlockSize) {
420            final int currentCharShadow = this.currentChar;
421            final Data dataShadow = this.data;
422            dataShadow.inUse[currentCharShadow] = true;
423            final byte ch = (byte) currentCharShadow;
424
425            int runLengthShadow = this.runLength;
426            this.crc.updateCRC(currentCharShadow, runLengthShadow);
427
428            switch (runLengthShadow) {
429            case 1:
430                dataShadow.block[lastShadow + 2] = ch;
431                this.last = lastShadow + 1;
432                break;
433
434            case 2:
435                dataShadow.block[lastShadow + 2] = ch;
436                dataShadow.block[lastShadow + 3] = ch;
437                this.last = lastShadow + 2;
438                break;
439
440            case 3: {
441                final byte[] block = dataShadow.block;
442                block[lastShadow + 2] = ch;
443                block[lastShadow + 3] = ch;
444                block[lastShadow + 4] = ch;
445                this.last = lastShadow + 3;
446            }
447                break;
448
449            default: {
450                runLengthShadow -= 4;
451                dataShadow.inUse[runLengthShadow] = true;
452                final byte[] block = dataShadow.block;
453                block[lastShadow + 2] = ch;
454                block[lastShadow + 3] = ch;
455                block[lastShadow + 4] = ch;
456                block[lastShadow + 5] = ch;
457                block[lastShadow + 6] = (byte) runLengthShadow;
458                this.last = lastShadow + 5;
459            }
460                break;
461
462            }
463        } else {
464            endBlock();
465            initBlock();
466            writeRun();
467        }
468    }
469
470    /**
471     * Overridden to warn about an unclosed stream.
472     */
473    @Override
474    protected void finalize() throws Throwable {
475        if (!closed) {
476            System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it");
477        }
478        super.finalize();
479    }
480
481
482    public void finish() throws IOException {
483        if (!closed) {
484            closed = true;
485            try {
486                if (this.runLength > 0) {
487                    writeRun();
488                }
489                this.currentChar = -1;
490                endBlock();
491                endCompression();
492            } finally {
493                this.out = null;
494                this.blockSorter = null;
495                this.data = null;
496            }
497        }
498    }
499
500    @Override
501    public void close() throws IOException {
502        if (!closed) {
503            try (OutputStream outShadow = this.out) {
504                finish();
505            }
506        }
507    }
508
509    @Override
510    public void flush() throws IOException {
511        final OutputStream outShadow = this.out;
512        if (outShadow != null) {
513            outShadow.flush();
514        }
515    }
516
517    /**
518     * Writes magic bytes like BZ on the first position of the stream
519     * and bytes indicating the file-format, which is
520     * huffmanised, followed by a digit indicating blockSize100k.
521     * @throws IOException if the magic bytes could not been written
522     */
523    private void init() throws IOException {
524        bsPutUByte('B');
525        bsPutUByte('Z');
526
527        this.data = new Data(this.blockSize100k);
528        this.blockSorter = new BlockSort(this.data);
529
530        // huffmanised magic bytes
531        bsPutUByte('h');
532        bsPutUByte('0' + this.blockSize100k);
533
534        this.combinedCRC = 0;
535        initBlock();
536    }
537
538    private void initBlock() {
539        // blockNo++;
540        this.crc.initializeCRC();
541        this.last = -1;
542        // ch = 0;
543
544        final boolean[] inUse = this.data.inUse;
545        for (int i = 256; --i >= 0;) {
546            inUse[i] = false;
547        }
548
549    }
550
551    private void endBlock() throws IOException {
552        this.blockCRC = this.crc.getFinalCRC();
553        this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
554        this.combinedCRC ^= this.blockCRC;
555
556        // empty block at end of file
557        if (this.last == -1) {
558            return;
559        }
560
561        /* sort the block and establish posn of original string */
562        blockSort();
563
564        /*
565         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
566         * :-). A 32 bit value does not really give a strong enough guarantee
567         * that the value will not appear by chance in the compressed
568         * datastream. Worst-case probability of this event, for a 900k block,
569         * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
570         * bits. For a compressed file of size 100Gb -- about 100000 blocks --
571         * only a 48-bit marker will do. NB: normal compression/ decompression
572         * donot rely on these statistical properties. They are only important
573         * when trying to recover blocks from damaged files.
574         */
575        bsPutUByte(0x31);
576        bsPutUByte(0x41);
577        bsPutUByte(0x59);
578        bsPutUByte(0x26);
579        bsPutUByte(0x53);
580        bsPutUByte(0x59);
581
582        /* Now the block's CRC, so it is in a known place. */
583        bsPutInt(this.blockCRC);
584
585        /* Now a single bit indicating no randomisation. */
586        bsW(1, 0);
587
588        /* Finally, block's contents proper. */
589        moveToFrontCodeAndSend();
590    }
591
592    private void endCompression() throws IOException {
593        /*
594         * Now another magic 48-bit number, 0x177245385090, to indicate the end
595         * of the last block. (sqrt(pi), if you want to know. I did want to use
596         * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
597         * to feel statistically comfortable. Call me paranoid.)
598         */
599        bsPutUByte(0x17);
600        bsPutUByte(0x72);
601        bsPutUByte(0x45);
602        bsPutUByte(0x38);
603        bsPutUByte(0x50);
604        bsPutUByte(0x90);
605
606        bsPutInt(this.combinedCRC);
607        bsFinishedWithStream();
608    }
609
610    /**
611     * Returns the blocksize parameter specified at construction time.
612     * @return the blocksize parameter specified at construction time
613     */
614    public final int getBlockSize() {
615        return this.blockSize100k;
616    }
617
618    @Override
619    public void write(final byte[] buf, int offs, final int len)
620        throws IOException {
621        if (offs < 0) {
622            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
623        }
624        if (len < 0) {
625            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
626        }
627        if (offs + len > buf.length) {
628            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
629                                                + len + ") > buf.length("
630                                                + buf.length + ").");
631        }
632        if (closed) {
633            throw new IOException("Stream closed");
634        }
635
636        for (final int hi = offs + len; offs < hi;) {
637            write0(buf[offs++]);
638        }
639    }
640
641    /**
642     * Keeps track of the last bytes written and implicitly performs
643     * run-length encoding as the first step of the bzip2 algorithm.
644     */
645    private void write0(int b) throws IOException {
646        if (this.currentChar != -1) {
647            b &= 0xff;
648            if (this.currentChar == b) {
649                if (++this.runLength > 254) {
650                    writeRun();
651                    this.currentChar = -1;
652                    this.runLength = 0;
653                }
654                // else nothing to do
655            } else {
656                writeRun();
657                this.runLength = 1;
658                this.currentChar = b;
659            }
660        } else {
661            this.currentChar = b & 0xff;
662            this.runLength++;
663        }
664    }
665
666    private static void hbAssignCodes(final int[] code, final byte[] length,
667                                      final int minLen, final int maxLen,
668                                      final int alphaSize) {
669        int vec = 0;
670        for (int n = minLen; n <= maxLen; n++) {
671            for (int i = 0; i < alphaSize; i++) {
672                if ((length[i] & 0xff) == n) {
673                    code[i] = vec;
674                    vec++;
675                }
676            }
677            vec <<= 1;
678        }
679    }
680
681    private void bsFinishedWithStream() throws IOException {
682        while (this.bsLive > 0) {
683            final int ch = this.bsBuff >> 24;
684            this.out.write(ch); // write 8-bit
685            this.bsBuff <<= 8;
686            this.bsLive -= 8;
687        }
688    }
689
690    private void bsW(final int n, final int v) throws IOException {
691        final OutputStream outShadow = this.out;
692        int bsLiveShadow = this.bsLive;
693        int bsBuffShadow = this.bsBuff;
694
695        while (bsLiveShadow >= 8) {
696            outShadow.write(bsBuffShadow >> 24); // write 8-bit
697            bsBuffShadow <<= 8;
698            bsLiveShadow -= 8;
699        }
700
701        this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
702        this.bsLive = bsLiveShadow + n;
703    }
704
705    private void bsPutUByte(final int c) throws IOException {
706        bsW(8, c);
707    }
708
709    private void bsPutInt(final int u) throws IOException {
710        bsW(8, (u >> 24) & 0xff);
711        bsW(8, (u >> 16) & 0xff);
712        bsW(8, (u >> 8) & 0xff);
713        bsW(8, u & 0xff);
714    }
715
716    private void sendMTFValues() throws IOException {
717        final byte[][] len = this.data.sendMTFValues_len;
718        final int alphaSize = this.nInUse + 2;
719
720        for (int t = N_GROUPS; --t >= 0;) {
721            final byte[] len_t = len[t];
722            for (int v = alphaSize; --v >= 0;) {
723                len_t[v] = GREATER_ICOST;
724            }
725        }
726
727        /* Decide how many coding tables to use */
728        // assert (this.nMTF > 0) : this.nMTF;
729        final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
730            : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
731
732        /* Generate an initial set of coding tables */
733        sendMTFValues0(nGroups, alphaSize);
734
735        /*
736         * Iterate up to N_ITERS times to improve the tables.
737         */
738        final int nSelectors = sendMTFValues1(nGroups, alphaSize);
739
740        /* Compute MTF values for the selectors. */
741        sendMTFValues2(nGroups, nSelectors);
742
743        /* Assign actual codes for the tables. */
744        sendMTFValues3(nGroups, alphaSize);
745
746        /* Transmit the mapping table. */
747        sendMTFValues4();
748
749        /* Now the selectors. */
750        sendMTFValues5(nGroups, nSelectors);
751
752        /* Now the coding tables. */
753        sendMTFValues6(nGroups, alphaSize);
754
755        /* And finally, the block data proper */
756        sendMTFValues7();
757    }
758
759    private void sendMTFValues0(final int nGroups, final int alphaSize) {
760        final byte[][] len = this.data.sendMTFValues_len;
761        final int[] mtfFreq = this.data.mtfFreq;
762
763        int remF = this.nMTF;
764        int gs = 0;
765
766        for (int nPart = nGroups; nPart > 0; nPart--) {
767            final int tFreq = remF / nPart;
768            int ge = gs - 1;
769            int aFreq = 0;
770
771            for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
772                aFreq += mtfFreq[++ge];
773            }
774
775            if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
776                && (((nGroups - nPart) & 1) != 0)) {
777                aFreq -= mtfFreq[ge--];
778            }
779
780            final byte[] len_np = len[nPart - 1];
781            for (int v = alphaSize; --v >= 0;) {
782                if ((v >= gs) && (v <= ge)) {
783                    len_np[v] = LESSER_ICOST;
784                } else {
785                    len_np[v] = GREATER_ICOST;
786                }
787            }
788
789            gs = ge + 1;
790            remF -= aFreq;
791        }
792    }
793
794    private int sendMTFValues1(final int nGroups, final int alphaSize) {
795        final Data dataShadow = this.data;
796        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
797        final int[] fave = dataShadow.sendMTFValues_fave;
798        final short[] cost = dataShadow.sendMTFValues_cost;
799        final char[] sfmap = dataShadow.sfmap;
800        final byte[] selector = dataShadow.selector;
801        final byte[][] len = dataShadow.sendMTFValues_len;
802        final byte[] len_0 = len[0];
803        final byte[] len_1 = len[1];
804        final byte[] len_2 = len[2];
805        final byte[] len_3 = len[3];
806        final byte[] len_4 = len[4];
807        final byte[] len_5 = len[5];
808        final int nMTFShadow = this.nMTF;
809
810        int nSelectors = 0;
811
812        for (int iter = 0; iter < N_ITERS; iter++) {
813            for (int t = nGroups; --t >= 0;) {
814                fave[t] = 0;
815                final int[] rfreqt = rfreq[t];
816                for (int i = alphaSize; --i >= 0;) {
817                    rfreqt[i] = 0;
818                }
819            }
820
821            nSelectors = 0;
822
823            for (int gs = 0; gs < this.nMTF;) {
824                /* Set group start & end marks. */
825
826                /*
827                 * Calculate the cost of this group as coded by each of the
828                 * coding tables.
829                 */
830
831                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
832
833                if (nGroups == N_GROUPS) {
834                    // unrolled version of the else-block
835
836                    short cost0 = 0;
837                    short cost1 = 0;
838                    short cost2 = 0;
839                    short cost3 = 0;
840                    short cost4 = 0;
841                    short cost5 = 0;
842
843                    for (int i = gs; i <= ge; i++) {
844                        final int icv = sfmap[i];
845                        cost0 += len_0[icv] & 0xff;
846                        cost1 += len_1[icv] & 0xff;
847                        cost2 += len_2[icv] & 0xff;
848                        cost3 += len_3[icv] & 0xff;
849                        cost4 += len_4[icv] & 0xff;
850                        cost5 += len_5[icv] & 0xff;
851                    }
852
853                    cost[0] = cost0;
854                    cost[1] = cost1;
855                    cost[2] = cost2;
856                    cost[3] = cost3;
857                    cost[4] = cost4;
858                    cost[5] = cost5;
859
860                } else {
861                    for (int t = nGroups; --t >= 0;) {
862                        cost[t] = 0;
863                    }
864
865                    for (int i = gs; i <= ge; i++) {
866                        final int icv = sfmap[i];
867                        for (int t = nGroups; --t >= 0;) {
868                            cost[t] += len[t][icv] & 0xff;
869                        }
870                    }
871                }
872
873                /*
874                 * Find the coding table which is best for this group, and
875                 * record its identity in the selector table.
876                 */
877                int bt = -1;
878                for (int t = nGroups, bc = 999999999; --t >= 0;) {
879                    final int cost_t = cost[t];
880                    if (cost_t < bc) {
881                        bc = cost_t;
882                        bt = t;
883                    }
884                }
885
886                fave[bt]++;
887                selector[nSelectors] = (byte) bt;
888                nSelectors++;
889
890                /*
891                 * Increment the symbol frequencies for the selected table.
892                 */
893                final int[] rfreq_bt = rfreq[bt];
894                for (int i = gs; i <= ge; i++) {
895                    rfreq_bt[sfmap[i]]++;
896                }
897
898                gs = ge + 1;
899            }
900
901            /*
902             * Recompute the tables based on the accumulated frequencies.
903             */
904            for (int t = 0; t < nGroups; t++) {
905                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
906            }
907        }
908
909        return nSelectors;
910    }
911
912    private void sendMTFValues2(final int nGroups, final int nSelectors) {
913        // assert (nGroups < 8) : nGroups;
914
915        final Data dataShadow = this.data;
916        final byte[] pos = dataShadow.sendMTFValues2_pos;
917
918        for (int i = nGroups; --i >= 0;) {
919            pos[i] = (byte) i;
920        }
921
922        for (int i = 0; i < nSelectors; i++) {
923            final byte ll_i = dataShadow.selector[i];
924            byte tmp = pos[0];
925            int j = 0;
926
927            while (ll_i != tmp) {
928                j++;
929                final byte tmp2 = tmp;
930                tmp = pos[j];
931                pos[j] = tmp2;
932            }
933
934            pos[0] = tmp;
935            dataShadow.selectorMtf[i] = (byte) j;
936        }
937    }
938
939    private void sendMTFValues3(final int nGroups, final int alphaSize) {
940        final int[][] code = this.data.sendMTFValues_code;
941        final byte[][] len = this.data.sendMTFValues_len;
942
943        for (int t = 0; t < nGroups; t++) {
944            int minLen = 32;
945            int maxLen = 0;
946            final byte[] len_t = len[t];
947            for (int i = alphaSize; --i >= 0;) {
948                final int l = len_t[i] & 0xff;
949                if (l > maxLen) {
950                    maxLen = l;
951                }
952                if (l < minLen) {
953                    minLen = l;
954                }
955            }
956
957            // assert (maxLen <= 20) : maxLen;
958            // assert (minLen >= 1) : minLen;
959
960            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
961        }
962    }
963
964    private void sendMTFValues4() throws IOException {
965        final boolean[] inUse = this.data.inUse;
966        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
967
968        for (int i = 16; --i >= 0;) {
969            inUse16[i] = false;
970            final int i16 = i * 16;
971            for (int j = 16; --j >= 0;) {
972                if (inUse[i16 + j]) {
973                    inUse16[i] = true;
974                    break;
975                }
976            }
977        }
978
979        for (int i = 0; i < 16; i++) {
980            bsW(1, inUse16[i] ? 1 : 0);
981        }
982
983        final OutputStream outShadow = this.out;
984        int bsLiveShadow = this.bsLive;
985        int bsBuffShadow = this.bsBuff;
986
987        for (int i = 0; i < 16; i++) {
988            if (inUse16[i]) {
989                final int i16 = i * 16;
990                for (int j = 0; j < 16; j++) {
991                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
992                    while (bsLiveShadow >= 8) {
993                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
994                        bsBuffShadow <<= 8;
995                        bsLiveShadow -= 8;
996                    }
997                    if (inUse[i16 + j]) {
998                        bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
999                    }
1000                    bsLiveShadow++;
1001                }
1002            }
1003        }
1004
1005        this.bsBuff = bsBuffShadow;
1006        this.bsLive = bsLiveShadow;
1007    }
1008
1009    private void sendMTFValues5(final int nGroups, final int nSelectors)
1010        throws IOException {
1011        bsW(3, nGroups);
1012        bsW(15, nSelectors);
1013
1014        final OutputStream outShadow = this.out;
1015        final byte[] selectorMtf = this.data.selectorMtf;
1016
1017        int bsLiveShadow = this.bsLive;
1018        int bsBuffShadow = this.bsBuff;
1019
1020        for (int i = 0; i < nSelectors; i++) {
1021            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1022                // inlined: bsW(1, 1);
1023                while (bsLiveShadow >= 8) {
1024                    outShadow.write(bsBuffShadow >> 24);
1025                    bsBuffShadow <<= 8;
1026                    bsLiveShadow -= 8;
1027                }
1028                bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1029                bsLiveShadow++;
1030            }
1031
1032            // inlined: bsW(1, 0);
1033            while (bsLiveShadow >= 8) {
1034                outShadow.write(bsBuffShadow >> 24);
1035                bsBuffShadow <<= 8;
1036                bsLiveShadow -= 8;
1037            }
1038            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1039            bsLiveShadow++;
1040        }
1041
1042        this.bsBuff = bsBuffShadow;
1043        this.bsLive = bsLiveShadow;
1044    }
1045
1046    private void sendMTFValues6(final int nGroups, final int alphaSize)
1047        throws IOException {
1048        final byte[][] len = this.data.sendMTFValues_len;
1049        final OutputStream outShadow = this.out;
1050
1051        int bsLiveShadow = this.bsLive;
1052        int bsBuffShadow = this.bsBuff;
1053
1054        for (int t = 0; t < nGroups; t++) {
1055            final byte[] len_t = len[t];
1056            int curr = len_t[0] & 0xff;
1057
1058            // inlined: bsW(5, curr);
1059            while (bsLiveShadow >= 8) {
1060                outShadow.write(bsBuffShadow >> 24); // write 8-bit
1061                bsBuffShadow <<= 8;
1062                bsLiveShadow -= 8;
1063            }
1064            bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1065            bsLiveShadow += 5;
1066
1067            for (int i = 0; i < alphaSize; i++) {
1068                final int lti = len_t[i] & 0xff;
1069                while (curr < lti) {
1070                    // inlined: bsW(2, 2);
1071                    while (bsLiveShadow >= 8) {
1072                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1073                        bsBuffShadow <<= 8;
1074                        bsLiveShadow -= 8;
1075                    }
1076                    bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1077                    bsLiveShadow += 2;
1078
1079                    curr++; /* 10 */
1080                }
1081
1082                while (curr > lti) {
1083                    // inlined: bsW(2, 3);
1084                    while (bsLiveShadow >= 8) {
1085                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1086                        bsBuffShadow <<= 8;
1087                        bsLiveShadow -= 8;
1088                    }
1089                    bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1090                    bsLiveShadow += 2;
1091
1092                    curr--; /* 11 */
1093                }
1094
1095                // inlined: bsW(1, 0);
1096                while (bsLiveShadow >= 8) {
1097                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
1098                    bsBuffShadow <<= 8;
1099                    bsLiveShadow -= 8;
1100                }
1101                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1102                bsLiveShadow++;
1103            }
1104        }
1105
1106        this.bsBuff = bsBuffShadow;
1107        this.bsLive = bsLiveShadow;
1108    }
1109
1110    private void sendMTFValues7() throws IOException {
1111        final Data dataShadow = this.data;
1112        final byte[][] len = dataShadow.sendMTFValues_len;
1113        final int[][] code = dataShadow.sendMTFValues_code;
1114        final OutputStream outShadow = this.out;
1115        final byte[] selector = dataShadow.selector;
1116        final char[] sfmap = dataShadow.sfmap;
1117        final int nMTFShadow = this.nMTF;
1118
1119        int selCtr = 0;
1120
1121        int bsLiveShadow = this.bsLive;
1122        int bsBuffShadow = this.bsBuff;
1123
1124        for (int gs = 0; gs < nMTFShadow;) {
1125            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1126            final int selector_selCtr = selector[selCtr] & 0xff;
1127            final int[] code_selCtr = code[selector_selCtr];
1128            final byte[] len_selCtr = len[selector_selCtr];
1129
1130            while (gs <= ge) {
1131                final int sfmap_i = sfmap[gs];
1132
1133                //
1134                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1135                // code_selCtr[sfmap_i]);
1136                //
1137                while (bsLiveShadow >= 8) {
1138                    outShadow.write(bsBuffShadow >> 24);
1139                    bsBuffShadow <<= 8;
1140                    bsLiveShadow -= 8;
1141                }
1142                final int n = len_selCtr[sfmap_i] & 0xFF;
1143                bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1144                bsLiveShadow += n;
1145
1146                gs++;
1147            }
1148
1149            gs = ge + 1;
1150            selCtr++;
1151        }
1152
1153        this.bsBuff = bsBuffShadow;
1154        this.bsLive = bsLiveShadow;
1155    }
1156
1157    private void moveToFrontCodeAndSend() throws IOException {
1158        bsW(24, this.data.origPtr);
1159        generateMTFValues();
1160        sendMTFValues();
1161    }
1162
1163    private void blockSort() {
1164        blockSorter.blockSort(data, last);
1165    }
1166
1167    /*
1168     * Performs Move-To-Front on the Burrows-Wheeler transformed
1169     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1170     * run-length-encoded form.
1171     *
1172     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1173     */
1174    private void generateMTFValues() {
1175        final int lastShadow = this.last;
1176        final Data dataShadow = this.data;
1177        final boolean[] inUse = dataShadow.inUse;
1178        final byte[] block = dataShadow.block;
1179        final int[] fmap = dataShadow.fmap;
1180        final char[] sfmap = dataShadow.sfmap;
1181        final int[] mtfFreq = dataShadow.mtfFreq;
1182        final byte[] unseqToSeq = dataShadow.unseqToSeq;
1183        final byte[] yy = dataShadow.generateMTFValues_yy;
1184
1185        // make maps
1186        int nInUseShadow = 0;
1187        for (int i = 0; i < 256; i++) {
1188            if (inUse[i]) {
1189                unseqToSeq[i] = (byte) nInUseShadow;
1190                nInUseShadow++;
1191            }
1192        }
1193        this.nInUse = nInUseShadow;
1194
1195        final int eob = nInUseShadow + 1;
1196
1197        for (int i = eob; i >= 0; i--) {
1198            mtfFreq[i] = 0;
1199        }
1200
1201        for (int i = nInUseShadow; --i >= 0;) {
1202            yy[i] = (byte) i;
1203        }
1204
1205        int wr = 0;
1206        int zPend = 0;
1207
1208        for (int i = 0; i <= lastShadow; i++) {
1209            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1210            byte tmp = yy[0];
1211            int j = 0;
1212
1213            while (ll_i != tmp) {
1214                j++;
1215                final byte tmp2 = tmp;
1216                tmp = yy[j];
1217                yy[j] = tmp2;
1218            }
1219            yy[0] = tmp;
1220
1221            if (j == 0) {
1222                zPend++;
1223            } else {
1224                if (zPend > 0) {
1225                    zPend--;
1226                    while (true) {
1227                        if ((zPend & 1) == 0) {
1228                            sfmap[wr] = RUNA;
1229                            wr++;
1230                            mtfFreq[RUNA]++;
1231                        } else {
1232                            sfmap[wr] = RUNB;
1233                            wr++;
1234                            mtfFreq[RUNB]++;
1235                        }
1236
1237                        if (zPend < 2) {
1238                            break;
1239                        }
1240                        zPend = (zPend - 2) >> 1;
1241                    }
1242                    zPend = 0;
1243                }
1244                sfmap[wr] = (char) (j + 1);
1245                wr++;
1246                mtfFreq[j + 1]++;
1247            }
1248        }
1249
1250        if (zPend > 0) {
1251            zPend--;
1252            while (true) {
1253                if ((zPend & 1) == 0) {
1254                    sfmap[wr] = RUNA;
1255                    wr++;
1256                    mtfFreq[RUNA]++;
1257                } else {
1258                    sfmap[wr] = RUNB;
1259                    wr++;
1260                    mtfFreq[RUNB]++;
1261                }
1262
1263                if (zPend < 2) {
1264                    break;
1265                }
1266                zPend = (zPend - 2) >> 1;
1267            }
1268        }
1269
1270        sfmap[wr] = (char) eob;
1271        mtfFreq[eob]++;
1272        this.nMTF = wr + 1;
1273    }
1274
1275    static final class Data {
1276
1277        // with blockSize 900k
1278        /* maps unsigned byte => "does it occur in block" */
1279        final boolean[] inUse = new boolean[256]; // 256 byte
1280        final byte[] unseqToSeq = new byte[256]; // 256 byte
1281        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1282        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1283        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1284
1285        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1286        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1287        // byte
1288        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1289        // byte
1290        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1291        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1292        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1293        // byte
1294        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1295        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1296
1297        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1298        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1299        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1300
1301        // ------------
1302        // 333408 byte
1303
1304        /* holds the RLEd block of original data starting at index 1.
1305         * After sorting the last byte added to the buffer is at index
1306         * 0. */
1307        final byte[] block; // 900021 byte
1308        /* maps index in Burrows-Wheeler transformed block => index of
1309         * byte in original block */
1310        final int[] fmap; // 3600000 byte
1311        final char[] sfmap; // 3600000 byte
1312        // ------------
1313        // 8433529 byte
1314        // ============
1315
1316        /**
1317         * Index of original line in Burrows-Wheeler table.
1318         *
1319         * <p>This is the index in fmap that points to the last byte
1320         * of the original data.</p>
1321         */
1322        int origPtr;
1323
1324        Data(final int blockSize100k) {
1325            final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1326            this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1327            this.fmap = new int[n];
1328            this.sfmap = new char[2 * n];
1329        }
1330
1331    }
1332
1333}