Consider the following code example, which puts a single file, test_file.pdf,
into the zip archive test.zip
and then reads this archive back:
import java.io.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
/**
 * Packs {@code test_file.pdf} into {@code test.zip} as a single entry named
 * "data", declaring only the uncompressed size on the entry, and then reopens
 * the archive to print the size reported for its first entry.
 */
public class Main {
    public static void main(String[] args) {
        File source = new File("test_file.pdf");

        // Step 1: write the archive. Only the uncompressed size is set on the
        // entry; no compression method, CRC or compressed size is supplied.
        try (
            FileInputStream in = new FileInputStream(source);
            ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream("test.zip"));
        ) {
            ZipEntry dataEntry = new ZipEntry("data");
            dataEntry.setSize(source.length());
            zipOut.putNextEntry(dataEntry);

            // Copy the file contents into the entry, 1 KiB at a time.
            byte[] chunk = new byte[1024];
            for (int count = in.read(chunk); count >= 0; count = in.read(chunk)) {
                zipOut.write(chunk, 0, count);
            }
            zipOut.closeEntry();
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Step 2: read the archive back and report what the first entry says
        // about its own size, without consuming any of the entry data.
        try (
            ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(
                new FileInputStream(new File("test.zip"))));
        ) {
            ZipEntry first = zipIn.getNextEntry();
            System.out.println("Entry size: " + first.getSize());
            zipIn.closeEntry();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Output:
Entry size: -1
But if I create an uncompressed zip archive (method ZipEntry.STORED), getSize() returns the correct size:
import java.io.*;
import java.util.zip.CRC32;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
/**
 * Packs {@code test_file.pdf} into {@code test.zip} as a single STORED
 * (uncompressed) entry named "data", supplying size, compressed size and
 * CRC-32 on the entry before writing, and then reopens the archive to print
 * the size reported for its first entry.
 */
public class Main {
    public static void main(String[] args) {
        File source = new File("test_file.pdf");

        try (
            FileInputStream in = new FileInputStream(source);
            ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream("test.zip"));
        ) {
            byte[] chunk = new byte[1024];
            int count;

            // First pass over the file: compute the CRC-32 that is set on the
            // entry below, using a separate stream so that `in` stays untouched.
            CRC32 checksum = new CRC32();
            try (
                BufferedInputStream crcIn = new BufferedInputStream(new FileInputStream(source));
            ) {
                checksum.reset();
                count = crcIn.read(chunk);
                while (count != -1) {
                    checksum.update(chunk, 0, count);
                    count = crcIn.read(chunk);
                }
            }

            // Describe the entry completely before it is opened: the data is
            // stored as-is, so size and compressed size are both the file length.
            long length = source.length();
            ZipEntry dataEntry = new ZipEntry("data");
            dataEntry.setMethod(ZipEntry.STORED);
            dataEntry.setCompressedSize(length);
            dataEntry.setSize(length);
            dataEntry.setCrc(checksum.getValue());
            zipOut.putNextEntry(dataEntry);

            // Second pass: copy the file contents into the entry.
            for (count = in.read(chunk); count >= 0; count = in.read(chunk)) {
                zipOut.write(chunk, 0, count);
            }
            zipOut.closeEntry();
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Read the archive back and report the size stated by the first entry.
        try (
            ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(
                new FileInputStream(new File("test.zip"))));
        ) {
            ZipEntry first = zipIn.getNextEntry();
            System.out.println("Entry size: " + first.getSize());
            zipIn.closeEntry();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Output (the exact value is just an example, but it is the correct size):
Entry size: 9223192
Compressed zip archives that report a correct entry.getSize() do exist (e.g. zip archives created by the Ark program).
So the question is: how do I create a compressed (ZipEntry.DEFLATED, or another method if one exists) zip archive that returns the correct size of the entry, using only the standard libraries?
I tried this recommendation but it also does not work:
import java.io.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
/**
 * Same archive-writing steps as a plain "set only the size" attempt: packs
 * {@code test_file.pdf} into {@code test.zip} as a single entry named "data".
 * When reading back, one byte of entry data is consumed before the entry's
 * size is printed.
 */
public class Main {
    public static void main(String[] args) {
        File source = new File("test_file.pdf");

        // Write the archive; only the uncompressed size is set on the entry.
        try (
            FileInputStream in = new FileInputStream(source);
            ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream("test.zip"));
        ) {
            ZipEntry dataEntry = new ZipEntry("data");
            dataEntry.setSize(source.length());
            zipOut.putNextEntry(dataEntry);

            byte[] chunk = new byte[1024];
            int count = in.read(chunk);
            while (count >= 0) {
                zipOut.write(chunk, 0, count);
                count = in.read(chunk);
            }
            zipOut.closeEntry();
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Read the archive back. A single byte of the entry is read first
        // (its value and the read count are not used), then the size is printed.
        try (
            ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(
                new FileInputStream(new File("test.zip"))));
        ) {
            ZipEntry first = zipIn.getNextEntry();
            byte[] oneByte = new byte[1];
            zipIn.read(oneByte);
            System.out.println("Entry size: " + first.getSize());
            zipIn.closeEntry();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Output:
Entry size: -1
You can only set the uncompressed size if you also set the CRC and the compressed size. Since this information is stored in a header before the actual data, and ZipOutputStream can't rewind arbitrary OutputStreams, it can't calculate these values while writing and store them afterwards (but it will calculate them to verify the values you provide).
Here is a solution for calculating the values in one pass before the writing. It utilizes the fact that you can rewind a stream if it is backed by a file.
/**
 * Writes {@code test_file.pdf} into {@code test.zip} as a single entry named
 * "data" whose metadata is filled in by {@code precalc} before the entry is
 * opened, then reads the archive back and prints the size, compressed size
 * and CRC reported for the first entry.
 *
 * @param args unused
 * @throws IOException if reading the input file or reading/writing the archive fails
 */
public static void main(String[] args) throws IOException {
    File infile = new File("test_file.pdf");
    File outfile = new File("test.zip");

    try (FileInputStream source = new FileInputStream(infile);
         FileOutputStream target = new FileOutputStream(outfile);
         ZipOutputStream zipOut = new ZipOutputStream(target)) {
        ZipEntry dataEntry = new ZipEntry("data");
        // precalc reads through the stream's channel; the same stream is
        // read again below to copy the data into the entry.
        precalc(dataEntry, source.getChannel());
        zipOut.putNextEntry(dataEntry);

        byte[] chunk = new byte[1024];
        int count = source.read(chunk);
        while (count >= 0) {
            zipOut.write(chunk, 0, count);
            count = source.read(chunk);
        }
        zipOut.closeEntry();
    }

    try (FileInputStream archive = new FileInputStream(outfile);
         ZipInputStream zipIn = new ZipInputStream(archive)) {
        ZipEntry first = zipIn.getNextEntry();
        System.out.println("Entry size: " + first.getSize());
        System.out.println("Compressed size: " + first.getCompressedSize());
        System.out.println("CRC: " + first.getCrc());
        zipIn.closeEntry();
    }
}
/**
 * Fills in the metadata of {@code entry} from the contents of {@code fch}
 * before the entry is handed to a {@code ZipOutputStream}: the uncompressed
 * size, the CRC-32 and, unless the entry's method is {@link ZipEntry#STORED},
 * the compressed size obtained from a trial compression at
 * {@link Deflater#DEFAULT_COMPRESSION}.
 *
 * <p>The channel is read from its current position to the end, while the size
 * is taken from {@code fch.size()}, so the channel is expected to be positioned
 * at the start of the file. On successful return it is repositioned to 0 so the
 * same data can be read again for the actual write.
 *
 * @param entry the entry whose size, compressed size and CRC are set
 * @param fch   channel of the file that will be written into the entry
 * @throws IOException if reading from or repositioning the channel fails
 */
private static void precalc(ZipEntry entry, FileChannel fch) throws IOException {
    long uncompressed = fch.size();
    CRC32 crc = new CRC32();

    // A trial compression is only needed when the entry will not be STORED.
    // nowrap=true produces raw deflate data without the zlib header/trailer.
    Deflater def = entry.getMethod() != ZipEntry.STORED
            ? new Deflater(Deflater.DEFAULT_COMPRESSION, true)
            : null;
    try {
        // Scratch space for the compressed output; its contents are discarded,
        // only the number of bytes produced matters.
        byte[] drain = def != null ? new byte[1024] : null;

        // Never allocate a zero-capacity buffer: reading into a buffer with no
        // remaining space returns 0 instead of -1, so an empty file would make
        // the loop below spin forever.
        int capacity = (int) Math.max(1, Math.min(uncompressed, 4096));
        ByteBuffer buf = ByteBuffer.allocate(capacity);

        for (int bytesRead; (bytesRead = fch.read(buf)) != -1; buf.clear()) {
            crc.update(buf.array(), buf.arrayOffset(), bytesRead);
            if (def != null) {
                def.setInput(buf.array(), buf.arrayOffset(), bytesRead);
                while (!def.needsInput()) {
                    def.deflate(drain, 0, drain.length);
                }
            }
        }

        entry.setSize(uncompressed);
        if (def != null) {
            def.finish();
            while (!def.finished()) {
                def.deflate(drain, 0, drain.length);
            }
            entry.setCompressedSize(def.getBytesWritten());
        }
        entry.setCrc(crc.getValue());

        // Rewind so the caller can read the same data again for the real write.
        fch.position(0);
    } finally {
        // Release the compressor's native resources promptly.
        if (def != null) {
            def.end();
        }
    }
}
It handles both uncompressed and compressed entries, but unfortunately only with the default compression level, as ZipOutputStream has no method for querying the current level. So if you change the compression level, you have to keep the precalc code in sync. Alternatively, you could move the logic into a subclass of ZipOutputStream and use the same Deflater, so it will automatically have the same configuration.
A solution working with arbitrary source input streams would require buffering of the entire entry data.