compress pdf with large images via java

Daniel Mulcahy picture Daniel Mulcahy · Dec 16, 2013 · Viewed 23.5k times · Source

Looking for a way to compress images in a pdf and to output a pdf for archiving. I cannot compress the images before creation as it would compromise the quality of the print.

The size of each pdf is around 8MB with the bulk of this being made up of 2 images. Images are in png format and are brought into pdf during generation(3rd party generator used)

Is there a way to compress these in java without using a 3rd party tool. I have tried with pdfbox, itext and a 3rd party exe(neevia), the 3rd party tool the only one that has given me any results so far(Down to around half a MB) but I do not want to relinquish control to an exe. Sample code is below.

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDStream;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfWriter;

public class compressPDF {

public static void main (String[] args) throws IOException, DocumentException, COSVisitorException {


    /*
     * Using PDF Box
     */

    PDDocument doc; // = new PDDocument();

    doc = PDDocument.load("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF");

    PDStream stream= new PDStream(doc);
    stream.addCompression();

    doc.save("C:/_dev_env_/TEMP/compressPDF/compressed_pdfBox.pdf");

    doc.close();

    /*
     * Using itext
     */

    PdfReader reader = new PdfReader("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF");

    PdfStamper stamper = new PdfStamper(reader, new FileOutputStream("C:/_dev_env_/TEMP/compressPDF/compressed_Itext.pdf"), PdfWriter.VERSION_1_5);
    stamper.setFullCompression();
    stamper.getWriter().setCompressionLevel(50);
    int total = reader.getNumberOfPages() + 1;
    for (int i = 1; i < total; i++) {
        reader.setPageContent(i, reader.getPageContent(i));
    }
    stamper.close();
    reader.close();

    /*
     * Using 3rd party - Neevia 
     */
    try {
    Process process = new ProcessBuilder("C:/Program Files (x86)/neeviaPDF.com/PDFcompress/cmdLine/CLcompr.exe","C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF", "C:/_dev_env_/TEMP/compressPDF/compressed_Neevia.pdf").start();
    InputStream is = process.getInputStream();
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    String line;

    System.out.printf("Output of running %s is:", Arrays.toString(args));

    while ((line = br.readLine()) != null) {
      System.out.println(line);
    }
    } catch (Exception e) {
        System.out.println(e);
    } finally {
        System.out.println("Created!!");
    }

}

}

Answer

Daniel Mulcahy picture Daniel Mulcahy · Dec 16, 2013

I used code below for a proof of concept... Works a treat :) Thanks to Bruno for setting me on the right path :)

package compressPDF;

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import javax.imageio.ImageIO;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PRStream;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.parser.PdfImageObject;

public class ResizeImage {

/** The resulting PDF file. */
//public static String RESULT = "results/part4/chapter16/resized_image.pdf";
/** The multiplication factor for the image. */
public static float FACTOR = 0.5f;

/**
 * Manipulates a PDF file src with the file dest as result
 * @param src the original PDF
 * @param dest the resulting PDF
 * @throws IOException
 * @throws DocumentException 
 */
public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
    PdfName key = new PdfName("ITXT_SpecialId");
    PdfName value = new PdfName("123456789");
    // Read the file
    PdfReader reader = new PdfReader(src);
    int n = reader.getXrefSize();
    PdfObject object;
    PRStream stream;
    // Look for image and manipulate image stream
    for (int i = 0; i < n; i++) {
        object = reader.getPdfObject(i);
        if (object == null || !object.isStream())
            continue;
        stream = (PRStream)object;
       // if (value.equals(stream.get(key))) {
        PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE);
        System.out.println(stream.type());
        if (pdfsubtype != null && pdfsubtype.toString().equals(PdfName.IMAGE.toString())) {
            PdfImageObject image = new PdfImageObject(stream);
            BufferedImage bi = image.getBufferedImage();
            if (bi == null) continue;
            int width = (int)(bi.getWidth() * FACTOR);
            int height = (int)(bi.getHeight() * FACTOR);
            BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
            AffineTransform at = AffineTransform.getScaleInstance(FACTOR, FACTOR);
            Graphics2D g = img.createGraphics();
            g.drawRenderedImage(bi, at);
            ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
            ImageIO.write(img, "JPG", imgBytes);
            stream.clear();
            stream.setData(imgBytes.toByteArray(), false, PRStream.BEST_COMPRESSION);
            stream.put(PdfName.TYPE, PdfName.XOBJECT);
            stream.put(PdfName.SUBTYPE, PdfName.IMAGE);
            stream.put(key, value);
            stream.put(PdfName.FILTER, PdfName.DCTDECODE);
            stream.put(PdfName.WIDTH, new PdfNumber(width));
            stream.put(PdfName.HEIGHT, new PdfNumber(height));
            stream.put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
            stream.put(PdfName.COLORSPACE, PdfName.DEVICERGB);
        }
    }
    // Save altered PDF
    PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
    stamper.close();
    reader.close();
}

/**
 * Main method.
 *
 * @param    args    no arguments needed
 * @throws DocumentException 
 * @throws IOException
 */
public static void main(String[] args) throws IOException, DocumentException {
    //createPdf(RESULT);
    new ResizeImage().manipulatePdf("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF", "C:/_dev_env_/TEMP/compressPDF/compressTest.pdf");
}

}