Chris Pollett > Students >
Long

    ( Print View )

    [Bio]

    [Project Blog]

    [CS297 Proposal]

    [Del 1]

    [Del 2]

    [Del 3]

    [CS297Report-PDF]

    [CS298 Proposal]

                          

























Deliverable_3: Extracting images from a PDF file and saving them in an image format specified by the user. The image formats supported by this program are TIFF, JPEG, PNG and GIF.

Description: This program reads a PDF file. It then goes through the contents of each page and gets the image elements. Each image is saved to a file in the format specified by the user: TIFF, JPEG, PNG, or GIF. In this project, I have learned more about the PDF file structure, extracting objects from a PDF, and saving them in different image formats.

Example: This is what my code outputs on these inputs.

the cover of iText ebook was extracted

The cover of "iText in Action" ebook was extracted from a PDF file and saved as JPG image.

a barcode of iText ebook was extracted

A barcode was extracted from "iText in Action" ebook and saved as JPG image.

/*
* Project   : Extracting images in PDF files (Deliverable #3)
* File Name : Deliverable_3.java
* Purpose   : This program extracts images from a PDF file and saves them in
*          TIFF, PNG, GIF or JPEG format, as specified by the user.
* Create : Nov 1, 2006
* Last Modified: Nov 14, 2006
* Java Version:   1.5.0_08
*/

package myclasses;

import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.*;
import javax.imageio.ImageIO;
import javax.media.jai.JAI;
import org.jpedal.PdfDecoder;
import org.jpedal.io.*;
import org.jpedal.objects.PdfImageData;
import com.sun.media.jai.codec.TIFFEncodeParam;
import org.shetline.io.*;

/**
 * Extracts the images embedded in a PDF file and writes each one to disk in
 * the image format chosen on the command line (TIFF, PNG, GIF or JPEG).
 *
 * Pages are decoded with JPedal's PdfDecoder. TIFF files are encoded through
 * JAI, GIF files through GIFOutputStream, and every other format through
 * ImageIO.
 *
 * author: Long N Vuong
 * reference: JPedal example by Mark Stephens
 *
 */
public class Deliverable_3
{
   /*the decoder object which decodes the pdf and returns a data object*/
   PdfDecoder decode_pdf = null;

   //type of image to save; main() overwrites it before creating an instance
   private static String prefix = "jpg";

   /**
    * Opens the given PDF file and immediately extracts all of its images.
    *
    * @param file_name path of the PDF file to process
    */
   public Deliverable_3( String file_name )
   {
      decode(file_name);
   }

   /**
    * Opens the PDF, extracts the images of every page and closes the PDF.
    *
    * Extraction is skipped (with a message) when the decoder cannot be opened,
    * or when the file is encrypted, no password was supplied and extraction is
    * not allowed. Once the file has been opened it is closed exactly once,
    * whatever happens during extraction.
    *
    * @param file_name path of the PDF file to process
    */
   private void decode(String file_name){

      //without an opened decoder there is nothing to inspect or extract
      if( !openDecoder(file_name) )
         return;

      try
      {
         //check if the pdf allows to extract info
         if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied()))
            &&(!decode_pdf.isExtractionAllowed())) {
            System.out.println("Encrypted settings");
         }else{
            extractAllPages();
         }
      }
      finally
      {
         //single close point, reached on success, on failure and on the
         //encrypted path alike
         decode_pdf.closePdfFile();

         System.out.println("Closed PDF file");
      }
   }

   /**
    * Creates the decoder, selects what it should extract and opens the file.
    *
    * When the system property "opi" is set, XFORMMETADATA is requested in
    * addition to the raw and final images.
    *
    * @param file_name path of the PDF file to open
    * @return true if the file was opened; false if creating the decoder or
    *         opening the file failed (the failure is reported on System.err)
    */
   private boolean openDecoder(String file_name)
   {
      try
      {
         decode_pdf = new PdfDecoder( false );

         //tell JPedal what we want it to extract
         String opiFlag=System.getProperty("opi");
         if(opiFlag==null)
            decode_pdf.setExtractionMode(PdfDecoder.RAWIMAGES+PdfDecoder.FINALIMAGES);
         else
            decode_pdf.setExtractionMode(PdfDecoder.RAWIMAGES
            +PdfDecoder.FINALIMAGES+PdfDecoder.XFORMMETADATA);

         decode_pdf.openPdfFile( file_name );
         return true;
      }
      catch( Exception e )
      {
         System.err.println( "Exception " + e + " in pdf code" );

         //decode_pdf is still null if the PdfDecoder constructor itself failed
         if( decode_pdf != null )
            decode_pdf.closePdfFile();

         return false;
      }
   }

   /**
    * Decodes every page of the opened PDF and saves each image found on it.
    *
    * Output files are named image name + running counter + "." + prefix. The
    * counter runs across all pages and only advances when saveImage returns
    * normally. A failure on one image is reported and the remaining images
    * are still processed; a failure while decoding a page ends the page loop.
    */
   private void extractAllPages()
   {
      long fileCounter = 0;

      //Get number of pages in the PDF file
      int first_page = 1, last_page = decode_pdf.getPageCount();

      try
      {
         for( int page = first_page;page <= last_page;page++ )
         {
            //decode the page
            decode_pdf.decodePage( page );

            //get the PdfImages object which now holds the images.
            //the image name and other info in this object
            PdfImageData pdf_images = decode_pdf.getPdfImageData();

            //image count (note image 1 is item 0, so any loop runs 0 to count-1)
            int image_count = pdf_images.getImageCount();

            //work through and save each image
            for( int i = 0;i < image_count;i++ )
            {
               String image_name = pdf_images.getImageName( i );

               System.out.println("Processing: image " + (i + 1) +
                              " of " + image_count + " on page " + page);
               try
               {
                  //get raw version of image (R prefix for raw image)
                  BufferedImage image_to_save =
                     decode_pdf.getObjectStore().loadStoredImage( "R" + image_name );

                  saveImage(image_to_save, image_name + fileCounter +"."+prefix,prefix);

                  //increase file output counter
                  fileCounter++;
               }
               catch( Exception e )
               {
                  System.err.println( "Exception " + e + " in extracting images" );
               }
            }

            //flush images in case we do more than 1 page so only contains
            //images from current page
            decode_pdf.flushObjectValues(true);
         }
      }
      catch( Exception e )
      {
         //print the exception itself: getMessage() alone may be null and
         //hides the exception type
         System.err.println( "Exception " + e + " in decoding pages" );
      }
   }

   /**
    * Saves an image to a file in the requested format.
    *
    * "tif"/"tiff" is encoded through JAI (DEFLATE-compressed when the system
    * property "compress_tiff" is set), "gif" through GIFOutputStream with the
    * standard 256-colour table, and any other value is handed to ImageIO as
    * the format name. Problems are reported on System.err; every stream opened
    * here is closed before returning.
    *
    * @param image_to_save image to write; a null image is reported and skipped
    * @param fileName      path of the output file
    * @param prefix        format name, compared case-insensitively
    */
   private void saveImage(BufferedImage image_to_save, String fileName,String prefix)
   {
      //refuse early so that no empty output file is created for a missing image
      if( image_to_save == null )
      {
         System.err.println( "No image data to save for " + fileName );
         return;
      }

      //Save as TIFF Format
      if(prefix.equalsIgnoreCase("tif")||prefix.equalsIgnoreCase("tiff")){

         FileOutputStream fileoutput = null;
         try {
            JAIHelper.confirmJAIOnClasspath();

            fileoutput = new FileOutputStream(fileName);

            //get tiff compression
            String tiffFlag=System.getProperty("compress_tiff");
            boolean compressTiffs = tiffFlag!=null;

            TIFFEncodeParam params = null;
            if(compressTiffs){
               params = new TIFFEncodeParam();
               params.setCompression(TIFFEncodeParam.COMPRESSION_DEFLATE);
            }

            JAI.create("encode", image_to_save, fileoutput, "TIFF", params);
         } catch (IOException e) {
            e.printStackTrace();
         } finally {
            closeStream(fileoutput);
         }
      }

      //Save as GIF Format
      else if (prefix.equalsIgnoreCase("gif")){

         OutputStream output = null;
         try {
            output = new BufferedOutputStream(
                   new FileOutputStream(fileName));

            GIFOutputStream.writeGIF(output, (Image) image_to_save,
            GIFOutputStream.STANDARD_256_COLORS);

         } catch (Exception e) {
            e.printStackTrace();
         } finally {
            //closing also flushes whatever is still held in the buffer
            closeStream(output);
         }
      }

      //Save as PNG, JPEG or any other format name through ImageIO
      else {
         try {
            //write() returns false instead of throwing when it finds no
            //writer able to encode this image in the requested format
            if( !ImageIO.write(image_to_save,prefix,new File(fileName)) )
               System.err.println( "No ImageIO writer available to save "
                                   + fileName + " as " + prefix );

         } catch (IOException e) {
            e.printStackTrace();
         }
      }
   }

   /**
    * Closes a stream, reporting (not propagating) a failure to close it.
    *
    * @param stream stream to close; null is ignored
    */
   private static void closeStream(Closeable stream)
   {
      if( stream == null )
         return;

      try {
         stream.close();
      } catch (IOException e) {
         e.printStackTrace();
      }
   }

   /**
    * Command-line entry point.
    *
    * Expects exactly two arguments: the PDF file to process and the image
    * type to produce (tif/tiff, png or gif; anything else selects jpg).
    * Exits with status 1 when the argument count is wrong, when the file name
    * does not end in ".pdf", or when the file does not exist.
    *
    * @param args command-line arguments as described above
    */
   public static void main( String[] args )
   {
      //check arguments
      if (args.length != 2){
         System.out.println("Usage:");
         System.out.println("The program need two arguments to run.");
         System.out.println("--- 1st argument is a PDF file name to process");
         System.out.println("--- 2nd argument is the image type that you want to get");
         System.out.println("\t\t" + " tiff for TIFF image format.");
         System.out.println("\t\t" + " png for PNG image format.");
         System.out.println("\t\t" + " gif for GIF image format.");
         System.out.println("\t\t" + " jpg for JPEG image format. " +
                              "(JPG is default if anything else is entered)");
         System.exit(1);
         return;
      }

      //get input file
      String file_name = args[0];

      String img_type = args[1];

      if(img_type.equalsIgnoreCase("tif")||img_type.equalsIgnoreCase("tiff"))
         prefix = "tif";

      else if(img_type.equalsIgnoreCase("png"))
         prefix = "png";

      else if(img_type.equalsIgnoreCase("gif"))
         prefix = "gif";

      else
         prefix = "jpg";

      if(!file_name.toLowerCase().endsWith(".pdf")){
         System.out.println( "File " + file_name + " is not PDF file" );
         System.exit(1);
         return;
      }

      //check file exists
      File pdf_file = new File( file_name );

      if( !pdf_file.exists() )
      {
         System.out.println( "File " + file_name + " not found" );
         //a missing input is a failure, like the non-PDF case above
         System.exit(1);
         return;
      }

      System.out.println("Start reading: " + file_name);

      //the constructor performs the whole extraction
      new Deliverable_3( file_name );
   }
}