|
Chris Pollett >
Students > [Bio] [Del 1] [Del 2] [Del 3] |
Deliverable_3 Extracting images in a PDF file and saving them into image formats that are specified by the user. The image formats that are supported in this program are TIFF, JPEG, PNG and GIF. Description: This program reads a PDF file. It then goes through the contents of each page and gets the image elements. Each image is saved to a file in the format specified by the user, such as TIF, JPG, PNG, or GIF. In this project, I have learned more about PDF file structure, extracting objects from PDF files, and saving them in different image formats. Example: This is what my code outputs on these inputs.
The cover of "iText in Action" ebook was extracted from a PDF file and saved as JPG image.
A barcode was extracted from "iText in Action" ebook and saved as JPG image.
/*
* Project : Extracting images in PDF files (Deliverable #3)
* File Name : Deliverable_3.java
* Purpose : This program extracts images from a PDF file and saves them as
* TIFF, PNG, GIF or JPEG, as specified by the user.
* Create : Nov 1, 2006
* Last Modified: Nov 14, 2006
* Java Version: 1.5.0_08
*/
package myclasses;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.*;
import javax.imageio.ImageIO;
import javax.media.jai.JAI;
import org.jpedal.PdfDecoder;
import org.jpedal.io.*;
import org.jpedal.objects.PdfImageData;
import com.sun.media.jai.codec.TIFFEncodeParam;
import org.shetline.io.*;
/**
* author: Long N Vuong
* reference: JPedal example by Mark Stephens
*
*/
public class Deliverable_3
{
    /** The decoder object which decodes the PDF and returns a data object. */
    PdfDecoder decode_pdf = null;

    /**
     * Output image type. It is used both as the format passed to the image
     * writers and as the extension of the generated files. main() overwrites
     * it from the second command-line argument.
     */
    private static String prefix = "jpg";

    /**
     * Opens the given PDF file, extracts the images of every page and writes
     * each of them to a file of the currently selected image type.
     *
     * @param file_name path of the PDF file to process
     */
    public Deliverable_3( String file_name )
    {
        decode(file_name);
    }

    /**
     * Opens the PDF, extracts and saves its images, and closes the PDF again.
     * Failures are reported on System.err; they are not propagated.
     *
     * @param file_name path of the PDF file to process
     */
    private void decode(String file_name){
        if (!openPdf(file_name)) {
            // Opening failed, so there is nothing to extract. Stop here rather
            // than calling into a decoder that is null or has no document.
            if (decode_pdf != null) {
                decode_pdf.closePdfFile();
            }
            return;
        }

        try
        {
            //check if the pdf allows to extract info
            if ((decode_pdf.isEncrypted() && !decode_pdf.isPasswordSupplied())
                    && !decode_pdf.isExtractionAllowed()) {
                System.out.println("Encrypted settings");
            } else {
                extractImages();
            }
        }
        catch( Exception e )
        {
            System.err.println( "Exception " + e );
        }
        finally
        {
            // Single close point: runs exactly once whether or not the
            // extraction succeeded.
            decode_pdf.closePdfFile();
            System.out.println("Closed PDF file");
        }
    }

    /**
     * Creates the decoder, selects what it should extract and opens the file.
     * When the system property "opi" is set, XFORMMETADATA is requested in
     * addition to the raw and final images.
     *
     * @param file_name path of the PDF file to open
     * @return true if the file was opened, false if an exception occurred
     */
    private boolean openPdf(String file_name) {
        try
        {
            decode_pdf = new PdfDecoder( false );
            //tell JPedal what we want it to extract
            if (System.getProperty("opi") == null) {
                decode_pdf.setExtractionMode(PdfDecoder.RAWIMAGES+PdfDecoder.FINALIMAGES);
            } else {
                decode_pdf.setExtractionMode(PdfDecoder.RAWIMAGES
                        +PdfDecoder.FINALIMAGES+PdfDecoder.XFORMMETADATA);
            }
            decode_pdf.openPdfFile( file_name );
            return true;
        }
        catch( Exception e )
        {
            System.err.println( "Exception " + e + " in pdf code" );
            return false;
        }
    }

    /**
     * Decodes every page of the opened PDF and saves each image found on it.
     * Output files are named image name + running counter + "." + prefix.
     * A failure on one image is reported and the remaining images are still
     * processed.
     *
     * @throws Exception if decoding a page fails
     */
    private void extractImages() throws Exception {
        // running number appended to every output file name
        long fileCounter = 0;
        int last_page = decode_pdf.getPageCount();

        for (int page = 1; page <= last_page; page++) {
            decode_pdf.decodePage( page );

            //the PdfImageData object now holds the names of the page's images
            PdfImageData pdf_images = decode_pdf.getPdfImageData();
            int image_count = pdf_images.getImageCount();

            //image 1 is item 0, so the loop runs 0 to count-1
            for (int i = 0; i < image_count; i++) {
                String image_name = pdf_images.getImageName( i );
                System.out.println("Processing: image " + (i + 1) +
                        " of " + image_count + " on page " + page);
                try {
                    //get raw version of image (R prefix for raw image)
                    BufferedImage image_to_save =
                            decode_pdf.getObjectStore().loadStoredImage( "R" + image_name );
                    if (image_to_save == null) {
                        // Guard so the encoders are never handed a null image.
                        System.err.println( "No raw image stored for " + image_name
                                + " on page " + page );
                        continue;
                    }
                    saveImage(image_to_save, image_name + fileCounter + "." + prefix, prefix);
                    fileCounter++;
                }
                catch( Exception e )
                {
                    System.err.println( "Exception " + e + " in extracting images" );
                }
            }

            //flush images so the store only contains images from the current page
            decode_pdf.flushObjectValues(true);
        }
    }

    /**
     * Saves an image to a file in the requested format.
     * "tif"/"tiff" is encoded through JAI (deflate-compressed when the system
     * property "compress_tiff" is set), "gif" through GIFOutputStream, and
     * every other value is handed to ImageIO as the format name.
     * Errors are reported on System.err; they are not propagated.
     *
     * @param image_to_save the image to write
     * @param fileName      path of the output file
     * @param prefix        image type: tif, tiff, png, gif or jpg
     */
    private void saveImage(BufferedImage image_to_save, String fileName,String prefix)
    {
        if (prefix.equalsIgnoreCase("tif") || prefix.equalsIgnoreCase("tiff")) {
            //Save as TIFF Format
            try {
                JAIHelper.confirmJAIOnClasspath();
                TIFFEncodeParam params = null;
                if (System.getProperty("compress_tiff") != null) {
                    params = new TIFFEncodeParam();
                    params.setCompression(TIFFEncodeParam.COMPRESSION_DEFLATE);
                }
                FileOutputStream fileoutput = new FileOutputStream(fileName);
                try {
                    JAI.create("encode", image_to_save, fileoutput, "TIFF", params);
                } finally {
                    // always release the file handle, even if encoding fails
                    fileoutput.close();
                }
            } catch (IOException e) {
                System.err.println( "Exception " + e + " in saving " + fileName );
            }
        }
        else if (prefix.equalsIgnoreCase("gif")) {
            //Save as GIF Format
            try {
                OutputStream output = new BufferedOutputStream(
                        new FileOutputStream(fileName));
                try {
                    GIFOutputStream.writeGIF(output, (Image) image_to_save,
                            GIFOutputStream.STANDARD_256_COLORS);
                } finally {
                    // close() also flushes the buffer, so no bytes are lost
                    output.close();
                }
            } catch (Exception e) {
                System.err.println( "Exception " + e + " in saving " + fileName );
            }
        }
        else {
            //Save as PNG or JPEG Format (any other type is passed to ImageIO as is)
            try {
                // ImageIO.write returns false when no writer can handle the image
                if (!ImageIO.write(image_to_save, prefix, new File(fileName))) {
                    System.err.println( "No image writer for format " + prefix
                            + "; nothing written to " + fileName );
                }
            } catch (IOException e) {
                System.err.println( "Exception " + e + " in saving " + fileName );
            }
        }
    }

    /**
     * Maps the image type given on the command line to the type used by the
     * program: tif/tiff gives "tif", png gives "png", gif gives "gif" and
     * anything else gives "jpg". The comparison ignores case.
     *
     * @param img_type image type as entered by the user
     * @return one of "tif", "png", "gif" or "jpg"
     */
    private static String normalizeImageType(String img_type) {
        if (img_type.equalsIgnoreCase("tif") || img_type.equalsIgnoreCase("tiff")) {
            return "tif";
        }
        if (img_type.equalsIgnoreCase("png")) {
            return "png";
        }
        if (img_type.equalsIgnoreCase("gif")) {
            return "gif";
        }
        return "jpg";
    }

    /**
     * Entry point. Expects exactly two arguments: the PDF file to process and
     * the image type to produce. Prints a usage text and exits with status 1
     * on a wrong argument count; also exits with status 1 when the file name
     * does not end in ".pdf" or the file does not exist.
     *
     * @param args command-line arguments: PDF file name, image type
     */
    public static void main( String[] args )
    {
        if (args.length != 2) {
            System.out.println("Usage:");
            System.out.println("The program need two arguments to run.");
            System.out.println("--- 1st argument is a PDF file name to process");
            System.out.println("--- 2nd argument is the image type that you want to get");
            System.out.println("\t\t" + " tiff for TIFF image format.");
            System.out.println("\t\t" + " png for PNG image format.");
            System.out.println("\t\t" + " gif for GIF image format.");
            System.out.println("\t\t" + " jpg for JPEG image format. + " +
                    "(JPG is default if anything else is entered)");
            System.exit(1);
        }

        String file_name = args[0];
        prefix = normalizeImageType(args[1]);

        if (!file_name.toLowerCase().endsWith(".pdf")) {
            System.out.println( "File " + file_name + " is not PDF file" );
            System.exit(1);
        }

        File pdf_file = new File( file_name );
        if (!pdf_file.exists()) {
            System.out.println( "File " + file_name + " not found" );
            // report the failure to the caller, like the non-PDF case above
            System.exit(1);
        }

        System.out.println("Start reading: " + file_name);
        // the constructor does all the work; the instance itself is not needed
        new Deliverable_3( file_name );
    }
}
|