Share this page 

Convert HTML to PDF using YAHP Tag(s): IO OpenSource


As seen in this HowTo, iText can be used to transform an HTML document to PDF. The result is good with simple HTML, but with more elaborate markup the output is not as good.

For better results, the Open Source package YAHP (Yet another Html to Pdf converter) is a good choice. YAHP is based on iText, FlyingSaucer and JTidy.

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


// http://www.allcolor.org/YaHPConverter/
import org.allcolor.yahp.converter.CYaHPConverter;
import org.allcolor.yahp.converter.IHtmlToPdfTransformer;

/**
 * Example: convert an in-memory HTML string to a PDF file with YAHP.
 */
public class HtmlToPdf_yahp {
  public  static void main(String ... args ) throws Exception {
    htmlToPdfFile();
  }

  /**
   * Converts a small hard-coded HTML document and writes the result to
   * c:/temp/x.pdf, selecting the Flying Saucer renderer through the
   * converter properties.
   *
   * @throws Exception if the output file cannot be opened or the
   *                   conversion fails
   */
  public static void htmlToPdfFile() throws Exception {
    CYaHPConverter converter = new CYaHPConverter();
    File fout = new File("c:/temp/x.pdf");
    Map properties = new HashMap();
    List headerFooterList = new ArrayList();  // left empty: no header/footer entries

    String str = "<HTML><HEAD></HEAD><BODY><H1>Testing</H1><FORM>" +
                 "check : <INPUT TYPE='checkbox' checked=checked/><br/>"   +
                 "</FORM></BODY></HTML>";

    properties.put(IHtmlToPdfTransformer.PDF_RENDERER_CLASS,
                   IHtmlToPdfTransformer.FLYINGSAUCER_PDF_RENDERER);
    //properties.put(IHtmlToPdfTransformer.FOP_TTF_FONT_PATH, fontPath);

    FileOutputStream out = new FileOutputStream(fout);
    try {
      converter.convertToPdf(str,
            IHtmlToPdfTransformer.A4P,
            headerFooterList,
            "file:///temp/", // root for relative external CSS and IMAGE
            out,
            properties);
      out.flush();
    } finally {
      // Release the file handle even when the conversion throws.
      out.close();
    }
  }
}
In the next example, we read an existing HTML file and convert it to a PDF file.
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;


// http://www.allcolor.org/YaHPConverter/
import org.allcolor.yahp.converter.CYaHPConverter;
import org.allcolor.yahp.converter.IHtmlToPdfTransformer;

/**
 * Example: read an existing HTML file and convert it to a PDF file with YAHP.
 */
public class HtmlToPdf_yahp_2 {
  public  static void main(String ... args ) throws Exception {
    String root = "c:/temp/html";
    String input = "file_1659686.htm";  // must be UTF-8 encoded
    htmlToPdfFile(new File(root, input),
                  new File(root, input + ".pdf"));
    System.out.println("Done");
  }

  /**
   * Reads the whole HTML file as UTF-8 and writes its PDF rendering to
   * {@code pdfOut}, selecting the Flying Saucer renderer through the
   * converter properties.
   *
   * @param htmlIn HTML file to convert; expected to be UTF-8 encoded
   * @param pdfOut destination PDF file
   * @throws java.util.NoSuchElementException if the input file is empty
   * @throws Exception if a file cannot be opened or read, or the
   *                   conversion fails
   */
  public static void htmlToPdfFile(File htmlIn, File pdfOut) throws Exception {
    String htmlContents = readHtml(htmlIn);

    CYaHPConverter converter = new CYaHPConverter();
    Map properties = new HashMap();
    List headerFooterList = new ArrayList();  // left empty: no header/footer entries

    properties.put(IHtmlToPdfTransformer.PDF_RENDERER_CLASS,
                   IHtmlToPdfTransformer.FLYINGSAUCER_PDF_RENDERER);
    //properties.put(IHtmlToPdfTransformer.FOP_TTF_FONT_PATH, fontPath);

    FileOutputStream out = new FileOutputStream(pdfOut);
    try {
      converter.convertToPdf(htmlContents,
                  IHtmlToPdfTransformer.A4P,
                  headerFooterList,
                  "file:///temp/html/",
                  out,
                  properties);
      out.flush();
    } finally {
      // Release the file handle even when the conversion throws.
      out.close();
    }
  }

  /** Reads the whole file into a String, decoding it as UTF-8. */
  private static String readHtml(File htmlIn) throws Exception {
    // Explicit charset: without it Scanner decodes with the platform default.
    Scanner scanner = new Scanner(htmlIn, "UTF-8").useDelimiter("\\Z");
    try {
      String contents = scanner.next();
      // Scanner treats a read error as end of input and only records it;
      // rethrow so a truncated document is not silently converted.
      java.io.IOException readError = scanner.ioException();
      if (readError != null) {
        throw readError;
      }
      return contents;
    } finally {
      scanner.close();
    }
  }
}
NOTE: After downloading YAHP (and its dependencies), you still need to build a YAHP.JAR. This is easily done with Eclipse, or you can get it here.