Convert a flat file to XML (DOM)
Tag(s): XML
Consider the following data file (data.txt):
Java|http://www.rgagnon/javahowto.htm
PowerBuilder|http://www.rgagnon/pbhowto.htm
Javascript|http://www.rgagnon/jshowto.htm
VBScript|http://www.rgagnon/vbshowto.htm
The program below converts it into the following XML file (data.xml):
<?xml version="1.0" encoding="ISO-8859-1"?>
<HOWTOS>
    <TOPIC>
        <TITLE>Java</TITLE>
        <URL>http://www.rgagnon/javahowto.htm</URL>
    </TOPIC>
    <TOPIC>
        <TITLE>PowerBuilder</TITLE>
        <URL>http://www.rgagnon/pbhowto.htm</URL>
    </TOPIC>
    <TOPIC>
        <TITLE>Javascript</TITLE>
        <URL>http://www.rgagnon/jshowto.htm</URL>
    </TOPIC>
    <TOPIC>
        <TITLE>VBScript</TITLE>
        <URL>http://www.rgagnon/vbshowto.htm</URL>
    </TOPIC>
</HOWTOS>
NOTE: Since DOM constructs the XML tree in memory, it may be more appropriate to use SAX instead if you have to deal with big data files.
import java.io.*;
import org.w3c.dom.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.stream.*;
import javax.xml.transform.dom.*;

/**
 * Converts a pipe-delimited flat file ("data.txt") into an XML document
 * ("data.xml") by building a DOM tree in memory and serializing it with a
 * JAXP {@link Transformer}.
 *
 * Each input line has the form {@code title|url} and becomes one
 * {@code <TOPIC>} element, holding a {@code <TITLE>} and a {@code <URL>}
 * child, under the {@code <HOWTOS>} root element.
 */
public class ToXML {

    /** Reader over the input file; assigned (and closed) by {@link #doit()}. */
    BufferedReader in;
    /** Destination of the serialized document; assigned by {@link #doit()}. */
    StreamResult out;
    /** The DOM document being built; created by {@link #initXML()}. */
    Document xmldoc;
    /** The {@code <HOWTOS>} document element; set by {@link #initXML()}. */
    Element root;

    /**
     * Command-line entry point: runs the conversion with the built-in
     * file names.
     *
     * @param args ignored
     */
    public static void main (String args[]) {
        new ToXML().doit();
    }

    /**
     * Reads "data.txt" line by line, turns every record into a TOPIC element
     * and writes the resulting document to "data.xml".
     *
     * Any failure is reported on standard error via the stack trace; in that
     * case no output is written.
     */
    public void doit () {
        try {
            // NOTE(review): FileReader decodes with the platform default
            // charset; the encoding of data.txt is not stated anywhere here.
            in = new BufferedReader(new FileReader("data.txt"));
            try {
                out = new StreamResult("data.xml");
                initXML();
                String str;
                while ((str = in.readLine()) != null) {
                    process(str);
                }
            }
            finally {
                // Release the file handle even when a record fails to parse.
                in.close();
            }
            writeXML();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates an empty DOM document whose root element is {@code <HOWTOS>}
     * and stores it in {@link #xmldoc} / {@link #root}.
     *
     * @throws ParserConfigurationException if no DocumentBuilder can be created
     */
    public void initXML() throws ParserConfigurationException {
        // JAXP + DOM
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        DOMImplementation impl = builder.getDOMImplementation();
        // no namespace, root element "HOWTOS", no doctype
        xmldoc = impl.createDocument(null, "HOWTOS", null);
        root = xmldoc.getDocumentElement();
    }

    /**
     * Appends one TOPIC element, built from a {@code title|url} record, to
     * the root element. {@link #initXML()} must have been called first.
     *
     * Null, empty and whitespace-only lines are skipped so that a stray
     * blank line (for example at the end of the file) does not abort the
     * conversion. Fields after the second one are ignored.
     *
     * @param s one line of the input file
     * @throws IllegalArgumentException if a non-blank line does not contain
     *         both a title and a URL separated by "|"
     */
    public void process(String s) {
        if (s == null || s.trim().length() == 0) {
            return;
        }
        // Since the separator character "|" has special meaning
        // with regular expression, we need to escape it.
        String [] elements = s.split("\\|");
        if (elements.length < 2) {
            // Validate before touching the tree so a bad record leaves
            // no partial TOPIC behind.
            throw new IllegalArgumentException(
                "Expected a record of the form title|url but got: " + s);
        }
        Element topic = xmldoc.createElement("TOPIC");
        appendTextElement(topic, "TITLE", elements[0]);
        appendTextElement(topic, "URL", elements[1]);
        root.appendChild(topic);
    }

    /** Appends {@code <tag>text</tag>} as the last child of {@code parent}. */
    private void appendTextElement(Element parent, String tag, String text) {
        Element child = xmldoc.createElement(tag);
        child.appendChild(xmldoc.createTextNode(text));
        parent.appendChild(child);
    }

    /**
     * Serializes {@link #xmldoc} to {@link #out} as indented XML encoded in
     * ISO-8859-1.
     *
     * To obtain the XML as a String instead, transform into a
     * {@code StreamResult} wrapping a {@code java.io.StringWriter} and call
     * {@code toString()} on the writer.
     *
     * @throws TransformerConfigurationException if no Transformer can be created
     * @throws TransformerException if the serialization fails
     */
    public void writeXML() throws TransformerConfigurationException,
                                  TransformerException {
        DOMSource domSource = new DOMSource(xmldoc);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        // Setting OutputKeys.OMIT_XML_DECLARATION to "yes" here would
        // drop the <?xml ...?> header.
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
        // we want to pretty format the XML output
        // note : this is broken in jdk1.5 beta!
        // NOTE(review): the property name uses the Apache XSLT namespace, so
        // presumably it is implementation-specific; confirm for other
        // transformer implementations.
        transformer.setOutputProperty
            ("{http://xml.apache.org/xslt}indent-amount", "4");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.transform(domSource, out);
    }
}