Listing 1: Java code for CobolConverter.java

import java.io.*;
import org.apache.oro.text.awk.*;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;

/**
 * Converts a COBOL copybook into an XML Schema document.
 *
 * <p>Usage: {@code java CobolConverter <copybook> <schema.xsd>}
 *
 * <p>Mapping rules:
 * <ul>
 *   <li>A record without a PIC clause (a group item) becomes an
 *       {@code <element>} holding a {@code <complexType><sequence>}; it stays
 *       open until a record with the same or a lower level number is read.</li>
 *   <li>A record with {@code PIC X(n)} becomes a string element restricted
 *       to length n.</li>
 *   <li>A record with {@code PIC 9(n)} becomes a positiveInteger element
 *       restricted to n total digits.</li>
 *   <li>Blank lines, COPY statements and VALUE-only lines are ignored.</li>
 * </ul>
 * Records that cannot be interpreted are reported on standard error and
 * skipped, so one odd line does not abort the whole conversion.
 */
public class CobolConverter
{
	/** Schema writer; opened and closed by {@link #main}, also used by {@code close}. */
	static PrintWriter outputfile = null;

	// java.util.regex is spelled out in full because this file also imports
	// org.apache.oro.text.regex.Pattern, which would clash with a plain "Pattern".
	// It is the same engine String.split uses below, so only one regex dialect
	// is in play, and the patterns are compiled once instead of once per line.

	/** Matches records that must NOT be treated as group items. */
	private static final java.util.regex.Pattern NOT_A_GROUP =
		java.util.regex.Pattern.compile("(\\sPIC)|(\\sVALUE)|(^ *$)|(\\sCOPY\\s)");

	/** Matches records carrying a PIC clause (elementary items). */
	private static final java.util.regex.Pattern HAS_PIC =
		java.util.regex.Pattern.compile("(\\sPIC)");

	/** Size of the per-level counter array; level numbers must be below this. */
	private static final int MAX_LEVELS = 100;

	/**
	 * Reads the copybook named by {@code args[0]} and writes the generated
	 * schema to the file named by {@code args[1]}.
	 *
	 * @param args input copybook path followed by output schema path
	 */
	public static void main (String[] args) {
		if (args == null || args.length < 2) {
			System.err.println("Usage: java CobolConverter <copybook> <schema.xsd>");
			return;
		}

		// complexTypes[n] counts the group items of level n that are still open
		int[] complexTypes = new int[MAX_LEVELS];
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new FileReader(new File(args[0])));
			// The XML declaration below promises UTF-8, so encode explicitly
			// rather than relying on the platform default charset.
			outputfile = new PrintWriter(new BufferedWriter(
				new OutputStreamWriter(new FileOutputStream(new File(args[1])), "UTF-8")));

			outputfile.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			outputfile.println("<schema xmlns=\"http://www.w3.org/2001/XMLSchema\"");
			// the space before xmlns:router keeps the two attributes apart
			outputfile.println("targetNamespace=\"http://www.ibm.com\""
				+ " xmlns:router=\"http://www.ibm.com\">");

			String record;
			while ((record = reader.readLine()) != null) {
				convertRecord(record, complexTypes);
			}

			close(complexTypes, -1); // -1 closes every group that is still open
			outputfile.println("</schema>");

			// PrintWriter never throws; checkError() flushes and reports failures
			if (outputfile.checkError()) {
				System.err.println("Error writing schema file " + args[1]);
			}
		} catch (IOException e) {
			System.err.println("I/O error processing file: " + e);
		} finally {
			if (outputfile != null) {
				outputfile.close();
			}
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ignored) {
					// nothing useful can be done if closing the input fails
				}
			}
		}
	} // main...

	/*
	 *	convertRecord -- translate one copybook line into schema output
	 */
	private static void convertRecord(String record, int[] complexTypes) {
		String trimmed = record.trim();
		if (trimmed.length() == 0) {
			return; // blank or whitespace-only line
		}

		boolean hasPic = HAS_PIC.matcher(record).find();
		if (!hasPic && NOT_A_GROUP.matcher(record).find()) {
			return; // COPY statement or VALUE-only line: nothing to emit
		}

		// Splitting on blanks, dots and (for PIC records) parentheses turns
		// "03 NAME PIC X(3)." into { "03", "NAME", "PIC", "X", "3" }.
		String[] tokens = trimmed.split(hasPic ? "\\s+|\\.+|\\(|\\)" : "\\s+|\\.+");
		int level = (tokens.length >= 2) ? parseLevel(tokens[0]) : -1;
		if (level < 0) {
			System.err.println("Skipping unrecognized record: " + trimmed);
			return;
		}

		// An item belongs to the nearest preceding item with a lower level
		// number, so every open group at this level or deeper ends here.
		close(complexTypes, level);

		String name = tokens[1];
		if (!hasPic) {
			outputfile.println("<element name=\"" + name + "\"><complexType><sequence>");
			complexTypes[level]++;
			return;
		}

		String picture = (tokens.length > 3) ? tokens[3] : "";
		String size = (tokens.length > 4) ? tokens[4] : "";
		if (!isDigits(size)) {
			System.err.println("Skipping unsupported PIC clause: " + trimmed);
		} else if (picture.equals("X")) { // case alphanumeric
			outputfile.println("<element name=\"" + name + "\"><simpleType>"
				+ "<restriction base=\"string\"><length value=\"" + size + "\"></length>"
				+ "</restriction></simpleType></element>");
		} else if (picture.equals("9")) { // case numeric
			// NOTE(review): positiveInteger rejects 0, which PIC 9(n) can hold;
			// kept because it matches the published sample output -- confirm.
			outputfile.println("<element name=\"" + name + "\"><simpleType>"
				+ "<restriction base=\"positiveInteger\"><totalDigits value=\"" + size + "\">"
				+ "</totalDigits></restriction></simpleType></element>");
		} else {
			System.err.println("Skipping unsupported PIC clause: " + trimmed);
		}
	} // convertRecord...

	/*
	 *	parseLevel -- return the level number in token, or -1 if token is not
	 *  one or two decimal digits
	 */
	private static int parseLevel(String token) {
		// at most two digits keeps the value below MAX_LEVELS
		if (token.length() > 2 || !isDigits(token)) {
			return -1;
		}
		return Integer.parseInt(token);
	} // parseLevel...

	/*
	 *	isDigits -- true when text is non-empty and consists only of 0-9
	 */
	private static boolean isDigits(String text) {
		if (text.length() == 0) {
			return false;
		}
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			if (c < '0' || c > '9') {
				return false;
			}
		}
		return true;
	} // isDigits...

	/*
	 *	close -- close all the previously open xml schema tags with a level equal
	 *  to or bigger than the given level (pass -1 to close everything)
	 */
	private static void close(int[] complexTypes, int level) {
		int lowest = Math.max(level, 0);
		// deepest level first, so the tags are emitted innermost-out
		for (int i = complexTypes.length - 1; i >= lowest; i--) {
			while (complexTypes[i] > 0) {
				outputfile.println("</sequence></complexType></element>");
				complexTypes[i]--;
			} // while
		} // for...
	} // close...


} // class


Listing 2: COBOL copybook

01 COURSES.
	02 COURSE-ID.
		03 COURSE-TYPE		PIC X(3).
		03	COURSE-NUMBER	PIC 9(5).
  	02 COURSE-NAME			PIC X(20).
	02 COURSE-DATE.
		03 COURSE-DAY		PIC 9(2).
		03 COURSE-MONTH	PIC 9(2).
		03 COURSE-YEAR 	PIC 9(4).
		03 COURSE-HOUR		PIC 9(2).
	02	COURSE-DETAILS		PIC X(30).


Listing 3: XML Schema

<?xml version="1.0" encoding="UTF-8"?>
<schema xmlns="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://www.ibm.com" xmlns:router="http://www.ibm.com">
<element name="COURSES"><complexType><sequence>
	<element name="COURSE-ID"><complexType><sequence>
		<element name="COURSE-TYPE"><simpleType><restriction base="string">
		<length value="3"></length></restriction></simpleType></element>
		<element name="COURSE-NUMBER"><simpleType>
		<restriction base="positiveInteger"><totalDigits value="5">
		</totalDigits></restriction></simpleType></element>
	</sequence></complexType></element>
	<element name="COURSE-NAME"><simpleType><restriction base="string">
	<length value="20"></length></restriction></simpleType></element>
	<element name="COURSE-DATE"><complexType><sequence>
		<element name="COURSE-DAY"><simpleType><restriction
		 base="positiveInteger">
		<totalDigits
		 value="2"></totalDigits></restriction></simpleType></element>
		<element name="COURSE-MONTH"><simpleType>
		<restriction base="positiveInteger"><totalDigits value="2">
		</totalDigits></restriction></simpleType></element>
		<element name="COURSE-YEAR"><simpleType><restriction
		 base="positiveInteger">
		<totalDigits
		 value="4"></totalDigits></restriction></simpleType></element>
		<element name="COURSE-HOUR"><simpleType><restriction
		 base="positiveInteger">
		<totalDigits
		 value="2"></totalDigits></restriction></simpleType></element>
	</sequence></complexType></element>
	<element name="COURSE-DETAILS"><simpleType><restriction base="string">
	<length value="30"></length></restriction></simpleType></element>
</sequence></complexType></element>
</schema>