CSpell

Application: CSpellFiles

I. Objective

Use the CSpell APIs to correct spelling errors in the text of every file in a directory.

In NLP applications, we often want to correct spelling errors during preprocessing, before the core processing steps (such as named entity recognition, concept mapping, question answering, etc.). The CSpell API can be called from Java for this purpose. This example illustrates how to use the CSpell APIs to correct spelling errors in the text of the files in a directory.

II. Pre-Requirements
Install the cSpell.${YEAR} package to "/Projects/cSpell${YEAR}".

III. Source Code

package cSpellExample;

import java.util.*;
import java.io.*;
import java.nio.file.*;
import java.nio.charset.*;

import gov.nih.nlm.nls.cSpell.Util.*;
import gov.nih.nlm.nls.cSpell.Api.*;
/*****************************************************************************
* This class uses CSpellApi to process all files in a directory: every file
* in the input directory that matches the pattern "*\\.txt" is read, run
* through CSpellApi.ProcessToStr( ), and the result is written to a file with
* the same name in the output directory.
*
* @author chlu
*
* @version    V-2018
*****************************************************************************/
public class CSpellFiles
{
	/**
	* Test driver.
	*
	* <p>Run with no arguments to use the built-in defaults, or with exactly
	* five arguments to override all of them. Any other argument count
	* prints the usage message and exits.
	*
	* @param args either empty, or
	*             {@code <configFile> <inDir> <outDir> <funcMode> <rankMode>},
	*             where funcMode and rankMode are integers (parsed with
	*             Integer.parseInt, so a non-numeric value throws
	*             NumberFormatException)
	*/
	public static void main(String[] args)
	{
		// defaults, used when no command line arguments are given
		String configFile = "../../data/Config/cSpell.properties";
		String inDir = "../data/inData/";
		String outDir = "../data/outData/";
		int funcMode = CSpellApi.FUNC_MODE_RW_A;
		int rankMode = CSpellApi.RANK_MODE_CSPELL;

		if(args.length == 5)
		{
			configFile = args[0];
			inDir = args[1];
			outDir = args[2];
			funcMode = Integer.parseInt(args[3]);
			rankMode = Integer.parseInt(args[4]);
		}
		else if(args.length > 0)
		{
			System.out.println("Usage: java CSpellFiles <configFile> <inDir> <outDir> <funcMode> <rankMode>");
			System.exit(0);
		}

		// init
		String matchPattern = "*\\.txt";
		// the list holds file names that are relative to inDir
		ArrayList<String> inFileList = FileDir.GetFilesInADirectoryToList(
			inDir, matchPattern);

		CSpellApi cSpellApi = new CSpellApi(configFile);
		System.out.println("----- Sample Code: CSpellFiles( ) -----");
		System.out.println("- inFile Dir: [" + inDir + "]");
		System.out.println("- outFile Dir: [" + outDir + "]");

		// test
		// name of the file being processed, kept for the error message
		String curFileName = "";
		try
		{
			try
			{
				// go through all files
				for(String inFile:inFileList)
				{
					curFileName = inFile;

					// Paths.get( ) joins directory and file name correctly
					// whether or not the directory ends with a separator
					String inFileAbs = Paths.get(inDir, inFile).toString();
					String inText = FileIo.GetStrFromFileAddNewLineAtTheEnd(
						inFileAbs);
					// process inFile
					String outText = cSpellApi.ProcessToStr(inText, funcMode,
						rankMode);

					// print out: the output file is opened only after the
					// text was processed, so a failure above does not leave
					// an empty output file; try-with-resources closes the
					// writer even if write( ) throws
					try(BufferedWriter outWriter = Files.newBufferedWriter(
						Paths.get(outDir, inFile), StandardCharsets.UTF_8))
					{
						outWriter.write(outText);
					}
				}
			}
			finally
			{
				// close the API once, after the last file (or on error);
				// closing it inside the loop would leave it closed for
				// every file after the first one
				cSpellApi.Close();
			}
		}
		catch(Exception e)
		{
			System.err.println("**ERR@CSpellFiles( ): [" + curFileName
				+ "], exception: " + e.toString());
		}
	}
}

IV. Compile

shell>javac -classpath ../lib/cSpell2018dist.jar CSpellFiles.java

V. Run & Results

shell>java -classpath ./:../lib/cSpell2018dist.jar:/Projects/cSpell2018/ CSpellFiles