devdaily home | apple | java | perl | unix | directory | blog

What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Other links

The source code

/* Created by bgalbs on Jan 30, 2003 at 11:45:25 PM */
package com.swabunga.spell.engine;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.security.InvalidParameterException;
import java.util.*;

/**
 * Container for various methods that any SpellDictionary will use.
 * Based on the original Jazzy aspell port.
 * 

* * */ public abstract class SpellDictionaryASpell implements SpellDictionary { /** The reference to a Transformator, used to transform a word into it's phonetic code. */ protected Transformator tf; public SpellDictionaryASpell(File phonetic) throws IOException { if (phonetic == null) tf = new DoubleMeta(); else tf = new GenericTransformator(phonetic); } public SpellDictionaryASpell(File phonetic, String encoding) throws IOException { if (phonetic == null) tf = new DoubleMeta(); else tf = new GenericTransformator(phonetic, encoding); } public SpellDictionaryASpell(Reader phonetic) throws IOException { if (phonetic == null) tf = new DoubleMeta(); else tf = new GenericTransformator(phonetic); } /** * Returns a list of Word objects that are the suggestions to an * incorrect word. *

* @param word Suggestions for given mispelt word * @param threshold The lower boundary of similarity to mispelt word * @return Vector a List of suggestions */ public List getSuggestions(String word, int threshold) { Hashtable nearmisscodes = new Hashtable(); String code = getCode(word); // add all words that have the same phonetics nearmisscodes.put(code, code); Vector phoneticList = getWordsFromCode(word, nearmisscodes); // do some tranformations to pick up more results //interchange nearmisscodes = new Hashtable(); char[] charArray = word.toCharArray(); for (int i = 0; i < word.length() - 1; i++) { char a = charArray[i]; char b = charArray[i + 1]; charArray[i] = b; charArray[i + 1] = a; String s = getCode(new String(charArray)); nearmisscodes.put(s, s); charArray[i] = a; charArray[i + 1] = b; } char[] replacelist = tf.getReplaceList(); //change charArray = word.toCharArray(); for (int i = 0; i < word.length(); i++) { char original = charArray[i]; for (int j = 0; j < replacelist.length; j++) { charArray[i] = replacelist[j]; String s = getCode(new String(charArray)); nearmisscodes.put(s, s); } charArray[i] = original; } //add charArray = (word += " ").toCharArray(); int iy = charArray.length - 1; while (true) { for (int j = 0; j < replacelist.length; j++) { charArray[iy] = replacelist[j]; String s = getCode(new String(charArray)); nearmisscodes.put(s, s); } if (iy == 0) break; charArray[iy] = charArray[iy - 1]; --iy; } //delete word = word.trim(); charArray = word.toCharArray(); char[] charArray2 = new char[charArray.length - 1]; for (int ix = 0; ix < charArray2.length; ix++) { charArray2[ix] = charArray[ix]; } char a, b; a = charArray[charArray.length - 1]; int ii = charArray2.length; while (true) { String s = getCode(new String(charArray)); nearmisscodes.put(s, s); if (ii == 0) break; b = a; a = charArray2[ii - 1]; charArray2[ii - 1] = b; --ii; } nearmisscodes.remove(code); //already accounted for in phoneticList Vector wordlist = getWordsFromCode(word, 
nearmisscodes); if (wordlist.size() == 0 && phoneticList.size() == 0) addBestGuess(word, phoneticList); // We sort a Vector at the end instead of maintaining a // continously sorted TreeSet because everytime you add a collection // to a treeset it has to be resorted. It's better to do this operation // once at the end. Collections.sort(phoneticList, new Word()); //always sort phonetic matches along the top Collections.sort(wordlist, new Word()); //the non-phonetic matches can be listed below phoneticList.addAll(wordlist); return phoneticList; } /** * When we don't come up with any suggestions (probably because the threshold was too strict), * then pick the best guesses from the those words that have the same phonetic code. * @param word - the word we are trying spell correct * @param wordList - the linked list that will get the best guess */ private void addBestGuess(String word, Vector wordList) { if (wordList.size() != 0) throw new InvalidParameterException("the wordList vector must be empty"); int bestScore = Integer.MAX_VALUE; String code = getCode(word); List simwordlist = getWords(code); LinkedList candidates = new LinkedList(); for (Iterator j = simwordlist.iterator(); j.hasNext();) { String similar = (String) j.next(); int distance = EditDistance.getDistance(word, similar); if (distance <= bestScore) { bestScore = distance; Word goodGuess = new Word(similar, distance); candidates.add(goodGuess); } } //now, only pull out the guesses that had the best score for (Iterator iter = candidates.iterator(); iter.hasNext();) { Word candidate = (Word) iter.next(); if (candidate.getCost() == bestScore) wordList.add(candidate); } } private Vector getWordsFromCode(String word, Hashtable codes) { Configuration config = Configuration.getConfiguration(); Vector result = new Vector(); final int configDistance = config.getInteger(Configuration.SPELL_THRESHOLD); for (Enumeration i = codes.keys(); i.hasMoreElements();) { String code = (String) i.nextElement(); List simwordlist 
= getWords(code); for (Iterator iter = simwordlist.iterator(); iter.hasNext();) { String similar = (String) iter.next(); int distance = EditDistance.getDistance(word, similar); if (distance < configDistance) { Word w = new Word(similar, distance); result.addElement(w); } } } return result; } /** * Returns the phonetic code representing the word. */ public String getCode(String word) { return tf.transform(word); } /** * Returns a list of words that have the same phonetic code. */ protected abstract List getWords(String phoneticCode); /** * Returns true if the word is correctly spelled against the current word list. */ public boolean isCorrect(String word) { List possible = getWords(getCode(word)); if (possible.contains(word)) return true; //JMH should we always try the lowercase version. If I dont then capitalised //words are always returned as incorrect. else if (possible.contains(word.toLowerCase())) return true; return false; } }




Copyright 1998-2008 Alvin Alexander
All Rights Reserved.
 
devdaily.com is based in Louisville, Kentucky, and this web site is hosted by godaddy.com