devdaily home | apple | java | perl | unix | directory | blog

What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Other links

The source code

package com.swabunga.spell.engine;



import java.io.*;

import java.util.*;



/**

 * Another implementation of SpellDictionary that doesn't cache any words in memory. It avoids the huge

 * footprint of SpellDictionaryHashMap at the cost of relatively minor latency. A version

 * of this class that implements some caching strategies might be a good idea in the future, if there's any

 * demand for it.

 *

 * This implementation requires a special dictionary file, with "code*word" lines sorted by code.

 * It uses a dichotomy (binary search) algorithm to search for words in the dictionary.

 *

 * @author Damien Guillaume

 * @version 0.1

 */

public class SpellDictionaryDichoDisk extends SpellDictionaryASpell {



  /** Holds the dictionary file for reading*/

  private RandomAccessFile dictFile = null;

  

  /** dictionary and phonetic file encoding */

  private String encoding = null;



  /**
   * Convenience constructor: opens the word list for read-only random access, hands a
   * {@code null} phonetic file to the superclass and leaves {@code encoding} unset, so
   * dictionary lines are read with {@link RandomAccessFile#readLine()}.
   *
   * @param wordList the dictionary file to open
   * @throws FileNotFoundException if the word list cannot be opened for reading
   * @throws IOException declared for I/O failures during construction
   */
  public SpellDictionaryDichoDisk(File wordList) throws FileNotFoundException, IOException {
    super((File) null);
    this.dictFile = new RandomAccessFile(wordList, "r");
  }



  /**
   * Convenience constructor: like the single-argument form, but also records the
   * encoding used when reading lines from the dictionary file. A {@code null} phonetic
   * file is handed to the superclass.
   *
   * @param wordList the dictionary file to open for read-only random access
   * @param encoding the dictionary file encoding; when non-null, lines are read through
   *                 {@code dictReadLine()} instead of {@link RandomAccessFile#readLine()}
   * @throws FileNotFoundException if the word list cannot be opened for reading
   * @throws IOException declared for I/O failures during construction
   */
  public SpellDictionaryDichoDisk(File wordList, String encoding) throws FileNotFoundException, IOException {
    super((File) null);
    this.dictFile = new RandomAccessFile(wordList, "r");
    this.encoding = encoding;
  }



  /**
   * Dictionary constructor that passes an aspell phonetic file to the superclass
   * (to build the transformation table) and opens the word list for read-only
   * random access. {@code encoding} is left unset.
   *
   * @param wordList the dictionary file to open
   * @param phonetic the aspell phonetic file forwarded to the superclass constructor
   * @throws FileNotFoundException if the word list cannot be opened for reading
   * @throws IOException declared for I/O failures during construction
   */
  public SpellDictionaryDichoDisk(File wordList, File phonetic) throws FileNotFoundException, IOException {
    super(phonetic);
    this.dictFile = new RandomAccessFile(wordList, "r");
  }

  

  /**
   * Dictionary constructor that passes an aspell phonetic file and its encoding to the
   * superclass (to build the transformation table), records the same encoding for
   * reading the dictionary file, and opens the word list for read-only random access.
   *
   * @param wordList the dictionary file to open
   * @param phonetic the aspell phonetic file forwarded to the superclass constructor
   * @param encoding the encoding used for both the phonetic file and the dictionary file
   * @throws FileNotFoundException if the word list cannot be opened for reading
   * @throws IOException declared for I/O failures during construction
   */
  public SpellDictionaryDichoDisk(File wordList, File phonetic, String encoding) throws FileNotFoundException, IOException {
    super(phonetic, encoding);
    this.dictFile = new RandomAccessFile(wordList, "r");
    this.encoding = encoding;
  }

  

  /**
   * Intended to add a word permanently to the dictionary (and the dictionary file).
   * Not implemented for this class: the word is ignored and an error message is
   * written to {@code System.err}.
   *
   * @param word the word that would be added (unused)
   */
  public void addWord(String word) {
    final String notice = "error: addWord is not implemented for SpellDictionaryDichoDisk";
    System.err.println(notice);
  }



  /**
   * Searches the dictionary file, by dichotomy, for the words whose code equals
   * {@code code} between file offsets {@code p1} and {@code p2}.
   *
   * The method probes the first complete line after the midpoint of the range. When
   * that line ends at or beyond {@code p2}, the range is scanned sequentially with
   * {@code seqFind}. Otherwise the probed line's code (the text before the first
   * {@code '*'}) is compared with {@code code} using {@link String#compareTo(String)}
   * and the search recurses into the lower part, the upper part, or both.
   *
   * @param code the code to look for
   * @param p1 file offset where the range starts (presumably the start of a line,
   *           since sequential scanning begins reading lines there)
   * @param p2 file offset bounding the range
   * @return the words found for {@code code}, in file order
   * @throws IOException if the probed line contains no {@code '*'} or reading fails
   */
  private LinkedList dichoFind(String code, long p1, long p2) throws IOException {
    //System.out.println("dichoFind("+code+","+p1+","+p2+")");

    // Jump to the midpoint and discard whatever remains of the line found there,
    // so that the next read starts on a line boundary.
    dictFile.seek((p1 + p2) / 2);
    if (encoding == null) {
      dictFile.readLine();
    } else {
      dictReadLine();
    }

    // Read the probe line, remembering where it begins and where it ends.
    final long probeStart = dictFile.getFilePointer();
    final String probe = (encoding == null) ? dictFile.readLine() : dictReadLine();
    final long probeEnd = dictFile.getFilePointer();

    // The probe reaches the end of the range: finish with a sequential scan.
    if (probeEnd >= p2) {
      return seqFind(code, p1, p2);
    }

    final int star = probe.indexOf('*');
    if (star == -1) {
      throw new IOException("bad format: no * !");
    }

    final int order = code.compareTo(probe.substring(0, star));
    if (order < 0) {
      // Wanted code sorts before the probe: only the lower part can contain it.
      return dichoFind(code, p1, probeStart - 1);
    }
    if (order > 0) {
      // Wanted code sorts after the probe: only the upper part can contain it.
      return dichoFind(code, probeEnd, p2);
    }

    // The probe matches; equal codes may lie on either side of it, so search both
    // parts and splice the probe's word between the two results.
    LinkedList found = dichoFind(code, p1, probeStart - 1);
    LinkedList upper = dichoFind(code, probeEnd, p2);
    found.add(probe.substring(star + 1));
    found.addAll(upper);
    return found;
  }

   

   /**
    * Scans the dictionary file line by line, starting at offset {@code p1}, and
    * collects the word of every line whose code (the text before the first
    * {@code '*'}) equals {@code code}. A line is read whenever the file pointer is
    * still below {@code p2}, so a line that starts before {@code p2} is read in full.
    *
    * @param code the code to look for
    * @param p1 file offset at which reading starts
    * @param p2 file offset at which scanning stops
    * @return the matching words, in file order (empty when none match)
    * @throws IOException if a line contains no {@code '*'} or reading fails
    */
   private LinkedList seqFind(String code, long p1, long p2) throws IOException {
     //System.out.println("seqFind("+code+","+p1+","+p2+")");
     final LinkedList matches = new LinkedList();
     dictFile.seek(p1);
     while (dictFile.getFilePointer() < p2) {
       final String line = (encoding == null) ? dictFile.readLine() : dictReadLine();
       final int star = line.indexOf('*');
       if (star == -1) {
         throw new IOException("bad format: no * !");
       }
       if (!line.substring(0, star).equals(code)) {
         continue;
       }
       matches.add(line.substring(star + 1));
     }
     return matches;
   }

   

   /**

     * Read a line of dictFile with a specific encoding

     */

   private String dictReadLine() throws IOException {

     int max = 255;

     byte b=0;

    byte[] buf = new byte[max];

    int i=0;

     try {

       for (; b != '\n' && b != '\r' && i




Copyright 1998-2008 Alvin Alexander
All Rights Reserved.
 
devdaily.com is based in louisville, kentucky, and this web site is hosted by godaddy.com