1% This file is part of the Attempto Parsing Engine (APE). 2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 3% 4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it 5% under the terms of the GNU Lesser General Public License as published by the Free Software 6% Foundation, either version 3 of the License, or (at your option) any later version. 7% 8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT 9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10% PURPOSE. See the GNU Lesser General Public License for more details. 11% 12% You should have received a copy of the GNU Lesser General Public License along with the Attempto 13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/. 14 15 16%--------------------------------------------------------------------------------------------------- 17% 18% The program corrects misspellings that are the result of: 19% - transposition of two letters 20% - one letter extra 21% - one letter missing 22% - one letter wrong 23% 24% Input: 25% spelling_corrector([yolandee,is,kooking,a,diner,fro,tri,persons],Candidates,NoSolutions). 26% 27% 28% Output (first solution): 29% Candidates = [yolandee-yolande, kooking-cooking, diner-dinner, fro-for], 30% NoSolutions = [tri] 31% 32% 33% Output (all solutions): 34% Candidates = [yolandee-[yolande], kooking-[cooking, looking], 35% diner-[dinner, dinar], fro-[for]], 36% NoSolutions = [tri] 37% 38%--------------------------------------------------------------------------------------------------- 39 40:- module(spellcheck, [ 41 spelling_corrector/3, 42 damerau_rules/3 43 ]). 44 45:- use_module(is_in_lexicon). 46 47 48spelling_corrector(TokenList,Candidates,NoSolutions) :- 49 lexicon_lookup(TokenList,MissingTokens), 50 damerau_rules(MissingTokens,Candidates,NoSolutions). 51 52% ------------------------------------------------------------------------------ 53% lexicon_lookup/2 collects all tokens that are not in the lexicon. 54% ------------------------------------------------------------------------------ 55 56lexicon_lookup([],[]). 57 58lexicon_lookup([Token|Tokens],MissingTokens) :- 59 lexicon(Token), 60 lexicon_lookup(Tokens,MissingTokens). 61 62lexicon_lookup([Token|Tokens],[Token|MissingTokens]) :- 63 lexicon_lookup(Tokens,MissingTokens). 64 65% ------------------------------------------------------------------------------ 66% First solution: 67% 68% For every missing token the first candidate (Token-NewToken) is generated 69% on account of the four Damerau rules. If the token is written completely 70% wrong so that no Damerau rule comes into question then the token is given 71% back in the argument NoSolutions. 72% ------------------------------------------------------------------------------ 73 74damerau_rules([],[],[]). 75 76damerau_rules([Token|Tokens],[Token-NewToken|Candidates],NoSolutions) :- 77 convert_token_char(Token,CharList), 78 ( 79 transposition(CharList,NewCharList) 80 ; 81 one_letter_extra(CharList,NewCharList) 82 ; 83 one_letter_missing(CharList,NewCharList) 84 ; 85 one_letter_wrong(CharList,NewCharList) 86 ), 87 convert_token_char(NewToken,NewCharList), 88 lexicon(NewToken), 89 damerau_rules(Tokens,Candidates,NoSolutions). 90 91damerau_rules([Token|Tokens],Candidates,[Token|NoSolutions]) :- 92 damerau_rules(Tokens,Candidates,NoSolutions). 93 94% ------------------------------------------------------------------------------ 95% convert_token_char/2 converts either a token into a list of characters or 96% vice versa. 97% ------------------------------------------------------------------------------ 98 99convert_token_char(Token,CharList) :- 100 var(CharList), 101 atom_codes(Token,AsciiList), 102 convert_ascii_char(AsciiList,CharList). 103 104convert_token_char(Token,CharList) :- 105 var(Token), 106 convert_ascii_char(AsciiList,CharList), 107 atom_codes(Token,AsciiList). 108 109 110convert_ascii_char([],[]). 111 112convert_ascii_char([Ascii|Asciis],[Char|Chars]) :- 113 atom_codes(Char,[Ascii]), 114 convert_ascii_char(Asciis,Chars). 115 116% ------------------------------------------------------------------------------ 117% transposition/2 transposes two letters. 118% ------------------------------------------------------------------------------ 119 120transposition([Char1,Char2|Chars],[Char2,Char1|Chars]). 121 122transposition([Char|Chars1],[Char|Chars2]) :- 123 transposition(Chars1,Chars2). 124 125% ------------------------------------------------------------------------------ 126% one_letter_extra/2 deletes one letter. 127% ------------------------------------------------------------------------------ 128 129one_letter_extra([_|Chars],Chars). 130 131one_letter_extra([Char|Chars1],[Char|Chars2]) :- 132 one_letter_extra(Chars1,Chars2). 133 134% ------------------------------------------------------------------------------ 135% one_letter_missing/2 adds one letter. 136% ------------------------------------------------------------------------------ 137 138one_letter_missing(Chars,[Char|Chars]) :- 139 propose_letter(Char). 140 141one_letter_missing([Char|Chars1],[Char|Chars2]) :- 142 one_letter_missing(Chars1,Chars2). 143 144% ------------------------------------------------------------------------------ 145% one_letter_wrong/2 exchanges one letter. 146% ------------------------------------------------------------------------------ 147 148one_letter_wrong([_|Chars],[Char2|Chars]) :- 149 propose_letter(Char2). 150 151one_letter_wrong([Char|Chars1],[Char|Chars2]) :- 152 one_letter_wrong(Chars1,Chars2). 153 154% ------------------------------------------------------------------------------ 155% propose_letter/1 provides a new letter. 156% ------------------------------------------------------------------------------ 157 158/* 159propose_letter(Char) :- 160 member(Char,[a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]). 161*/ 162 163propose_letter(a). 164propose_letter(b). 165propose_letter(c). 166propose_letter(d). 167propose_letter(e). 168propose_letter(f). 169propose_letter(g). 170propose_letter(h). 171propose_letter(i). 172propose_letter(j). 173propose_letter(k). 174propose_letter(l). 175propose_letter(m). 176propose_letter(n). 177propose_letter(o). 178propose_letter(p). 179propose_letter(q). 180propose_letter(r). 181propose_letter(s). 182propose_letter(t). 183propose_letter(u). 184propose_letter(v). 185propose_letter(w). 186propose_letter(x). 187propose_letter(y). 188propose_letter(z). 189 190 191% ------------------------------------------------------------------------------ 192% Lexicon 193% ------------------------------------------------------------------------------ 194 195lexicon(Word) :- 196 is_in_lexicon:is_contentword(Word)