// $Id: spell.h,v 1.2 1999/02/25 18:11:37 shields Exp $ copyright notice #ifndef spell_INCLUDED #define spell_INCLUDED #include "config.h" #include "case.h" class Spell { static inline int Min(int x, int y) { return (x < y ? x : y); } public: static int Index(wchar_t *str1, wchar_t *str2) { int len1 = wcslen(str1), len2 = wcslen(str2); wchar_t *s1 = new wchar_t[len1 + 1], *s2 = new wchar_t[len2 + 1]; for (int i = 0; i < len1; i++) s1[i] = Case::ToAsciiLower(str1[i]); s1[len1] = U_NULL; for (int j = 0; j < len2; j++) s2[j] = Case::ToAsciiLower(str2[j]); s2[len2] = U_NULL; if (len1 == 1 && len2 == 1) { // // Singleton mispellings: // // ; <----> , // // ; <----> : // // . <----> , // // ' <----> " // if ((s1[0] == U_SEMICOLON && s2[0] == U_COMMA) || (s1[0] == U_COMMA && s2[0] == U_SEMICOLON) || (s1[0] == U_SEMICOLON && s2[0] == U_COLON) || (s1[0] == U_COLON && s2[0] == U_SEMICOLON) || (s1[0] == U_DOT && s2[0] == U_COMMA) || (s1[0] == U_COMMA && s2[0] == U_DOT) || (s1[0] == U_SINGLE_QUOTE && s2[0] == U_DOUBLE_QUOTE) || (s1[0] == U_DOUBLE_QUOTE && s2[0] == U_SINGLE_QUOTE)) return 3; } // // Scan the two strings. Increment "match" count for each match. // When a transposition is encountered, increase "match" count // by two but count it as an error. When a typo is found, skip // it and count it as an error. Otherwise we have a mismatch; if // one of the strings is longer, increment its index, otherwise, // increment both indices and continue. // // This algorithm is an adaptation of a boolean misspelling // algorithm proposed by Juergen Uhl. // int count = 0, prefix_length = 0, num_errors = 0, i1 = 0, i2 = 0; while ((i1 < len1) && (i2 < len2)) { if (s1[i1] == s2[i2]) { count++; i1++; i2++; if (num_errors == 0) prefix_length++; } else if (s1[i1 + 1] == s2[i2] && s1[i1] == s2[i2 + 1]) { count += 2; i1 += 2; i2 += 2; num_errors++; } else if (s1[i1 + 1] == s2[i2 + 1]) { i1++; i2++; num_errors++; } else { if ((len1 - i1) > (len2 - i2)) i1++; else if ((len2 - i2) > (len1 - i1)) i2++; else { i1++; i2++; } num_errors++; } } if (i1 < len1 || i2 < len2) num_errors++; if (num_errors > (Min(len1, len2) / 6 + 1)) count = prefix_length; delete [] s1; delete [] s2; return (count * 10 / (len1 + num_errors)); } }; #endif // #ifndef spell_INCLUDED