| iMatix home page | << | < | > | >> |
SFL Version 2.11 |
#include "sflstr.h" int strmatch ( const char *string1, const char *string2)
Calculates a similarity index for the two strings. This is a value from 0 to 32767, with higher values indicating a closer match. The two strings are compared without regard for case. The algorithm was designed by Leif Svalgaard (leif@ibm.net).
/*  Folds one character of a C string to lower case.  The argument is
 *  converted to unsigned char first, because tolower() is only defined
 *  for values representable as unsigned char (or EOF); a plain char
 *  holding a byte >= 0x80 may be negative.  The result is returned as
 *  int so that two folded characters always compare consistently.        */
static int
strmatch_lower (char ch)
{
    return (tolower ((unsigned char) ch));
}


/*  strmatch -- similarity index of two strings, ignoring case.
 *
 *  string1   the "name" string; each of its characters in turn is used as
 *            the start of a search through string2.
 *  string2   the string that is scanned for matching runs.
 *
 *  Returns an int where a higher value means a closer match (the original
 *  documentation gives the range as 0 to 32767).  Returns 0 when string2
 *  is empty, when string1 is empty, or when either pointer is NULL.
 *
 *  Algorithm designed by Leif Svalgaard.  The arithmetic below is kept
 *  exactly as in the original so that existing scores do not change.      */
int
strmatch (const char *string1, const char *string2)
{
    enum {
        NAME_WEIGHT_SIZE = 30,          /*  Entries in name_weight         */
        LONG_NAME_WEIGHT = 3            /*  Weight for longer names        */
    };
    /*  Scaling weight, indexed by the length of string1                   */
    static const int
        name_weight [NAME_WEIGHT_SIZE] = {
            20, 15, 13, 11, 10,  9,  8,  8,  7,  7,
             7,  6,  6,  6,  6,  6,  5,  5,  5,  5,
             5,  4,  4,  4,  4,  4,  4,  4,  4,  4
        };
    int
        comp_index,                     /*  Position in string2            */
        name_index,                     /*  Position in string1            */
        longest_so_far,                 /*  Longest run for this start     */
        substring_contribution,         /*  Running score of all runs      */
        substring_length,               /*  Length of the current run      */
        compare_length,                 /*  Length of string2              */
        longest_length,                 /*  Longer of the two lengths      */
        length_difference,              /*  Absolute length difference     */
        name_length,                    /*  Length of string1              */
        char_index,                     /*  Start position in string1      */
        similarity_index,               /*  Final divisor                  */
        similarity_weight,              /*  Accumulated, damped score      */
        cur_name_char;                  /*  Folded start character         */

    /*  Keep the project's debug-build check when its macro is available,
     *  and fall back to a defined result (no match) otherwise.            */
#if defined (ASSERT)
    ASSERT (string1);
    ASSERT (string2);
#endif
    if (string1 == NULL || string2 == NULL)
        return (0);

    name_length    = (int) strlen (string1);
    compare_length = (int) strlen (string2);
    if (compare_length == 0)
        return (0);                     /*  Nothing to compare against     */

    if (name_length > compare_length)
      {
        length_difference = name_length - compare_length;
        longest_length    = name_length;
      }
    else
      {
        length_difference = compare_length - name_length;
        longest_length    = compare_length;
      }

    similarity_weight      = 0;
    substring_contribution = 0;         /*  Not reset per start position:
                                            the running total is part of
                                            the algorithm                  */
    for (char_index = 0; char_index < name_length; char_index++)
      {
        cur_name_char  = strmatch_lower (string1 [char_index]);
        longest_so_far = 0;
        comp_index     = 0;
        while (comp_index < compare_length)
          {
            /*  Skip ahead to the next occurrence of the start character   */
            while (comp_index < compare_length
            &&     strmatch_lower (string2 [comp_index]) != cur_name_char)
                comp_index++;

            /*  Measure how far the two strings agree from here.  The
             *  explicit bound on name_index replaces the original's
             *  reliance on the terminating NUL of string1 never matching
             *  a character inside string2.                                */
            substring_length = 0;
            name_index       = char_index;
            while (comp_index < compare_length
            &&     name_index < name_length
            &&     strmatch_lower (string2 [comp_index])
                == strmatch_lower (string1 [name_index]))
              {
                /*  Bonus when the match sits at the same offset in both   */
                if (comp_index == name_index)
                    substring_contribution++;
                comp_index++;
                name_index++;
                substring_length++;
              }
            /*  Added for every scan, including the final one that runs
             *  off the end of string2 without finding a match.            */
            substring_contribution += (substring_length + 1) * 3;
            if (longest_so_far < substring_length)
                longest_so_far = substring_length;
          }
        similarity_weight += (substring_contribution + longest_so_far + 1) * 2;
        similarity_weight /= name_length + 1;
      }

    /*  The divisor is at least 1: every table weight times the longer
     *  length is >= 10, and names of 30 or more characters give >= 90.    */
    similarity_index  = (name_length < NAME_WEIGHT_SIZE
                      ? name_weight [name_length]
                      : LONG_NAME_WEIGHT) * longest_length;
    similarity_index /= 10;
    similarity_index += 2 * length_difference / longest_length;

    return (100 * similarity_weight / similarity_index);
}
| << | < | > | >> | Copyright © 1996-2000 iMatix Corporation |