/* Recovered from commit a23b1d785a1b0db82e2ae25e21be39b09eb19e66
 * ("cmp11sht.c 1st commit", Ruben Carlo Benante, Fri May 22 19:02:03 2026 -0300).
 */

/* ************************************************************************ *
 *    cmp11sht.c, v20251221.085434                                          *
 *    A fuzzy comparison between values (floats or strings)                 *
 *                                                                          *
 *    Copyright (C) 2025 by Ruben Carlo Benante                             *
 *                                                                          *
 *    This program is free software; you can redistribute it and/or modify  *
 *    it under the terms of the GNU General Public License as published by  *
 *    the Free Software Foundation; either version 2 of the License, or     *
 *    (at your option) any later version.                                   *
 *                                                                          *
 *    This program is distributed in the hope that it will be useful,       *
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *    GNU General Public License for more details.                          *
 *                                                                          *
 *    You should have received a copy of the GNU General Public License     *
 *    along with this program; if not, write to the                         *
 *    Free Software Foundation, Inc.,                                       *
 *    59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 *                                                                          *
 *    To contact the author, please write to:                               *
 *    Ruben Carlo Benante                                                   *
 *    Webpage: http://www.beco.cc                                           *
 *    Phone: +55 (81) 3184-7555                                             *
 * ************************************************************************ *
 *
 */

/* NOTE(review): the header names of the original #include lines were lost;
 * these are the headers the code below actually needs. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <ctype.h>

#define SBUFF 256 /* Max string size */

/* output verbosity selected by the optional 4th argument */
enum
{
    OPT_NONE = 0,   /* no option: result only in the exit status */
    OPT_RESULT = 1, /* -v: also print the result */
    OPT_RATIO = 2,  /* -o: also print the ratio / difference */
    OPT_NORM = 3    /* -n: also print the normalized operands, with labels */
};

/* fuzzy comparison */
int fequal(float a, float b, float delta); /* compare equality of two float numbers */
int sequal(char *a, char *b, float thr, float *ratio, char *s1, char *s2); /* compare equality of two strings */

/* auxiliary functions */
int uselesschar(int c); /* check if it is a useless char */
void trim(char *s); /* remove leading/trailing spaces and tabs */
void asciify(const char *src, char *dest); /* remove accents */
int ulen(unsigned char c); /* length in bytes of the UTF8 sequence started by c */
float shit11(char *s1, char *s2); /* levenshtein similarity */
float fmin3(float a, float b, float c); /* return the minimum */
float fmax2(float a, float b); /* return the maximum */

/* ---------------------------------------------------------------------- */
/* Parse the WHOLE of s as a finite float.
 * Return 1 and store the value in *out on success; return 0 (leaving *out
 * untouched) when s is empty, has trailing characters, or is inf/nan.
 * Requiring full consumption keeps strings such as "1st place" from being
 * silently compared as the number 1.
 */
static int parsefloat(const char *s, float *out)
{
    char *end;
    float v;

    if(!s || *s == '\0')
        return 0;

    v = strtof(s, &end);
    if(end == s || *end != '\0' || !isfinite(v))
        return 0;

    *out = v;
    return 1;
}

/* ---------------------------------------------------------------------- */
/* Decode the option argument ("-v", "-o" or "-n").
 * Return the matching OPT_* value, or -1 if it is not a valid option.
 * Only the letter right after the dash is inspected; the dash itself is
 * checked first so arg[1] is never read past the terminator of "".
 */
static int parseopt(const char *arg)
{
    if(!arg || arg[0] != '-')
        return -1;

    switch(arg[1])
    {
        case 'v': return OPT_RESULT;
        case 'o': return OPT_RATIO;
        case 'n': return OPT_NORM;
        default:  return -1;
    }
}

/* ---------------------------------------------------------------------- */
/* get two objects a and b (float or string) and a delta (float)
 * exit status 0 if a ~ b, within range delta
 * exit status 1 if a > b, within range delta
 * exit status 2 if a < b, within range delta
 * exit status 3 if an error occurred (bad usage, bad option, bad delta)
 *
 * Both operands must parse completely as finite floats to be compared as
 * numbers; otherwise they are compared as strings.
 */
int main(int argc, char **argv)
{
    int res; /* comparison result */
    int opt = OPT_NONE;
    char s1[SBUFF], s2[SBUFF]; /* normalized strings filled by sequal() */
    float a = 0.0f, b = 0.0f, delta = 0.0f, ratio = 0.0f;

    if(argc != 4 && argc != 5)
    {
        /* NOTE(review): this text says "MIT Lic" while the file header is
         * GPL v2+, and its version differs from the header — confirm. */
        fputs(
            "cmp11sht - Fuzzy compare strings or floats within range\n\n"
            "Usage:\n\t$ cmp11sht -h\n"
            "\t$ cmp11sht o1 o2 delta [-v|-o|-n]\n\n"
            "After getting two objets o1 and o2 (float or string)\n"
            "and a FLT_MIN < delta < 1.0 (float), where:\n"
            "- For strings, delta is the Levenshtein ratio\n"
            "- For floats, delta is the precision\n"
            "the comparisson will:\n\n"
            "* return 0 if a ~ b, within range delta\n"
            "* return +1 if a > b, within range delta\n"
            "* return +2 if a < b, within range delta\n"
            "* return +3 if an error occurred\n\n"
            "Options:\n\t-h Print this help\n"
            "\t-v Print result to stdout (default is system err)\n"
            "\t-o Print also the Levenshtein ratio or float difference\n"
            "\t-n Print also the normalized strings or floats used for comparisson\n\n"
            "cmp11sht v20251221.0718 (C) 2025 by Ruben C. Benante (MIT Lic)\n\n",
            stdout);
        return 3;
    }

    if(argc == 5)
    {
        opt = parseopt(argv[4]);
        if(opt < 0)
            return 3;
    }

    /* delta is mandatory, numeric, and strictly inside (FLT_MIN, 1.0) */
    if(!parsefloat(argv[3], &delta) || delta <= FLT_MIN || delta >= 1.0f)
    {
        if(opt)
            printf("%d\n", 3);
        return 3;
    }

    if(!parsefloat(argv[1], &a) || !parsefloat(argv[2], &b)) /* string */
    {
        if(opt == OPT_NORM) printf("cmp11sht: string\n");
        res = sequal(argv[1], argv[2], delta, &ratio, s1, s2);
        if(opt == OPT_NORM) printf("result: ");
        if(opt) printf("%d\n", res);
        if(opt == OPT_NORM) printf("ratio: ");
        if(opt >= OPT_RATIO) printf("%f\n", ratio);
        if(opt == OPT_NORM) printf("s1: %s\ns2: %s\n", s1, s2);
    }
    else /* float */
    {
        if(opt == OPT_NORM) printf("cmp11sht: float\n");
        res = fequal(a, b, delta);
        if(opt == OPT_NORM) printf("result: ");
        if(opt) printf("%d\n", res);
        if(opt == OPT_NORM) printf("diff: ");
        if(opt >= OPT_RATIO) printf("%f\n", (double)fabsf(a - b));
        if(opt == OPT_NORM) printf("f1: %E\nf2: %E\n", a, b);
    }

    return res;
}

/* ---------------------------------------------------------------------- */
/* Comparison primitives and their string-normalization helpers.
 * Helpers are defined before their callers so this section compiles in
 * the order written.
 */

/* Capacity (terminator included) of every buffer handed to asciify().
 * Normally defined near the top of the file; this fallback only keeps the
 * helpers below self-contained. */
#ifndef SBUFF
#define SBUFF 256
#endif

/* ---------------------------------------------------------------------- */
/* check if it is a useless char: space, tab, newline, carriage return,
 * or one of the two bytes 0xC2 / 0xA0.
 * NOTE(review): 0xC2 0xA0 looks like the UTF-8 encoding of a no-break
 * space; the bytes are matched individually, so a lone 0xC2 also counts.
 */
int uselesschar(int c)
{
    switch(c)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case 0xC2:
        case 0xA0:
            return 1;
        default:
            return 0;
    }
}

/* ---------------------------------------------------------------------- */
/* remove leading/trailing useless chars and collapse every inner run of
 * them into a single space, in place. A NULL pointer is ignored.
 */
void trim(char *s)
{
    size_t rd, wr = 0;
    int pending = 0; /* a separator is owed before the next kept char */

    if(!s)
        return;

    for(rd = 0; s[rd] != '\0'; rd++)
    {
        /* cast so bytes >= 0x80 compare equal to 0xC2/0xA0 whether plain
         * char is signed or unsigned on this platform */
        if(uselesschar((unsigned char)s[rd]))
        {
            pending = (wr > 0); /* leading runs produce no separator */
            continue;
        }
        if(pending)
        {
            s[wr++] = ' ';
            pending = 0;
        }
        s[wr++] = s[rd]; /* wr <= rd always holds, so this never clobbers unread input */
    }
    s[wr] = '\0'; /* a trailing run leaves pending set but is never emitted */
}

/* ---------------------------------------------------------------------- */
/* length in bytes of the UTF8 sequence announced by lead byte c */
int ulen(unsigned char c)
{
    if((c & 0xE0) == 0xC0) return 2; /* UTF8 lead 2 bytes 110xxxxx */
    if((c & 0xF0) == 0xE0) return 3; /* UTF8 lead 3 bytes 1110xxxx */
    if((c & 0xF8) == 0xF0) return 4; /* UTF8 lead 4 bytes 11110xxx */
    return 1; /* ASCII or invalid or UTF continuation byte */
}

/* ---------------------------------------------------------------------- */
/* remove accents: copy src to dest turning each known accented UTF8
 * character into its plain ASCII letter, a no-break space into a space,
 * and any other multibyte character into '?'.
 *
 * dest must have room for SBUFF bytes; at most SBUFF-1 characters are
 * written, and dest is always terminated (an empty or NULL src gives "").
 * A NULL dest is ignored.
 */
void asciify(const char *src, char *dest)
{
    static const struct
    {
        const char *utf8;
        char ascii;
    } table[] = {
        {"Á",'A'}, {"É",'E'}, {"Í",'I'}, {"Ó",'O'}, {"Ú",'U'},
        {"À",'A'}, {"È",'E'}, {"Ì",'I'}, {"Ò",'O'}, {"Ù",'U'},
        {"Ã",'A'}, {"Ẽ",'E'}, {"Ĩ",'I'}, {"Õ",'O'}, {"Ũ",'U'},
        {"Â",'A'}, {"Ê",'E'}, {"Î",'I'}, {"Ô",'O'}, {"Û",'U'},
        {"Ä",'A'}, {"Ë",'E'}, {"Ï",'I'}, {"Ö",'O'}, {"Ü",'U'},
        {"á",'a'}, {"é",'e'}, {"í",'i'}, {"ó",'o'}, {"ú",'u'},
        {"à",'a'}, {"è",'e'}, {"ì",'i'}, {"ò",'o'}, {"ù",'u'},
        {"ã",'a'}, {"ẽ",'e'}, {"ĩ",'i'}, {"õ",'o'}, {"ũ",'u'},
        {"â",'a'}, {"ê",'e'}, {"î",'i'}, {"ô",'o'}, {"û",'u'},
        {"ä",'a'}, {"ë",'e'}, {"ï",'i'}, {"ö",'o'}, {"ü",'u'},
        {"ª",'a'}, {"º",'o'}, {"Ç",'C'}, {"ç",'c'}, {"Ñ",'N'},
        {"ñ",'n'}, {"¹",'1'}, {"²",'2'}, {"³",'3'},
        /* no-break space: would otherwise become '?' before trim() sees it */
        {"\xC2\xA0",' '}
    };
    size_t i = 0, j = 0, k;

    if(!dest)
        return;
    dest[0] = '\0'; /* callers read dest even when src is empty */
    if(!src)
        return;

    /* stop one short of SBUFF so the terminator always fits */
    while(src[i] != '\0' && j < SBUFF - 1)
    {
        unsigned char lead = (unsigned char)src[i];
        char ch[5]; /* one UTF8 multibyte char plus terminator */
        char out = '?';
        int len, n;

        if(lead < 0x80) /* ASCII */
        {
            dest[j++] = src[i++];
            continue;
        }
        if((lead & 0xC0) == 0x80) /* stray UTF continuation byte */
        {
            i++;
            continue;
        }

        /* gather the lead byte and only the continuation bytes that are
         * really there: a truncated sequence must not run past the NUL */
        len = ulen(lead);
        ch[0] = src[i];
        for(n = 1; n < len && ((unsigned char)src[i + n] & 0xC0) == 0x80; n++)
            ch[n] = src[i + n];
        ch[n] = '\0';

        for(k = 0; k < sizeof table / sizeof table[0]; k++)
            if(strcmp(ch, table[k].utf8) == 0)
            {
                out = table[k].ascii;
                break;
            }

        dest[j++] = out;
        i += (size_t)n;
    }
    dest[j] = '\0';
}

/* ---------------------------------------------------------------------- */
/* return the minimum */
float fmin3(float a, float b, float c)
{
    float m = a;

    if(b < m)
        m = b;
    if(c < m)
        m = c;
    return m;
}

/* ---------------------------------------------------------------------- */
/* return the maximum */
float fmax2(float a, float b)
{
    return (a > b)? a : b;
}

/* ---------------------------------------------------------------------- */
/* levenshtein similarity between s1 and s2, byte by byte:
 *     1.0 - distance / length of the longer string
 * so 1.0 means identical and 0.0 means nothing in common.
 * Two empty strings are identical (1.0); one empty string gives 0.0.
 * Returns 0.0 if either pointer is NULL or the work row cannot be
 * allocated.
 */
float shit11(char *s1, char *s2)
{
    size_t len1, len2, longest, i, j, dist;
    size_t *row; /* row[j] holds the distance for the current prefix of s1 */

    if(!s1 || !s2)
        return 0.0f;

    len1 = strlen(s1);
    len2 = strlen(s2);

    if(len1 == 0 && len2 == 0)
        return 1.0f;
    if(len1 == 0 || len2 == 0)
        return 0.0f;

    /* a single heap row replaces the full (len1+1) x (len2+1) matrix */
    row = malloc((len2 + 1) * sizeof *row);
    if(!row)
        return 0.0f;

    for(j = 0; j <= len2; j++)
        row[j] = j;

    for(i = 1; i <= len1; i++)
    {
        size_t diag = row[0]; /* value of the cell up-left of (i, 1) */

        row[0] = i;
        for(j = 1; j <= len2; j++)
        {
            size_t up = row[j]; /* previous row, same column */
            size_t best = diag + ((s1[i - 1] == s2[j - 1])? 0 : 1);

            if(up + 1 < best)
                best = up + 1;
            if(row[j - 1] + 1 < best)
                best = row[j - 1] + 1;

            row[j] = best;
            diag = up;
        }
    }

    dist = row[len2];
    free(row);

    longest = (len1 > len2)? len1 : len2;
    return 1.0f - (float)dist / (float)longest;
}

/* ---------------------------------------------------------------------- */
/* compare similarity between two strings.
 * a and b are normalized into s1 and s2 (accents removed, trimmed,
 * lowercased); s1 and s2 must each have room for SBUFF bytes.
 * The similarity is stored in *ratio when ratio is not NULL.
 * Return 0 if the normalized strings are identical or similar above thr,
 *        2 if a < b alphabetically,
 *        1 if a > b alphabetically,
 *        3 if s1 or s2 is NULL.
 */
int sequal(char *a, char *b, float thr, float *ratio, char *s1, char *s2)
{
    char *p;
    float sim;
    int order;

    if(!s1 || !s2)
        return 3;

    /* remove accents */
    asciify(a, s1);
    asciify(b, s2);

    /* trim spaces */
    trim(s1);
    trim(s2);

    /* lowercase; tolower() needs an unsigned char value */
    for(p = s1; *p != '\0'; p++)
        *p = (char)tolower((unsigned char)*p);
    for(p = s2; *p != '\0'; p++)
        *p = (char)tolower((unsigned char)*p);

    sim = shit11(s1, s2);
    if(ratio)
        *ratio = sim;

    order = strcmp(s1, s2);
    if(order == 0 || sim > thr)
        return 0;
    return (order < 0)? 2 : 1;
}

/* ---------------------------------------------------------------------- */
/* compare equality of two float numbers within an error margin delta.
 * Return 0 if equal within the error margin (b-delta <= a <= b+delta),
 *        2 if a < b-delta and
 *        1 if a > b+delta
 */
int fequal(float a, float b, float delta)
{
    if(a < b - delta)
        return 2;
    if(a > b + delta)
        return 1;

    /* b-delta <= a <= b+delta */
    return 0;
}

/* ---------------------------------------------------------------------- */
/* vi: set ai et ts=4 sw=4 tw=0 wm=0 fo=croql : C config for Vim modeline */
/* Template by Dr. Beco      Version 20160612.142044 */