/* ************************************************************************
 *    cmp11sht.c, v20251221.085434
 *    A fuzzy comparison between values (floats or strings)
 *
 *    Copyright (C) 2025 by Ruben Carlo Benante
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the
 *    Free Software Foundation, Inc.,
 *    59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 *    To contact the author, please write to:
 *    Ruben Carlo Benante
 *    Webpage: http://www.beco.cc
 *    Phone: +55 (81) 3184-7555
 * ************************************************************************ */

/* NOTE(review): the reviewed copy of this file listed eight #include lines
 * whose header names had been lost.  The seven below are the ones the code
 * actually uses; confirm whether an eighth header is still wanted. */
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SBUFF 256 /* Max string size */

/* fuzzy comparison */
int fequal(float a, float b, float delta); /* compare two floats within a margin */
int sequal(char *a, char *b, float thr, float *ratio, char *s1, char *s2); /* compare two strings by similarity */

/* auxiliary functions */
int uselesschar(int c); /* check if it is a blank/useless char */
void trim(char *s); /* drop leading/trailing blanks, collapse inner runs */
void asciify(const char *src, char *dest); /* remove accents */
int ulen(unsigned char c); /* length in bytes of the UTF-8 sequence started by lead byte c */
float shit11(char *s1, char *s2); /* levenshtein similarity */
float fmin3(float a, float b, float c); /* return the minimum */
float fmax2(float a, float b); /* return the maximum */

/* ---------------------------------------------------------------------- */
/* Parse text as one complete, finite float.
 *
 * Returns 1 and stores the number in *value when the whole argument
 * (surrounding white space aside) is a finite number; returns 0 and leaves
 * *value untouched otherwise.
 *
 * strtof() alone is not enough to tell numbers from strings: it accepts any
 * numeric prefix ("3 apples") and also the words "nan", "inf" and
 * "infinity", so names such as "Nancy" or "Infante" would be taken for
 * numbers. */
static int parse_number(const char *text, float *value)
{
    char *end;
    float v;

    if(!text || !value)
        return 0;

    v = strtof(text, &end);
    if(end == text)
        return 0; /* nothing numeric at all */

    while(isspace((unsigned char)*end))
        end++;
    if(*end != '\0')
        return 0; /* trailing garbage: not a pure number */

    /* rejects NaN (v != v) and +/- infinity without needing libm */
    if(v != v || v > FLT_MAX || v < -FLT_MAX)
        return 0;

    *value = v;
    return 1;
}

#ifndef CMP11SHT_NO_MAIN /* define it to link the helpers into a test harness */

/* Print the help text to stdout. */
static void usage(void)
{
    fputs(
        "cmp11sht - Fuzzy compare strings or floats within range\n\n"
        "Usage:\n\t$ cmp11sht -h\n"
        "\t$ cmp11sht o1 o2 delta [-v|-o|-n]\n\n"
        "After getting two objets o1 and o2 (float or string)\n"
        "and a FLT_MIN < delta < 1.0 (float), where:\n"
        "- For strings, delta is the Levenshtein ratio\n"
        "- For floats, delta is the precision\n"
        "the comparisson will:\n\n"
        "* return 0 if a ~ b, within range delta\n"
        "* return +1 if a > b, within range delta\n"
        "* return +2 if a < b, within range delta\n"
        "* return +3 if an error occurred\n\n"
        "Options:\n\t-h Print this help\n"
        "\t-v Print result to stdout (default is system err)\n"
        "\t-o Print also the Levenshtein ratio or float difference\n"
        "\t-n Print also the normalized strings or floats used for comparisson\n\n"
        "cmp11sht v20251221.0718 (C) 2025 by Ruben C. Benante (MIT Lic)\n\n",
        stdout);
}

/* Report the error code 3, printing it when any output option is active. */
static int fail(int opt)
{
    if(opt)
        printf("%d\n", 3);
    return 3;
}

/* Get two objects o1 and o2 (float or string) and a delta (float), then
 * exit with:
 *   0 if o1 ~ o2, within range delta
 *   1 if o1 > o2
 *   2 if o1 < o2
 *   3 if an error occurred (bad argument count, option or delta)
 *
 * Both objects are compared as floats only when each one is, in full, a
 * finite number; otherwise both are compared as strings. */
int main(int argc, char **argv)
{
    int res; /* comparison result */
    char s1[SBUFF] = "", s2[SBUFF] = ""; /* normalized strings */
    float a = 0.0f, b = 0.0f, delta = 0.0f, ratio = 0.0f;
    int a_is_num, b_is_num;
    int opt = 0; /* no-option:0, -v:1, -o:2, -n:3 */

    if(argc != 4 && argc != 5)
    {
        usage();
        return 3;
    }

    if(argc == 5)
    {
        /* look at argv[4][0] first so that an empty argument never has
         * argv[4][1] read past its terminator */
        char letter = (argv[4][0] != '\0') ? argv[4][1] : '\0';

        switch(letter)
        {
            case 'v':
                opt = 1;
                break;
            case 'o':
                opt = 2;
                break;
            case 'n':
                opt = 3;
                break;
            default:
                return 3;
        }
    }

    /* a threshold is mandatory and must satisfy FLT_MIN < delta < 1.0 */
    if(!parse_number(argv[3], &delta))
        return fail(opt);
    if(delta <= FLT_MIN || delta >= 1.0f)
        return fail(opt);

    a_is_num = parse_number(argv[1], &a);
    b_is_num = parse_number(argv[2], &b);

    if(!a_is_num || !b_is_num) /* string */
    {
        if(opt == 3)
            printf("cmp11sht: string\n");

        errno = 0;
        res = sequal(argv[1], argv[2], delta, &ratio, s1, s2);
        if(errno == EINVAL)
            res = 3; /* error -> CLI exit 3 */
        else if(res > 0)
            res = 1; /* a > b -> CLI exit 1 */
        else if(res < 0)
            res = 2; /* a < b -> CLI exit 2 */
        /* res == 0 stays 0 (equal) */

        if(opt == 3)
            printf("result: ");
        if(opt)
            printf("%d\n", res);
        if(opt == 3)
            printf("ratio: ");
        if(opt >= 2)
            printf("%f\n", ratio);
        if(opt == 3)
            printf("s1: %s\ns2: %s\n", s1, s2);
    }
    else /* float */
    {
        if(opt == 3)
            printf("cmp11sht: float\n");

        res = fequal(a, b, delta);
        if(res > 0)
            res = 1; /* a > b -> CLI exit 1 */
        else if(res < 0)
            res = 2; /* a < b -> CLI exit 2 */
        /* res == 0 stays 0 (equal) */

        if(opt == 3)
            printf("result: ");
        if(opt)
            printf("%d\n", res);
        if(opt == 3)
            printf("diff: ");
        if(opt >= 2)
            printf("%f\n", fabs(a - b));
        if(opt == 3)
            printf("f1: %E\nf2: %E\n", a, b);
    }

    return res;
}

#endif /* CMP11SHT_NO_MAIN */

/* ---------------------------------------------------------------------- */
/* Compare two float numbers within an error margin delta.
 * Return  0 if b-delta <= a <= b+delta,
 *        -1 if a < b-delta and
 *        +1 if a > b+delta */
int fequal(float a, float b, float delta)
{
    const float low = b - delta;
    const float high = b + delta;

    if(a < low)
        return -1;
    return (a > high) ? 1 : 0;
}

/* ---------------------------------------------------------------------- */
/* Compare the similarity of two strings.
 *
 * a, b   : the strings to compare (not modified)
 * thr    : similarity threshold
 * ratio  : receives the similarity computed by shit11()
 * s1, s2 : buffers of at least SBUFF bytes each; receive the normalized
 *          forms of a and b (accents removed, blanks trimmed, lowercase)
 *
 * Return:
 *    0 if the normalized strings are identical or their similarity is
 *      above thr
 *   -1 if a < b alphabetically (after normalization)
 *   +1 if a > b alphabetically (after normalization)
 * On error (any NULL argument): sets errno = EINVAL and returns 0.
 * Caller must reset errno = 0 before the call to detect errors. */
int sequal(char *a, char *b, float thr, float *ratio, char *s1, char *s2)
{
    size_t i;
    int order;

    if(!a || !b || !s1 || !s2 || !ratio)
    {
        errno = EINVAL;
        return 0;
    }

    /* remove accents */
    asciify(a, s1);
    asciify(b, s2);

    /* trim spaces */
    trim(s1);
    trim(s2);

    /* NOTE(review): from here down to the threshold test the statements
     * were reconstructed (the reviewed copy had lost them); confirm against
     * the author's version. */

    /* lowercase */
    for(i = 0; s1[i] != '\0'; i++)
        s1[i] = (char)tolower((unsigned char)s1[i]);
    for(i = 0; s2[i] != '\0'; i++)
        s2[i] = (char)tolower((unsigned char)s2[i]);

    *ratio = shit11(s1, s2);
    order = strcmp(s1, s2);

    /* identical strings are equal whatever the threshold */
    if(order == 0 || *ratio > thr)
        return 0;

    return (order < 0) ? -1 : 1;
}

/* ---------------------------------------------------------------------- */
/* Levenshtein similarity of two strings:
 *     1 - distance(s1, s2) / max(length(s1), length(s2))
 * so 1.0 means identical and 0.0 means nothing in common.
 * Two empty strings are identical (1.0); a NULL argument yields 0.0.
 *
 * Only two rows of the distance matrix are kept, so the stack use grows
 * with the length of s2 alone. */
float shit11(char *s1, char *s2)
{
    size_t len1, len2, i, j;

    if(!s1 || !s2)
        return 0.0f;

    len1 = strlen(s1);
    len2 = strlen(s2);

    if(len1 == 0 && len2 == 0)
        return 1.0f;
    if(len1 == 0 || len2 == 0)
        return 0.0f;

    {
        unsigned row_a[len2 + 1], row_b[len2 + 1];
        unsigned *prev = row_a, *cur = row_b, *swap;

        /* distance from the empty prefix of s1 to each prefix of s2 */
        for(j = 0; j <= len2; j++)
            prev[j] = (unsigned)j;

        for(i = 1; i <= len1; i++)
        {
            cur[0] = (unsigned)i;
            for(j = 1; j <= len2; j++)
            {
                unsigned cost = (s1[i - 1] == s2[j - 1]) ? 0u : 1u;

                cur[j] = (unsigned)fmin3((float)(prev[j] + 1u),        /* deletion */
                                         (float)(cur[j - 1] + 1u),     /* insertion */
                                         (float)(prev[j - 1] + cost)); /* substitution */
            }
            swap = prev;
            prev = cur;
            cur = swap;
        }

        /* after the last swap, prev holds the final row */
        return 1.0f - (float)prev[len2] / fmax2((float)len1, (float)len2);
    }
}

/* ---------------------------------------------------------------------- */
/* return the minimum */
float fmin3(float a, float b, float c)
{
    float m = a;

    if(b < m)
        m = b;
    if(c < m)
        m = c;
    return m;
}

/* ---------------------------------------------------------------------- */
/* return the maximum */
float fmax2(float a, float b)
{
    return (a > b) ? a : b;
}

/* ---------------------------------------------------------------------- */
/* Check if it is a useless char: space, tab, line feed, carriage return,
 * or one of the byte values 0xC2 / 0xA0 (the two bytes of a UTF-8
 * no-break space).  Return 1 if so, 0 otherwise. */
int uselesschar(int c)
{
    switch(c)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case 0xC2:
        case 0xA0:
            return 1;
        default:
            return 0;
    }
}

/* ---------------------------------------------------------------------- */
/* Remove leading/trailing useless chars and squeeze every inner run of
 * them into one single space.  Works in place; a NULL s is ignored. */
void trim(char *s)
{
    size_t rd, wr = 0;
    int gap = 0; /* a blank run is waiting to be written as one space */

    if(!s)
        return;

    for(rd = 0; s[rd] != '\0'; rd++)
    {
        if(uselesschar(s[rd]))
        {
            /* blanks before the first kept char are simply dropped */
            gap = (wr > 0);
            continue;
        }
        if(gap)
            s[wr++] = ' ';
        gap = 0;
        s[wr++] = s[rd];
    }
    /* a pending gap at the end is a trailing run: never written */
    s[wr] = '\0';
}

/* ---------------------------------------------------------------------- */
/* Length in bytes of the UTF-8 sequence announced by lead byte c.
 * ASCII, continuation and invalid bytes all count as 1. */
int ulen(unsigned char c)
{
    if((c & 0xE0) == 0xC0)
        return 2; /* UTF8 lead 2 bytes 110xxxxx */
    if((c & 0xF0) == 0xE0)
        return 3; /* UTF8 lead 3 bytes 1110xxxx */
    if((c & 0xF8) == 0xF0)
        return 4; /* UTF8 lead 4 bytes 11110xxx */
    return 1;     /* ASCII or invalid or UTF continuation byte */
}

/* ---------------------------------------------------------------------- */
/* Copy src to dest replacing accented letters by their plain ASCII form.
 *
 * - ASCII bytes are copied unchanged.
 * - Known multibyte characters (table below) become one ASCII char; the
 *   no-break space becomes a plain space so that trim() can handle it.
 * - Any other multibyte character becomes '?'.
 * - Stray continuation bytes are skipped.
 *
 * dest must have room for SBUFF bytes: at most SBUFF-1 chars are written,
 * plus the terminator.  dest is always terminated, even for an empty src.
 * Nothing is done if src or dest is NULL. */
void asciify(const char *src, char *dest)
{
    static const struct
    {
        const char *utf8; /* the multibyte character */
        char ascii;       /* its replacement */
    } map[] = {
        {"Á", 'A'}, {"É", 'E'}, {"Í", 'I'}, {"Ó", 'O'}, {"Ú", 'U'},
        {"À", 'A'}, {"È", 'E'}, {"Ì", 'I'}, {"Ò", 'O'}, {"Ù", 'U'},
        {"Ã", 'A'}, {"Ẽ", 'E'}, {"Ĩ", 'I'}, {"Õ", 'O'}, {"Ũ", 'U'},
        {"Â", 'A'}, {"Ê", 'E'}, {"Î", 'I'}, {"Ô", 'O'}, {"Û", 'U'},
        {"Ä", 'A'}, {"Ë", 'E'}, {"Ï", 'I'}, {"Ö", 'O'}, {"Ü", 'U'},
        {"á", 'a'}, {"é", 'e'}, {"í", 'i'}, {"ó", 'o'}, {"ú", 'u'},
        {"à", 'a'}, {"è", 'e'}, {"ì", 'i'}, {"ò", 'o'}, {"ù", 'u'},
        {"ã", 'a'}, {"ẽ", 'e'}, {"ĩ", 'i'}, {"õ", 'o'}, {"ũ", 'u'},
        {"â", 'a'}, {"ê", 'e'}, {"î", 'i'}, {"ô", 'o'}, {"û", 'u'},
        {"ä", 'a'}, {"ë", 'e'}, {"ï", 'i'}, {"ö", 'o'}, {"ü", 'u'},
        {"ª", 'a'}, {"º", 'o'}, {"Ç", 'C'}, {"ç", 'c'}, {"Ñ", 'N'},
        {"ñ", 'n'}, {"¹", '1'}, {"²", '2'}, {"³", '3'},
        {"\xC2\xA0", ' '} /* U+00A0 no-break space */
    };
    const size_t nmap = sizeof(map) / sizeof(map[0]);
    size_t in = 0, out = 0;

    if(!src || !dest)
        return;

    /* bound the OUTPUT index: the terminator must still fit in dest */
    while(src[in] != '\0' && out < SBUFF - 1)
    {
        unsigned char lead = (unsigned char)src[in];
        char ch[5]; /* one UTF8 multibyte char */
        char repl = '?';
        int len, n;
        size_t k;

        if(lead < 0x80) /* ASCII */
        {
            dest[out++] = src[in++];
            continue;
        }
        if((lead & 0xC0) == 0x80) /* stray UTF continuation byte */
        {
            in++;
            continue;
        }

        /* Gather the lead byte plus only the continuation bytes really
         * present: a truncated sequence must neither swallow the bytes
         * that follow it nor run past the end of src. */
        len = ulen(lead);
        ch[0] = src[in];
        for(n = 1; n < len && ((unsigned char)src[in + n] & 0xC0) == 0x80; n++)
            ch[n] = src[in + n];
        ch[n] = '\0';
        in += (size_t)n;

        for(k = 0; k < nmap; k++)
            if(strcmp(ch, map[k].utf8) == 0)
            {
                repl = map[k].ascii;
                break;
            }
        dest[out++] = repl;
    }
    dest[out] = '\0';
}

/* ---------------------------------------------------------------------- */
/* vi: set ai et ts=4 sw=4 tw=0 wm=0 fo=croql : C config for Vim modeline */
/* Template by Dr. Beco                      Version 20160612.142044      */