/* ************************************************************************ *
 *    lib11sht.h, v1.0                                                      *
 *    Fuzzy comparison library: public API                                  *
 *                                                                          *
 *    Copyright (C) 2025 by Ruben Carlo Benante                             *
 *    GNU GPL version 2 or later.                                           *
 * ************************************************************************ */

#ifndef LIB11SHT_H
#define LIB11SHT_H

#include <stddef.h> /* size_t */

#define LEVN_SBUFF 256 /* recommended size for s1/s2 buffers */

/* ************************************************************************ *
 * ERROR CONVENTION (read before using sequal / sequal_full)                *
 * ************************************************************************ *
 *
 * sequal() and sequal_full() return -1 / 0 / +1 for the three comparison
 * outcomes (a<b, a==b, a>b). On error (NULL inputs, zero
 * buffer size), they set errno = EINVAL and return 0.
 *
 * The return value 0 is shared by "match" and "error". To distinguish,
 * callers MUST reset errno = 0 before the call and check it after
 * (errno and EINVAL come from <errno.h>, which the caller must include):
 *
 *     errno = 0;
 *     int r = sequal(a, b, 0.85f);
 *     if(errno == EINVAL) { ... error handling ... }
 *     else if(r == 0) { ... match ... }
 *     else { ... a<b or a>b ... }
 *
 * Forgetting the errno=0 reset means errno from a previous failed call
 * (anywhere in the program) could be mistaken for a sequal error. This
 * follows the same pattern as strtol(), but be deliberate about it.
 *
 * ************************************************************************ */

/* Compare similarity between two strings (after asciify + trim + lowercase).
 * Symmetric in shape with fequal(a, b, delta).
 * Returns:
 *    0  if equal or similar above the shold threshold
 *   -1  if a < b alphabetically (after normalization)
 *   +1  if a > b alphabetically (after normalization)
 * On error: sets errno = EINVAL and returns 0 — see ERROR CONVENTION above.
 *
 * Parameters:
 *   a, b   input strings (NUL-terminated, may contain UTF-8 accented Latin chars).
 *          Strings longer than LEVN_SBUFF-1 chars are silently truncated to
 *          their first LEVN_SBUFF-1 chars for comparison.
 *   shold  similarity threshold 0.0..1.0; matches above this count as equal.
 *          Use 1.0 for strict mode (no fuzzy fallback, only exact-after-normalize).
 *
 * NOTE(review): a and b are documented as inputs only; if the implementation
 * never writes through them they could be const-qualified — confirm against
 * the definition before changing the prototype.
 */
int sequal(char *a, char *b, float shold);

/* Full variant of sequal: same comparison but also returns diagnostics.
 * Used by callers that need the computed ratio or the normalized strings
 * (e.g. cmp11sht CLI's -o / -n flags).
 *
 * Extra parameters:
 *   ratio    out: Levenshtein similarity 0.0..1.0 (1.0 on exact-after-normalize)
 *   s1, s2   out: caller-provided buffers filled with the normalized inputs
 *   s1_size  size of s1 in bytes (writes capped at s1_size-1 + final NUL)
 *   s2_size  size of s2 in bytes (writes capped at s2_size-1 + final NUL)
 *
 * TRUNCATION SEMANTICS: when an input is longer than its buffer, only the
 * leading (s_size-1)-bytes-after-normalization participate in the
 * comparison. The Levenshtein ratio in *ratio is computed on the
 * normalized contents of s1 / s2 (i.e. on the possibly-truncated buffer
 * data), NOT on the original a / b strings. To compare without truncation,
 * pass buffers at least as large as the longest input — LEVN_SBUFF (256)
 * is the recommended floor.
 *
 * On error: same convention as sequal — sets errno = EINVAL and returns 0;
 * *ratio, s1, s2 are not modified in that case. See ERROR CONVENTION above.
 */
int sequal_full(char *a, char *b, float shold,
                float *ratio,
                char *s1, size_t s1_size,
                char *s2, size_t s2_size);

/* Compare two floats within ±delta.
 * Returns:
 *    0  if |a - b| <= delta
 *   -1  if a < b - delta
 *   +1  if a > b + delta
 */
int fequal(float a, float b, float delta);

/* String trim: removes leading + trailing whitespace (including UTF-8
 * NBSP bytes 0xC2 / 0xA0) AND collapses internal runs of whitespace
 * to a single space. Modifies s in place. Caller's buffer must already
 * be NUL-terminated.
 */
void trim(char *s);

#endif /* LIB11SHT_H */