2026-05-22 19:40:51 -03:00
|
|
|
/* ************************************************************************ *
|
|
|
|
|
* lib11sht.h, v1.0 *
|
|
|
|
|
* Fuzzy comparison library: public API *
|
|
|
|
|
* *
|
|
|
|
|
* Copyright (C) 2025 by Ruben Carlo Benante <rcb@beco.cc> *
|
|
|
|
|
* GNU GPL version 2 or later. *
|
|
|
|
|
* ************************************************************************ */
|
|
|
|
|
|
|
|
|
|
#ifndef LIB11SHT_H
|
|
|
|
|
#define LIB11SHT_H
|
|
|
|
|
|
2026-05-22 20:28:13 -03:00
|
|
|
#include <stddef.h> /* size_t */
|
|
|
|
|
|
|
|
|
|
/* Recommended size, in bytes, for the s1/s2 buffers passed to sequal_full().
 * sequal() silently truncates inputs longer than LEVN_SBUFF-1 chars. */
#define LEVN_SBUFF 256
|
2026-05-22 19:40:51 -03:00
|
|
|
|
2026-05-23 07:14:51 -03:00
|
|
|
/* ************************************************************************ *
 * ERROR CONVENTION (read before using sequal / sequal_full)                *
 * ************************************************************************ *
 *
 * sequal() and sequal_full() return -1 / 0 / +1 for the three comparison
 * outcomes (a<b / equal-or-similar / a>b). On error (NULL inputs, zero
 * buffer size), they set errno = EINVAL and return 0.
 *
 * The return value 0 is shared by "match" and "error". To distinguish,
 * callers MUST include <errno.h> (this header does not), reset errno = 0
 * before the call, and check it after:
 *
 *     errno = 0;
 *     int r = sequal(a, b, 0.85f);
 *     if(errno == EINVAL) { ... error handling ... }
 *     else if(r == 0)     { ... match ... }
 *     else                { ... a<b or a>b ... }
 *
 * Forgetting the errno=0 reset means errno from a previous failed call
 * (anywhere in the program) could be mistaken for a sequal error. This
 * follows the same pattern as strtol(), but be deliberate about it.
 *
 * ************************************************************************ */
|
|
|
|
|
|
2026-05-22 19:40:51 -03:00
|
|
|
/* Compare two strings for similarity after normalization
 * (asciify + trim + lowercase).
 * Symmetric in shape with fequal(a, b, delta).
 *
 * Returns:
 *    0  if equal or similar above the shold threshold
 *   -1  if a < b alphabetically (after normalization)
 *   +1  if a > b alphabetically (after normalization)
 *   On error: sets errno = EINVAL and returns 0. Because 0 also means
 *   "match", reset errno to 0 before the call and test it afterwards;
 *   see ERROR CONVENTION above.
 *
 * Parameters:
 *   a, b   input strings (NUL-terminated, may contain UTF-8 accented Latin
 *          chars). Strings longer than LEVN_SBUFF-1 chars are silently
 *          truncated to their first LEVN_SBUFF-1 chars for comparison.
 *   shold  similarity threshold 0.0..1.0; matches above this count as equal.
 *          Use 1.0 for strict mode (no fuzzy fallback, only
 *          exact-after-normalize).
 *
 * NOTE(review): a and b are declared as non-const char *. Whether the
 * implementation writes through them is not visible from this header;
 * confirm against the definition before passing const data.
 */
|
2026-05-22 20:00:47 -03:00
|
|
|
int sequal(char *a, char *b, float shold);
|
|
|
|
|
|
|
|
|
|
/* Full variant of sequal: same comparison, but also returns diagnostics.
 * Used by callers that need the computed ratio or the normalized strings
 * (e.g. cmp11sht CLI's -o / -n flags).
 *
 * Extra parameters:
 *   ratio    out: Levenshtein similarity 0.0..1.0 (1.0 on exact-after-normalize)
 *   s1, s2   out: caller-provided buffers filled with the normalized inputs
 *   s1_size  size of s1 in bytes (writes capped at s1_size-1 + final NUL)
 *   s2_size  size of s2 in bytes (writes capped at s2_size-1 + final NUL)
 *
 * TRUNCATION SEMANTICS: when an input is longer than its buffer allows,
 * only the leading s1_size-1 (resp. s2_size-1) bytes of its normalized form
 * participate in the comparison. The Levenshtein ratio in *ratio is
 * computed on the normalized contents of s1 / s2 (i.e. on the
 * possibly-truncated buffer data), NOT on the original a / b strings.
 * To compare without truncation, each buffer must have room for the whole
 * normalized input plus the final NUL; LEVN_SBUFF (256) is the recommended
 * floor.
 *
 * On error: same convention as sequal: sets errno = EINVAL and returns 0;
 * *ratio, s1, s2 are not modified in that case. See ERROR CONVENTION above.
 *
 * NOTE(review): a and b are declared as non-const char *. Whether the
 * implementation writes through them is not visible from this header;
 * confirm against the definition before passing const data.
 */
|
2026-05-22 20:28:13 -03:00
|
|
|
int sequal_full(char *a, char *b, float shold, float *ratio,
|
|
|
|
|
char *s1, size_t s1_size,
|
|
|
|
|
char *s2, size_t s2_size);
|
2026-05-22 19:40:51 -03:00
|
|
|
|
|
|
|
|
/* Compare two floats within ±delta.
 *
 * Returns:
 *    0  if |a - b| <= delta
 *   -1  if a < b - delta
 *   +1  if a > b + delta
 *
 * Parameters:
 *   a, b   values to compare
 *   delta  tolerance: half-width of the window around b that counts as equal
 *
 * NOTE(review): the result for a negative delta or NaN arguments is not
 * specified here; confirm against the definition.
 */
|
|
|
|
|
int fequal(float a, float b, float delta);
|
|
|
|
|
|
2026-05-22 22:08:21 -03:00
|
|
|
/* Trim a string in place.
 * Removes leading and trailing whitespace (including UTF-8 NBSP bytes
 * 0xC2 / 0xA0) and collapses internal runs of whitespace to a single space.
 *
 * Parameters:
 *   s  buffer to modify in place; must already be NUL-terminated.
 */
|
|
|
|
|
void trim(char *s);
|
|
|
|
|
|
2026-05-22 19:40:51 -03:00
|
|
|
#endif /* LIB11SHT_H */
|