better comments on lib11sht API

This commit is contained in:
2026-05-23 07:14:51 -03:00
parent d48c8c1c36
commit e9d00dc619

View File

@@ -13,18 +13,43 @@
#define LEVN_SBUFF 256 /* recommended size for s1/s2 buffers */ #define LEVN_SBUFF 256 /* recommended size for s1/s2 buffers */
/* ************************************************************************ *
* ERROR CONVENTION (read before using sequal / sequal_full) *
* ************************************************************************ *
*
* sequal() and sequal_full() return -1 / 0 / +1 for the three comparison
* outcomes (a<b / equal-or-similar / a>b). On error (NULL inputs, zero
* buffer size), they set errno = EINVAL and return 0.
*
* The return value 0 is shared by "match" and "error". To distinguish,
* callers MUST reset errno = 0 before the call and check it after:
*
* errno = 0;
* int r = sequal(a, b, 0.85f);
* if(errno == EINVAL) { ... error handling ... }
* else if(r == 0) { ... match ... }
* else { ... a<b or a>b ... }
*
* Without the errno = 0 reset, a stale EINVAL left behind by an earlier,
* unrelated call (anywhere in the program) would be mistaken for a sequal
* error. This is the same reset-then-check pattern required by strtol().
*
* ************************************************************************ */
/* Compare similarity between two strings (after asciify + trim + lowercase). /* Compare similarity between two strings (after asciify + trim + lowercase).
* Symmetric in shape with fequal(a, b, delta). * Symmetric in shape with fequal(a, b, delta).
* Returns: * Returns:
* 0 if equal or similar above the lratio threshold * 0 if equal or similar above the shold threshold
* -1 if a < b alphabetically (after normalization) * -1 if a < b alphabetically (after normalization)
* +1 if a > b alphabetically (after normalization) * +1 if a > b alphabetically (after normalization)
* On error: sets errno = EINVAL and returns 0; comparison result is undefined. * On error: sets errno = EINVAL and returns 0 — see ERROR CONVENTION above.
* Caller must reset errno = 0 before the call to detect errors.
* *
* Parameters: * Parameters:
* a, b input strings (NUL-terminated, may contain UTF-8 accented Latin chars) * a, b input strings (NUL-terminated, may contain UTF-8 accented Latin chars).
* shold Levenshtein similarity threshold 0.0..1.0; matches above this count as equal * Strings longer than LEVN_SBUFF-1 chars are silently truncated to
* their first LEVN_SBUFF-1 chars for comparison.
* shold similarity threshold 0.0..1.0; matches above this count as equal.
* Use 1.0 for strict mode (no fuzzy fallback, only exact-after-normalize).
*/ */
int sequal(char *a, char *b, float shold); int sequal(char *a, char *b, float shold);
@@ -37,6 +62,17 @@ int sequal(char *a, char *b, float shold);
* s1, s2 out: caller-provided buffers filled with the normalized inputs * s1, s2 out: caller-provided buffers filled with the normalized inputs
* s1_size size of s1 in bytes (writes capped at s1_size-1 + final NUL) * s1_size size of s1 in bytes (writes capped at s1_size-1 + final NUL)
* s2_size size of s2 in bytes (writes capped at s2_size-1 + final NUL) * s2_size size of s2 in bytes (writes capped at s2_size-1 + final NUL)
*
* TRUNCATION SEMANTICS: when a normalized input does not fit in its buffer,
* only its first (size-1) bytes (s1_size-1 for s1, s2_size-1 for s2)
* participate in the comparison. The Levenshtein ratio in *ratio is computed
* on the normalized contents of s1 / s2 (i.e. on the possibly-truncated
* buffer data), NOT on the original a / b strings. To compare without
* truncation, each buffer must hold its whole normalized input plus the
* terminating NUL — LEVN_SBUFF (256) is the recommended minimum size.
*
* On error: same convention as sequal — sets errno = EINVAL and returns 0;
* *ratio, s1, s2 are not modified in that case. See ERROR CONVENTION above.
*/ */
int sequal_full(char *a, char *b, float shold, float *ratio, int sequal_full(char *a, char *b, float shold, float *ratio,
char *s1, size_t s1_size, char *s1, size_t s1_size,