better comments on lib11sht API

2026-05-23 07:14:51 -03:00
parent d48c8c1c36
commit e9d00dc619
1 changed files with 41 additions and 5 deletions
--- a/lib11sht.h
+++ b/lib11sht.h
@@ -13,18 +13,43 @@

 #define LEVN_SBUFF 256  /* recommended size for s1/s2 buffers */

+/* ************************************************************************ *
+ * ERROR CONVENTION (read before using sequal / sequal_full)                *
+ * ************************************************************************ *
+ *
+ * sequal() and sequal_full() return -1 / 0 / +1 for the three comparison
+ * outcomes (a<b / equal-or-similar / a>b). On error (NULL inputs, zero
+ * buffer size), they set errno = EINVAL and return 0.
+ *
+ * The return value 0 is shared by "match" and "error". To distinguish,
+ * callers MUST reset errno = 0 before the call and check it after:
+ *
+ *     errno = 0;
+ *     int r = sequal(a, b, 0.85f);
+ *     if(errno == EINVAL) { ... error handling ... }
+ *     else if(r == 0)     { ... match ... }
+ *     else                { ... a<b or a>b ... }
+ *
+ * Without the errno = 0 reset, a stale EINVAL left by an earlier failed
+ * call (anywhere in the program) can be mistaken for a sequal error. This
+ * is the same reset-then-check idiom used with strtol(); apply it at
+ * every call site.
+ *
+ * ************************************************************************ */
+
 /* Compare similarity between two strings (after asciify + trim + lowercase).
 * Symmetric in shape with fequal(a, b, delta).
 * Returns:
- *      0  if equal or similar above the lratio threshold
+ *      0  if equal or similar above the threshold given in shold
 *     -1  if a < b alphabetically (after normalization)
 *     +1  if a > b alphabetically (after normalization)
- * On error: sets errno = EINVAL and returns 0; comparison result is undefined.
- * Caller must reset errno = 0 before the call to detect errors.
+ * On error: sets errno = EINVAL and returns 0 — see ERROR CONVENTION above.
 *
 * Parameters:
- *   a, b   input strings (NUL-terminated, may contain UTF-8 accented Latin chars)
- *   shold  Levenshtein similarity threshold 0.0..1.0; matches above this count as equal
+ *   a, b   input strings (NUL-terminated, may contain UTF-8 accented Latin chars).
+ *          Strings longer than LEVN_SBUFF-1 chars are silently truncated to
+ *          their first LEVN_SBUFF-1 chars for comparison.
+ *   shold  similarity threshold 0.0..1.0; matches above this count as equal.
+ *          Use 1.0 for strict mode (no fuzzy fallback, only exact-after-normalize).
 */
 int sequal(char *a, char *b, float shold);

@@ -37,6 +62,17 @@ int sequal(char *a, char *b, float shold);
 *   s1, s2   out: caller-provided buffers filled with the normalized inputs
 *   s1_size  size of s1 in bytes (writes capped at s1_size-1 + final NUL)
 *   s2_size  size of s2 in bytes (writes capped at s2_size-1 + final NUL)
+ *
+ * TRUNCATION SEMANTICS: when a normalized input does not fit in its buffer,
+ * only its leading (s1_size-1) or (s2_size-1) bytes participate in the
+ * comparison. The Levenshtein ratio in *ratio is computed on the
+ * normalized contents of s1 / s2 (i.e. on the possibly-truncated buffer
+ * data), NOT on the original a / b strings. To compare without truncation,
+ * pass buffers at least one byte larger than the longest normalized input
+ * (room for the final NUL) — LEVN_SBUFF (256) is the recommended minimum.
+ *
+ * On error: same convention as sequal — sets errno = EINVAL and returns 0;
+ * *ratio, s1, s2 are not modified in that case. See ERROR CONVENTION above.
 */
 int sequal_full(char *a, char *b, float shold, float *ratio,
                char *s1, size_t s1_size,