/* lib11sht/lib11sht.h */

/* ************************************************************************ *
 *    lib11sht.h, v1.0                                                      *
 *    Fuzzy comparison library: public API                                  *
 *                                                                          *
 *    Copyright (C) 2025 by Ruben Carlo Benante <rcb@beco.cc>               *
 *    GNU GPL version 2 or later.                                           *
 * ************************************************************************ */

#ifndef LIB11SHT_H
#define LIB11SHT_H

#include <stddef.h>  /* size_t */

/* Recommended size, in bytes, for the caller-provided s1/s2 buffers handed
 * to sequal_full(). The size includes the terminating NUL, so a buffer of
 * this size holds at most LEVN_SBUFF-1 bytes of normalized text. sequal()
 * applies the same limit: it compares at most the first LEVN_SBUFF-1 chars
 * of each input. */
#define LEVN_SBUFF 256  /* recommended size for s1/s2 buffers */

/* ************************************************************************ *
 * ERROR CONVENTION (read before using sequal / sequal_full)                *
 * ************************************************************************ *
 *
 * sequal() and sequal_full() return -1 / 0 / +1 for the three comparison
 * outcomes (a<b / equal-or-similar / a>b). On error (NULL inputs, zero
 * buffer size), they set errno = EINVAL and return 0.
 *
 * The return value 0 is shared by "match" and "error". To distinguish,
 * callers MUST reset errno = 0 before the call and check it after
 * (errno and EINVAL come from <errno.h>, which this header does not
 * include for you):
 *
 *     errno = 0;
 *     int r = sequal(a, b, 0.85f);
 *     if(errno == EINVAL) { ... error handling ... }
 *     else if(r == 0)     { ... match ... }
 *     else                { ... a<b or a>b ... }
 *
 * Without the errno = 0 reset, a stale errno left behind by an earlier
 * failed call (anywhere in the program) could be mistaken for a sequal
 * error. This is the same reset-then-check pattern used with strtol();
 * apply it on every call whose result you rely on.
 *
 * ************************************************************************ */

/* Compare two strings for similarity after normalization
 * (asciify + trim + lowercase).
 * Symmetric in shape with fequal(a, b, delta): two values plus a tolerance.
 *
 * Returns:
 *      0  if the normalized strings are equal, or similar above the
 *         threshold passed in shold
 *     -1  if a < b alphabetically (after normalization)
 *     +1  if a > b alphabetically (after normalization)
 * On error (e.g. NULL inputs): sets errno = EINVAL and returns 0 -- see
 * ERROR CONVENTION above. Because 0 also means "match", set errno = 0
 * before the call and test it afterwards.
 *
 * Parameters:
 *   a, b   input strings (NUL-terminated, may contain UTF-8 accented Latin chars).
 *          Strings longer than LEVN_SBUFF-1 chars are silently truncated to
 *          their first LEVN_SBUFF-1 chars for comparison.
 *   shold  similarity threshold 0.0..1.0; matches above this count as equal.
 *          Use 1.0 for strict mode (no fuzzy fallback, only exact-after-normalize).
 *
 * NOTE(review): a and b are declared as non-const char *. This header does
 * not state whether the inputs are written to -- confirm against the
 * implementation before passing string literals or shared buffers.
 */
int sequal(char *a, char *b, float shold);

/* Full variant of sequal: same comparison but also returns diagnostics.
 * Used by callers that need the computed ratio or the normalized strings
 * (e.g. cmp11sht CLI's -o / -n flags).
 *
 * Returns -1 / 0 / +1 with the same meaning as sequal().
 *
 * Parameters shared with sequal: a, b (input strings) and shold
 * (similarity threshold 0.0..1.0).
 *
 * Extra parameters:
 *   ratio    out: Levenshtein similarity 0.0..1.0 (1.0 on exact-after-normalize)
 *   s1, s2   out: caller-provided buffers filled with the normalized inputs
 *   s1_size  size of s1 in bytes (writes capped at s1_size-1 + final NUL)
 *   s2_size  size of s2 in bytes (writes capped at s2_size-1 + final NUL)
 *
 * TRUNCATION SEMANTICS: when a normalized input does not fit in its buffer,
 * only its leading s1_size-1 (resp. s2_size-1) bytes participate in the
 * comparison. The Levenshtein ratio in *ratio is computed on the
 * normalized contents of s1 / s2 (i.e. on the possibly-truncated buffer
 * data), NOT on the original a / b strings. To compare without truncation,
 * each buffer must be large enough to hold the whole normalized input PLUS
 * the terminating NUL -- LEVN_SBUFF (256) is the recommended floor.
 *
 * On error (NULL inputs, zero buffer size): same convention as sequal --
 * sets errno = EINVAL and returns 0; *ratio, s1, s2 are not modified in
 * that case. See ERROR CONVENTION above.
 *
 * NOTE(review): this header does not say which pointers count as "NULL
 * inputs" (a / b only, or also ratio / s1 / s2), nor whether s1 / s2 may
 * alias a / b -- confirm against the implementation.
 */
int sequal_full(char *a, char *b, float shold, float *ratio,
                char *s1, size_t s1_size,
                char *s2, size_t s2_size);

/* Compare two floats within a tolerance of +/-delta.
 *
 * Parameters:
 *   a, b   values to compare
 *   delta  half-width of the band around b inside which a counts as equal
 *
 * Returns:
 *      0  if |a - b| <= delta
 *     -1  if a < b - delta
 *     +1  if a > b + delta
 *
 * No error condition is documented for this function.
 * NOTE(review): the result for a negative delta or for NaN operands is not
 * specified here -- confirm against the implementation.
 */
int fequal(float a, float b, float delta);

/* String trim: removes leading + trailing whitespace AND collapses internal
 * runs of whitespace to a single space. The UTF-8 no-break space (U+00A0,
 * encoded as the bytes 0xC2 0xA0) is also treated as whitespace.
 * Modifies s in place; nothing is returned. Caller's buffer must already
 * be NUL-terminated.
 *
 * NOTE(review): it is not stated here whether a lone 0xC2 or 0xA0 byte
 * (outside the two-byte NBSP sequence) is also stripped, nor how a NULL s
 * is handled -- confirm against the implementation. */
void trim(char *s);

#endif /* LIB11SHT_H */