/* lib11sht/test_lib11sht.c */

/* Stress tests for lib11sht — aim to break upper / lower bounds.
 * Compile: gcc -Wall -Wextra -O2 test_lib11sht.c lib11sht.c -lm -o test_lib11sht
 * Run: ./test_lib11sht
 * Exit 0 if all tests pass, non-zero if any test failed (every test still runs).
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include "lib11sht.h"

/* Number of failed checks recorded so far; main() maps it to the exit status. */
static int fails = 0;

/* CHECK(cond, msg): print msg followed by PASS or FAIL, and bump `fails`
 * when cond is false.
 *
 * cond is evaluated exactly once and BEFORE anything is printed. Conditions
 * in this file inspect errno, and the C standard allows any library call
 * (printf included) to change errno even when it succeeds, so sampling the
 * condition after printing could test a clobbered value.
 */
#define CHECK(cond, msg) do { \
    const int check_passed_ = !!(cond); \
    printf("  %s ... ", (msg)); \
    if(check_passed_) { printf("PASS\n"); } else { printf("FAIL\n"); fails++; } \
} while(0)

/* Runs every bounds test in sequence and prints a summary.
 *
 * Each test prints a header line, calls sequal() or sequal_full(), and
 * records its verdict through CHECK(), which increments the file-level
 * `fails` counter on failure. Where a test inspects errno, errno is cleared
 * before the library call and copied into `err` immediately afterwards, so
 * the verdict cannot be affected by output calls made in between.
 *
 * Returns 0 when no check failed, 1 otherwise. A failing check does not stop
 * the run; all tests always execute.
 */
int main(void)
{
    int r;
    int err;                                /* errno snapshot taken right after the call under test */
    float ratio;
    char s1[LEVN_SBUFF], s2[LEVN_SBUFF];

    /* ----- UPPER bound tests ----- */

    /* Test 1: pure ASCII input longer than LEVN_SBUFF. Should NOT crash,
     * should truncate cleanly. Build two identical 399-char strings. */
    {
        char long_a[400], long_b[400];
        memset(long_a, 'x', sizeof(long_a) - 1);
        memset(long_b, 'x', sizeof(long_b) - 1);
        long_a[sizeof(long_a) - 1] = '\0';
        long_b[sizeof(long_b) - 1] = '\0';

        printf("Test 1: 400-char identical ASCII strings (>LEVN_SBUFF)\n");
        errno = 0;
        r = sequal(long_a, long_b, 0.85);
        err = errno;
        CHECK(err == 0 && r == 0, "no errno, returns 0 (equal-after-truncate)");
    }

    /* Test 2: one over-length string of 'x', the other of 'y'. Even after
     * any truncation they share no character, so they should differ. */
    {
        char xs[400], ys[400];
        memset(xs, 'x', sizeof(xs) - 1);
        memset(ys, 'y', sizeof(ys) - 1);
        xs[sizeof(xs) - 1] = '\0';
        ys[sizeof(ys) - 1] = '\0';

        printf("Test 2: 400 'x' vs 400 'y' (all different, over-length)\n");
        errno = 0;
        r = sequal(xs, ys, 0.85);
        err = errno;
        CHECK(err == 0 && r != 0, "no errno, returns non-zero (different)");
    }

    /* Test 3: exact-boundary length: LEVN_SBUFF-1 chars + NUL = LEVN_SBUFF */
    {
        char a[LEVN_SBUFF], b[LEVN_SBUFF];
        memset(a, 'a', LEVN_SBUFF - 1);
        memset(b, 'a', LEVN_SBUFF - 1);
        a[LEVN_SBUFF - 1] = '\0';
        b[LEVN_SBUFF - 1] = '\0';

        printf("Test 3: exactly LEVN_SBUFF-1 chars + NUL\n");
        errno = 0;
        r = sequal(a, b, 0.85);
        err = errno;
        CHECK(err == 0 && r == 0, "no errno, returns 0 (equal at boundary)");
    }

    /* Test 4: long input WITH accents — UTF-8 multi-byte sequences starting
     * at offset 240. Expected: each accent is asciified to one ASCII char,
     * giving 246 visible chars per string. */
    {
        char a[400] = "", b[400] = "";
        /* 240 'a' followed by "ÃÃÃÃÃÃ" (UTF-8: 0xC3 0x83 each, 12 bytes) */
        memset(a, 'a', 240);
        memset(b, 'a', 240);
        a[240] = '\0';
        b[240] = '\0';
        strcat(a, "ÃÃÃÃÃÃ");          /* +12 bytes UTF-8, 6 chars visible → asciify to 'AAAAAA' */
        strcat(b, "ÃÃÃÃÃÃ");
        printf("Test 4: 240 'a' + 6×Ã (multi-byte near boundary)\n");
        errno = 0;
        r = sequal(a, b, 0.85);
        err = errno;
        CHECK(err == 0 && r == 0, "no errno, returns 0 (equal)");
    }

    /* Test 5: full-detail call with undersized buffers — this is the actual
     * realistic break path. Pass 64-byte buffers to sequal_full. */
    {
        char small1[64], small2[64];
        char a[300], b[300];
        memset(a, 'a', sizeof(a) - 1);
        memset(b, 'a', sizeof(b) - 1);
        a[sizeof(a) - 1] = '\0';
        b[sizeof(b) - 1] = '\0';

        /* Pre-fill the outputs with a non-NUL byte: a terminator found
         * afterwards must have been written by the library, not left over
         * from stale stack contents. */
        memset(small1, 'Z', sizeof(small1));
        memset(small2, 'Z', sizeof(small2));

        printf("Test 5: sequal_full with 64-byte buffers + 300-char input\n");
        errno = 0;
        r = sequal_full(a, b, 0.85, &ratio, small1, sizeof(small1), small2, sizeof(small2));
        err = errno;
        CHECK(err == 0 && r == 0, "no errno, returns 0 (equal after truncation to 63 chars)");
        /* Bounded search instead of strlen(): strlen would read past the
         * array in exactly the failure case this check is meant to catch. */
        CHECK(memchr(small1, '\0', sizeof(small1)) != NULL, "small1 NUL-terminated within buffer");
        CHECK(memchr(small2, '\0', sizeof(small2)) != NULL, "small2 NUL-terminated within buffer");
    }

    /* ----- LOWER bound tests ----- */

    /* Test 6: NULL inputs — should set errno=EINVAL and return 0 */
    {
        printf("Test 6: NULL inputs via sequal_full\n");
        errno = 0;
        r = sequal_full(NULL, "x", 0.85, &ratio, s1, LEVN_SBUFF, s2, LEVN_SBUFF);
        err = errno;
        CHECK(err == EINVAL && r == 0, "NULL a: errno=EINVAL, returns 0");

        errno = 0;
        r = sequal_full("x", NULL, 0.85, &ratio, s1, LEVN_SBUFF, s2, LEVN_SBUFF);
        err = errno;
        CHECK(err == EINVAL && r == 0, "NULL b: errno=EINVAL, returns 0");

        errno = 0;
        r = sequal_full("x", "y", 0.85, NULL, s1, LEVN_SBUFF, s2, LEVN_SBUFF);
        err = errno;
        CHECK(err == EINVAL && r == 0, "NULL ratio: errno=EINVAL, returns 0");

        errno = 0;
        r = sequal_full("x", "y", 0.85, &ratio, NULL, LEVN_SBUFF, s2, LEVN_SBUFF);
        err = errno;
        CHECK(err == EINVAL && r == 0, "NULL s1: errno=EINVAL, returns 0");

        errno = 0;
        r = sequal_full("x", "y", 0.85, &ratio, s1, LEVN_SBUFF, NULL, LEVN_SBUFF);
        err = errno;
        CHECK(err == EINVAL && r == 0, "NULL s2: errno=EINVAL, returns 0");
    }

    /* Test 7: zero-size buffers — should set errno=EINVAL */
    {
        printf("Test 7: zero-size buffers\n");
        errno = 0;
        r = sequal_full("x", "y", 0.85, &ratio, s1, 0, s2, LEVN_SBUFF);
        err = errno;
        CHECK(err == EINVAL && r == 0, "s1_size=0: errno=EINVAL, returns 0");

        errno = 0;
        r = sequal_full("x", "y", 0.85, &ratio, s1, LEVN_SBUFF, s2, 0);
        err = errno;
        CHECK(err == EINVAL && r == 0, "s2_size=0: errno=EINVAL, returns 0");
    }

    /* Test 8: minimum non-empty strings + threshold extremes */
    {
        printf("Test 8: minimal strings + threshold edge cases\n");

        /* single char equal */
        errno = 0;
        r = sequal("a", "a", 0.85);
        err = errno;
        CHECK(r == 0 && err == 0, "'a' vs 'a' → 0 (equal)");

        /* single char different */
        errno = 0;
        r = sequal("a", "b", 0.85);
        err = errno;
        CHECK(r != 0 && err == 0, "'a' vs 'b' → non-zero (different)");

        /* The remaining checks look only at the return value, not errno. */

        /* threshold = 0.0: any non-zero similarity matches → 0 even when different */
        r = sequal("hello", "world", 0.0);
        CHECK(r == 0, "shold=0.0 makes any non-empty pair 'match'");

        /* threshold = 1.0: only exact-after-normalize → strict strcmp */
        r = sequal("hello", "Hello", 1.0);
        CHECK(r == 0, "shold=1.0 + case-only diff still matches (normalized exact)");

        r = sequal("hello", "world", 1.0);
        CHECK(r != 0, "shold=1.0 + completely different → non-zero");

        /* whitespace-only strings */
        r = sequal("   ", "       ", 0.85);
        CHECK(r == 0, "whitespace-only strings normalize equal");
    }

    /* Summary: exit status reflects whether any CHECK failed. */
    printf("\n");
    if(fails == 0) {
        printf("ALL TESTS PASSED\n");
        return 0;
    } else {
        printf("%d TEST(S) FAILED\n", fails);
        return 1;
    }
}