// c_src/libwrouter/src/lexer.c

#include "lexer.h"
#include "common.h"
#include "token.h"
#include "symbol.h"
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// Point the lexer `lx` at `request`, which is `length` bytes long.
//
// The request is accepted only when it is non-NULL, non-empty, starts with
// '/', and does not exceed LEXER_CHAR_LIMIT bytes. A rejected request leaves
// `lx` fully zeroed. The bytes are not copied: `lx` stores pointers into
// `request`.
void lexer_load(lexer_t *lx, const char *request, size_t length)
{
    // Short-circuit order matters: `*request` is read only after the pointer
    // is known to be non-NULL and the length non-zero.
    const int usable = request != NULL
        && length != 0
        && *request == '/'
        && length <= LEXER_CHAR_LIMIT;

    if (!usable) {
        memset(lx, 0, sizeof(*lx));
        return;
    }

    lx->length = length;
    lx->cursor = request;
    lx->str = request;
}

// Produce the next token from the request loaded into `lx`.
//
// The cursor is expected to rest on a '/' separator or at the end of input.
// Result by case:
//   - zero-initialised token ("illegal"): the lexer is unloaded, the next
//     segment starts with a second '/', or the segment contains '*', '#',
//     '?', whitespace or a control character;
//   - TOKEN_END: the cursor is at the end of input, or the whole request is
//     the bare root "/";
//   - TOKEN_TRAILING: only a final '/' remains after at least one segment;
//   - TOKEN_LITERAL: `ptr`/`length` describe the segment following the '/'.
// The cursor advances only when a TOKEN_LITERAL is returned.
token_t lexer_next(lexer_t *lx)
{
    token_t tok = { 0 };
    const char *c = lx->cursor;

    // Null. Checked before `end` is computed so that no pointer arithmetic
    // is ever performed on a zeroed (NULL) `str`.
    if (c == NULL || lx->length == 0)
        return tok; // Illegal.

    const char *end = lx->str + lx->length;

    // End.
    if (c == end) {
        tok.type = TOKEN_END;
        return tok;
    }

    // Check for root '/' or trailing-slash.
    if (*c == '/' && c == end - 1) {
        tok.type = c == lx->str ? TOKEN_END : TOKEN_TRAILING;
        return tok;
    }

    // Step over the separator; a second '/' right after it is a double-slash.
    if (++c < end && *c == '/')
        return tok; // Illegal.

    // Consume until next '/'.
    const char *start = c;
    for (; c < end && *c != '/'; c++) {
        // <ctype.h> classifiers require a value representable as
        // unsigned char (or EOF); a plain `char` may be negative for bytes
        // >= 0x80, so convert first.
        const unsigned char ch = (unsigned char)*c;

        if (ch == '*' || ch == '#' || ch == '?' || isspace(ch) || iscntrl(ch))
            return tok; // Illegal character.
    }

    // Literal.
    lx->cursor = c;
    tok.type = TOKEN_LITERAL;
    tok.ptr = start;
    // NOTE(review): assumes LEXER_CHAR_LIMIT keeps segment lengths within
    // uint16_t range; otherwise this cast truncates — confirm.
    tok.length = (uint16_t)(c - start);
    return tok;
}