parsing – URL parser in C


The title should be self-explanatory: this is a simple URL parser I wrote in C. The function takes a URL from the user and fills in a struct with the information needed to request the resource at that URL over HTTP(S).

```c
/* Schemes recognised by parse_url. */
enum url_protocol {
    PROTOCOL_HTTP  = 0, /* "http"  */
    PROTOCOL_HTTPS = 1  /* "https" */
};

/*
 * Components of a parsed URL, as filled in by parse_url.
 * Every string member is heap-allocated and released by free_url;
 * a member that does not occur in the URL is left NULL.
 */
struct url {
    enum url_protocol protocol; /* scheme the URL was written with */

    char *username;             /* text before '@' (up to ':' if a password follows) */
    char *password;             /* text between ':' and '@' */

    char *host;                 /* host part of the authority */
    char *service;              /* text after the ':' that follows the host */

    char *path;                 /* everything from the first '/' after the host; "/" if none */
};

/*
 * Frees the strings owned by *url. Declared here because parse_url calls it
 * on its failure path before the definition appears.
 */
void
free_url(struct url *url);

/*
 * Copy the len bytes starting at src into a newly allocated, NUL-terminated
 * string. Returns NULL if the allocation fails.
 */
static char *
url_dup_range(const char *src, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy) {
        memcpy(copy, src, len);
        copy[len] = '\0';
    }
    return copy;
}

/*
 * Parse an absolute "http://" or "https://" URL into *url.
 *
 * Accepted shape:  scheme "://" [ user [ ":" password ] "@" ] host
 *                  [ ":" service ] [ "/" rest-of-url ]
 *
 * start  NUL-terminated URL text; it is only read, never modified.
 * url    output structure; any previous contents are overwritten without
 *        being freed.
 *
 * The host may be a bracketed IPv6 literal ("[::1]"); the brackets are kept
 * in url->host. The scheme comparison is case-sensitive. Components that
 * are present but empty (e.g. "http://:pw@host/") are rejected.
 *
 * Returns 0 on success; the caller then owns the strings in *url and must
 * release them with free_url. Returns -1 on malformed input or allocation
 * failure, in which case *url holds no allocations and all of its pointers
 * are NULL.
 */
int
parse_url(char *start, struct url *url)
{
    const char *cur = start;
    const char *end, *delim, *auth_end;
    size_t scheme_len;

    if (!url)
        return -1;
    memset(url, 0, sizeof(*url));
    if (!cur)
        goto err;

    /* Find the end of the scheme */
    end = strchr(cur, ':');
    if (!end)
        goto err;

    /*
     * Match the whole scheme, not just a prefix: comparing only
     * (end - cur) bytes would accept "h:", "ht:" or even ":" as http.
     */
    scheme_len = (size_t)(end - cur);
    if (scheme_len == 4 && !memcmp(cur, "http", 4))
        url->protocol = PROTOCOL_HTTP;
    else if (scheme_len == 5 && !memcmp(cur, "https", 5))
        url->protocol = PROTOCOL_HTTPS;
    else
        goto err;

    /* The scheme must be followed by "//"; end[2] is read only if end[1] is '/' */
    if (end[1] != '/' || end[2] != '/')
        goto err;
    cur = end + 3;

    /* The authority runs to the first '/' or to the end of the string */
    auth_end = cur + strcspn(cur, "/");

    /* Parse credentials; an '@' in the path must not be mistaken for one */
    end = memchr(cur, '@', (size_t)(auth_end - cur));
    if (end) {
        delim = memchr(cur, ':', (size_t)(end - cur));
        if (delim) { /* Username and password */
            if (delim == cur)
                goto err;
            url->username = url_dup_range(cur, (size_t)(delim - cur));
            if (!url->username)
                goto err;
            ++delim;
            if (delim == end)
                goto err;
            url->password = url_dup_range(delim, (size_t)(end - delim));
            if (!url->password)
                goto err;
        } else { /* Only username */
            if (end == cur)
                goto err;
            url->username = url_dup_range(cur, (size_t)(end - cur));
            if (!url->username)
                goto err;
        }
        cur = end + 1;
    }

    /* Locate the ':' that separates host and service, if any */
    if (*cur == '[') {
        /* IPv6 literal: the colons inside the brackets are part of the host */
        const char *close = memchr(cur, ']', (size_t)(auth_end - cur));

        if (!close)
            goto err;
        delim = close + 1;
        if (delim == auth_end)
            delim = NULL; /* No service after the literal */
        else if (*delim != ':')
            goto err; /* Junk between ']' and the end of the authority */
    } else {
        delim = memchr(cur, ':', (size_t)(auth_end - cur));
    }

    if (delim) { /* Host and service */
        if (delim == cur)
            goto err;
        url->host = url_dup_range(cur, (size_t)(delim - cur));
        if (!url->host)
            goto err;
        ++delim;
        if (delim == auth_end)
            goto err;
        url->service = url_dup_range(delim, (size_t)(auth_end - delim));
        if (!url->service)
            goto err;
    } else { /* Only host */
        if (auth_end == cur)
            goto err;
        url->host = url_dup_range(cur, (size_t)(auth_end - cur));
        if (!url->host)
            goto err;
    }

    /* Default path is a single / */
    url->path = *auth_end ? url_dup_range(auth_end, strlen(auth_end))
                          : url_dup_range("/", 1);
    if (!url->path)
        goto err;
    return 0;

err:
    free_url(url);
    /* Leave no dangling pointers, so a later free_url by the caller is harmless */
    memset(url, 0, sizeof(*url));
    return -1;
}

/*
 * Release every string owned by *url and reset the pointers to NULL, so that
 * calling free_url again on the same structure (for example after parse_url
 * has already cleaned up on failure) cannot double-free.
 *
 * url may be NULL, in which case nothing happens. The protocol member is
 * left untouched and the structure itself is not freed.
 */
void
free_url(struct url *url)
{
    if (!url)
        return;

    /* free(NULL) is a no-op, so members that were never set need no guard */
    free(url->username);
    free(url->password);
    free(url->host);
    free(url->service);
    free(url->path);

    url->username = NULL;
    url->password = NULL;
    url->host = NULL;
    url->service = NULL;
    url->path = NULL;
}
```