/*
 * XML Catalog Manager (xmlcatmgr)
 * $Id: sgml.c,v 1.2 2004/08/31 21:25:47 jmmv Exp $
 *
 * Copyright (c) 2003, 2004 Julio M. Merino Vidal.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of the author nor the names of contributors may
 *    be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This file implements all SGML mode actions, as well as the catalog
 * parser for SGML files.
 */

#include "system.h"

#ifndef lint
__RCSID("$Id: sgml.c,v 1.2 2004/08/31 21:25:47 jmmv Exp $");
#endif

#include "grstr.h"
#include "mem.h"
#include "linklist.h"
#include "sgml.h"

/*
 * Table of known catalog entry keywords.  Each row pairs a keyword with
 * the number of arguments that must follow it in a catalog file.  The
 * table is consulted both when validating command line arguments and
 * when parsing an existing catalog.  A row with a NULL name marks the
 * end of the table.
 */
struct type {
    const char *t_name;     /* Entry keyword, as written in the catalog. */
    int t_params;           /* Number of arguments the keyword takes. */
};

static struct type Types[] = {
    { "BASE",     1 },
    { "CATALOG",  1 },
    { "DELEGATE", 2 },
    { "DOCTYPE",  2 },
    { "DOCUMENT", 1 },
    { "ENTITY",   2 },
    { "LINKTYPE", 2 },
    { "NOTATION", 2 },
    { "OVERRIDE", 1 },
    { "PUBLIC",   2 },
    { "SGMLDECL", 1 },
    { "SYSTEM",   2 },
    { NULL,       0 }
};

/* Describes a catalog entry.
 * If e_type is ENTRY_COMMENT, e_comment points to the comment text
 * (without the surrounding '--' delimiters) and the e_field members are
 * not used.
 * If e_type is ENTRY_NORMAL, e_field1 is the entry keyword (one of the
 * names listed in 'Types'), e_field2 is its first argument and e_field3
 * is its second argument.  e_field3 is NULL when the 'Types' row whose
 * t_name matches e_field1 has t_params set to 1.
 * Members that do not apply to the entry's e_type must not be relied
 * upon.
 * Pointers in this structure are expected to be pointers to memory
 * regions allocated with malloc(3); they are later free(3)'d
 * unconditionally.
 */
struct entry {
    LINKLIST_ENTRY(entry);

#define ENTRY_NORMAL 1
#define ENTRY_COMMENT 2
    int e_type;         /* ENTRY_NORMAL or ENTRY_COMMENT. */
    char *e_comment;    /* Comment text; ENTRY_COMMENT only. */
    char *e_field1;     /* Entry keyword; ENTRY_NORMAL only. */
    char *e_field2;     /* First argument; ENTRY_NORMAL only. */
    char *e_field3;     /* Second argument or NULL; ENTRY_NORMAL only. */
};

/* This list type describes a catalog; a list of entries. */
LINKLIST_HEAD(entries, entry);

/*
 * Kinds of tokens the catalog tokenizer can report.  TOKEN_ERROR is
 * also what the tokenizer returns when there is nothing left to read.
 */
enum {
    TOKEN_ERROR = 0,
    TOKEN_WORD = 1,
    TOKEN_STRING = 2,
    TOKEN_COMMENT = 3
};

/* In-memory catalog manipulation. */
static bool add_entry(struct entries *catalog, const char *type,
                      const char *orig, const char *replace, bool prepend);
static bool remove_entry(struct entries *catalog, const char *type,
                         const char *orig);
static void free_catalog(struct entries *catalog);

/* Catalog file input and output. */
static void read_catalog(FILE *f, struct entries *catalog);
static void write_catalog(FILE *f, struct entries *catalog);

/* Tokenizer used while reading a catalog file. */
static int read_token(FILE *f, char **dest);
static bool read_token_comment(FILE *f, struct grstr *gs);
static bool read_token_string(FILE *f, struct grstr *gs);
static bool read_token_word(FILE *f, struct grstr *gs);

/* --------------------------------------------------------------------- */

/*
 * The SGML add action.  Arguments are consumed in groups made of an
 * entry type, the original identifier and, optionally, a replacement.
 * A literal "--" in the replacement position means "no replacement".
 * Every group is handed to add_entry.  The catalog is always written
 * back to 'f', even when some group was rejected.  Returns true only if
 * every group was added successfully.
 */
bool
sgml_add(int argc, char *const *argv, FILE *f, bool prepend)
{
    bool ok = true;
    struct entries catalog;

    read_catalog(f, &catalog);

    while (argc > 0) {
        const char *kind, *key, *value = NULL;

        /* A single leftover argument cannot form an entry. */
        if (argc < 2) {
            warnx("unbalanced arguments for `add' action");
            ok = false;
            break;
        }

        kind = argv[0];
        key = argv[1];
        argv += 2;
        argc -= 2;

        /* The third member of the group is optional; "--" is an
         * explicit placeholder for "no replacement". */
        if (argc > 0) {
            if (strcmp(argv[0], "--") != 0)
                value = argv[0];
            argv++;
            argc--;
        }

        if (!add_entry(&catalog, kind, key, value, prepend))
            ok = false;
    }

    write_catalog(f, &catalog);
    free_catalog(&catalog);

    return ok;
}

/* --------------------------------------------------------------------- */

/*
 * The SGML create action.  Generates an empty catalog file, with a
 * single comment in it.
 *
 * Returns true on success.  If the memory needed for the comment entry
 * cannot be allocated, a warning is printed, the file is left untouched
 * and false is returned.
 */
bool
sgml_create(FILE *f)
{
    static const char banner[] = " Created by " PACKAGE_STRING " ";
    struct entry *e;
    struct entries catalog;

    e = malloc(sizeof(*e));
    if (e == NULL) {
        warn("cannot allocate catalog entry");
        return false;
    }

    e->e_type = ENTRY_COMMENT;
    e->e_field1 = e->e_field2 = e->e_field3 = NULL;
    /* free_catalog releases e_comment with free(3), so the entry needs
     * its own heap copy of the banner. */
    e->e_comment = strdup(banner);
    if (e->e_comment == NULL) {
        warn("cannot allocate catalog entry");
        free(e);
        return false;
    }

    LINKLIST_INIT(&catalog);
    LINKLIST_APPEND(&catalog, e);

    write_catalog(f, &catalog);
    free_catalog(&catalog);

    return true;
}

/* --------------------------------------------------------------------- */

/*
 * The SGML lookup action.  For each argument, prints every normal
 * catalog entry whose first argument (e_field2) equals it.  An argument
 * that matches nothing produces a warning.  Returns true only if every
 * argument matched at least one entry.
 */
bool
sgml_lookup(int argc, char *const *argv, FILE *f)
{
    bool all_found = true;
    int n;
    struct entry *e;
    struct entries catalog;

    assert(argc > 0 && argv != NULL && f != NULL);

    read_catalog(f, &catalog);

    for (n = 0; n < argc; n++) {
        const char *wanted = argv[n];
        bool matched = false;

        LINKLIST_FOREACH(e, &catalog) {
            if (e->e_type == ENTRY_NORMAL) {
                assert(e->e_field2 != NULL);
                if (strcmp(e->e_field2, wanted) == 0) {
                    matched = true;
                    /* Entries with a single argument have no third
                     * field to show. */
                    if (e->e_field3 != NULL) {
                        printf("%s \"%s\" \"%s\"\n", e->e_field1,
                               e->e_field2, e->e_field3);
                    } else {
                        printf("%s \"%s\"\n", e->e_field1,
                               e->e_field2);
                    }
                }
            }
        }

        if (!matched) {
            warnx("no matching entry for `%s'", wanted);
            all_found = false;
        }
    }

    free_catalog(&catalog);

    return all_found;
}

/* --------------------------------------------------------------------- */

/*
 * The SGML remove action.  Arguments are expected as (type, identifier)
 * pairs, each of which is passed to remove_entry.  As a compatibility
 * measure with previous versions, a single argument removes all entries
 * matching that identifier regardless of their type.  An odd number of
 * arguments greater than one is rejected without removing anything.
 * Except when no arguments are given, the catalog is written back to
 * 'f' before returning.  Returns true only if every removal succeeded.
 */
bool
sgml_remove(int argc, char *const *argv, FILE *f)
{
    bool ok;
    struct entries catalog;

    if (argc == 0) {
        warnx("too few arguments for `remove' action");
        return false;
    }

    read_catalog(f, &catalog);

    if (argc == 1) {
        warnx("enabling compatibility mode; removing ALL matching entries");
        ok = remove_entry(&catalog, NULL, argv[0]);
    } else if (argc % 2 != 0) {
        warnx("unbalanced arguments for `remove' action");
        ok = false;
    } else {
        ok = true;
        for (; argc >= 2; argc -= 2, argv += 2) {
            if (!remove_entry(&catalog, argv[0], argv[1]))
                ok = false;
        }
    }

    write_catalog(f, &catalog);
    free_catalog(&catalog);

    return ok;
}

/* --------------------------------------------------------------------- */

/*
 * Adds the given entry to the catalog.  'replace' must be NULL if the
 * given 'type' only expects one argument and non-NULL if it expects two
 * (this is determined from the 'Types' array).
 *
 * The strings are copied, so the caller keeps ownership of its
 * arguments.  The new entry is placed at the beginning of the catalog
 * if 'prepend' is true and at the end otherwise.
 *
 * Returns true if the entry was added.  Returns false, after printing a
 * warning and leaving the catalog unchanged, if the type is unknown,
 * the argument count does not match the type, an entry with the same
 * type and 'orig' already exists, or memory cannot be allocated.
 */
static bool
add_entry(struct entries *catalog, const char *type, const char *orig,
          const char *replace, bool prepend)
{
    bool found;
    int i;
    struct entry *e;

    /* Locate the row describing 'type'; the table ends with a NULL
     * name. */
    for (i = 0; Types[i].t_name != NULL; i++) {
        if (strcmp(Types[i].t_name, type) == 0)
            break;
    }
    if (Types[i].t_name == NULL) {
        warnx("unknown type `%s'", type);
        return false;
    }
    if ((Types[i].t_params == 1 && replace != NULL) ||
        (Types[i].t_params == 2 && replace == NULL)) {
        warnx("parameter count mismatch for type `%s'", type);
        return false;
    }

    /* Refuse duplicates: same type and same original identifier. */
    found = false;
    LINKLIST_FOREACH(e, catalog) {
        if (e->e_type == ENTRY_NORMAL) {
            assert(e->e_field1 != NULL);
            assert(e->e_field2 != NULL);
            if (strcmp(e->e_field1, type) == 0 &&
                strcmp(e->e_field2, orig) == 0) {
                found = true;
                break;
            }
        }
    }
    if (found) {
        warnx("entry already exists for `%s' of type `%s'", orig, type);
        return false;
    }

    e = malloc(sizeof(*e));
    if (e == NULL) {
        warn("cannot allocate catalog entry");
        return false;
    }
    e->e_type = ENTRY_NORMAL;
    e->e_comment = NULL;
    e->e_field1 = strdup(type);
    e->e_field2 = strdup(orig);
    e->e_field3 = replace != NULL ? strdup(replace) : NULL;

    /* A half-built entry must never reach the list: other code in this
     * file passes the fields to strcmp and printf without NULL checks. */
    if (e->e_field1 == NULL || e->e_field2 == NULL ||
        (replace != NULL && e->e_field3 == NULL)) {
        warn("cannot allocate catalog entry");
        free(e->e_field1);
        free(e->e_field2);
        free(e->e_field3);
        free(e);
        return false;
    }

    if (prepend) {
        LINKLIST_PREPEND(catalog, e);
    } else {
        LINKLIST_APPEND(catalog, e);
    }

    return true;
}

/* --------------------------------------------------------------------- */

/*
 * Removes the given entry from the catalog.  If 'type' is NULL, removes
 * all matching entries, not only one.
 */
static bool
remove_entry(struct entries *catalog, const char *type, const char *orig)
{
    bool found;
    struct entry *iter;

    assert(catalog != NULL && orig != NULL);

    if (type != NULL) {
        int i = 0;

        found = false;
        while (!found && Types[i].t_name != NULL) {
            if (strcmp(Types[i].t_name, type) == 0) {
                found = true;
            } else {
                i++;
            }
        }
        if (!found) {
            warnx("unknown type `%s'", type);
            return false;
        }
    }

    found = false;
    iter = LINKLIST_FIRST(catalog);
    while (iter != NULL) {
        struct entry *tmp;

        tmp = LINKLIST_NEXT(iter);
        if (iter->e_type == ENTRY_NORMAL) {
            if (strcmp(iter->e_field2, orig) == 0 &&
                (type == NULL || ((type != NULL) &&
                                  (strcmp(iter->e_field1, type) == 0)))) {
                free(iter->e_field1);
                free(iter->e_field2);
                if (iter->e_field3 != NULL)
                    free(iter->e_field3);
                LINKLIST_REMOVE(catalog, iter);
                free(iter);
                found = true;
            }
        }
        iter = tmp;
    }

    if (!found && type != NULL) {
        warnx("no matching entry for `%s' of type `%s'", orig, type);
    } else if (!found) {
        warnx("no matching entry for `%s' of any type", orig);
    }

    return found;
}

/* --------------------------------------------------------------------- */

/*
 * Releases every entry of the given catalog together with the strings
 * it owns.  The list head itself is not modified, so the catalog must
 * not be used again without reinitializing it.
 */
static void
free_catalog(struct entries *catalog)
{
    struct entry *cur, *next;

    /* Grab the successor first; 'cur' is gone by the end of the body. */
    for (cur = LINKLIST_FIRST(catalog); cur != NULL; cur = next) {
        next = LINKLIST_NEXT(cur);

        if (cur->e_type != ENTRY_COMMENT) {
            free(cur->e_field1);
            free(cur->e_field2);
            free(cur->e_field3);
        } else {
            free(cur->e_comment);
        }

        free(cur);
    }
}

/* --------------------------------------------------------------------- */

/*
 * Reads the next argument of a catalog entry: the first token that is
 * not a comment.  Comments found on the way are reported and released.
 * Returns the argument as a newly allocated string that the caller must
 * free, or NULL if read_token reported TOKEN_ERROR (end of file or
 * failure).
 */
static char *
read_entry_argument(FILE *f)
{
    char *arg;
    int tktype;

    while ((tktype = read_token(f, &arg)) == TOKEN_COMMENT) {
        warnx("discarding comment '%s'", arg);
        free(arg);
    }

    return tktype == TOKEN_ERROR ? NULL : arg;
}

/*
 * Reads a catalog file, storing all entries found to the given catalog.
 *
 * 'catalog' is initialized here, so it must not hold entries on entry.
 * The file is read from its beginning.  Comments become ENTRY_COMMENT
 * entries; a word that names a known type, followed by the arguments
 * that type requires, becomes an ENTRY_NORMAL entry.  Any other token
 * is reported and skipped.
 *
 * Reading stops at the first TOKEN_ERROR (end of file or failure).  An
 * entry that is cut short by it is reported and dropped, so every
 * ENTRY_NORMAL entry in the resulting catalog has all the arguments its
 * type requires.
 *
 * NOTE(review): this function cannot report failure, so callers cannot
 * tell a complete parse from one that stopped early because of a read
 * or allocation error.
 */
static void
read_catalog(FILE *f, struct entries *catalog)
{
    char *token;
    int i, type;
    struct entry *e;

    LINKLIST_INIT(catalog);
    rewind(f);

    /* Every token returned by read_token is a new memory chunk; on each
     * path below it is either stored in an entry or freed. */
    while ((type = read_token(f, &token)) != TOKEN_ERROR) {
        char *f2, *f3;

        if (type == TOKEN_COMMENT) {
            /* Got a comment; add it to the catalog as is. */
            e = malloc(sizeof(*e));
            if (e == NULL) {
                warn("cannot allocate catalog entry");
                free(token);
                break;
            }
            e->e_type = ENTRY_COMMENT;
            e->e_comment = token;
            e->e_field1 = e->e_field2 = e->e_field3 = NULL;
            LINKLIST_APPEND(catalog, e);
            continue;
        }

        if (type != TOKEN_WORD) {
            warnx("unexpected token `%s'", token);
            free(token);
            continue;
        }

        /* Got a word.  It must be the beginning of an entry, so check
         * it against the 'Types' array. */
        for (i = 0; Types[i].t_name != NULL; i++) {
            if (strcmp(Types[i].t_name, token) == 0)
                break;
        }
        if (Types[i].t_name == NULL) {
            warnx("unexpected token `%s'", token);
            free(token);
            continue;
        }

        /* The word starts an entry; read the one or two arguments its
         * type requires. */
        f2 = read_entry_argument(f);
        f3 = NULL;
        if (f2 != NULL && Types[i].t_params == 2)
            f3 = read_entry_argument(f);

        if (f2 == NULL || (Types[i].t_params == 2 && f3 == NULL)) {
            /* The tokenizer gave up in the middle of the entry.  An
             * entry with missing fields must not be stored, and there
             * is nothing more to read. */
            warnx("`%s' entry requires %d arguments", token,
                  Types[i].t_params);
            free(f2);
            free(token);
            break;
        }

        e = malloc(sizeof(*e));
        if (e == NULL) {
            warn("cannot allocate catalog entry");
            free(f3);
            free(f2);
            free(token);
            break;
        }
        e->e_type = ENTRY_NORMAL;
        e->e_comment = NULL;
        e->e_field1 = token;
        e->e_field2 = f2;
        e->e_field3 = f3;
        LINKLIST_APPEND(catalog, e);
    }
}

/* --------------------------------------------------------------------- */

/*
 * Reads the next token from the catalog file, skipping any leading
 * whitespace.  The token text is accumulated in a grstr object and then
 * handed to the caller through the 'dest' output parameter.
 *
 * Returns TOKEN_STRING for a double-quoted string, TOKEN_COMMENT for a
 * comment delimited by '--', and TOKEN_WORD for anything else.  In
 * these cases '*dest' is the token text without its delimiters; callers
 * in this file release it with free(3).
 *
 * Returns TOKEN_ERROR at end of file or on failure; '*dest' is then
 * always set to NULL.
 */
static int
read_token(FILE *f, char **dest)
{
    int ch, type;
    struct grstr *buf;

    /* Guarantee a defined value on every error path, including the
     * early return below. */
    *dest = NULL;

    buf = grstr_new();
    if (buf == NULL)
        return TOKEN_ERROR;

    type = TOKEN_ERROR;

    /* Skip leading whitespace. */
    do {
        ch = fgetc(f);
    } while (ch != EOF && isspace(ch));

    if (ch == EOF) {
        if (ferror(f))
            warn("cannot read next character");
    } else if (ch == '"') {
        /* Got a double quote character; this is the start of a string,
         * which is handled by read_token_string. */
        if (read_token_string(f, buf))
            type = TOKEN_STRING;
    } else if (ch == '-') {
        /* Got a dash character; this may be the start of a comment
         * depending on whether the next character is another dash. */
        ch = fgetc(f);
        if (ch == EOF && ferror(f)) {
            warn("cannot read next character");
        } else if (ch == '-') {
            /* Got two dashes; this is the start of a comment, which is
             * handled by read_token_comment. */
            if (read_token_comment(f, buf))
                type = TOKEN_COMMENT;
        } else if (ch == EOF || isspace(ch)) {
            /* A lone dash: the word ends right here.  The whitespace
             * must not become part of the word, nor may the text that
             * follows be merged into it. */
            if (grstr_append_char(buf, '-'))
                type = TOKEN_WORD;
        } else {
            /* A word that merely starts with a dash. */
            if (grstr_append_char(buf, '-') && grstr_append_char(buf, ch) &&
                read_token_word(f, buf))
                type = TOKEN_WORD;
        }
    } else {
        /* Got an unrecognized character: start of word. */
        if (grstr_append_char(buf, ch) && read_token_word(f, buf))
            type = TOKEN_WORD;
    }

    if (type != TOKEN_ERROR)
        *dest = grstr_to_text(buf);
    else
        grstr_free(buf);

    return type;
}

/* --------------------------------------------------------------------- */

/*
 * Reads a comment from the catalog file, appending its text to 'gs'
 * until a sequence of two consecutive dashes ('--') is found.  The
 * opening pair of dashes is assumed to have been consumed by the
 * caller, so reading starts inside the comment.  The closing pair is
 * consumed but not stored.  A single dash inside the comment is kept as
 * part of its text.
 *
 * If the file ends before the closing pair is found, the comment is
 * taken to extend to the end of the file.
 *
 * Returns true on success.  Returns false, after printing a warning, if
 * a read error occurs or the text cannot be appended to 'gs'.
 */
static bool
read_token_comment(FILE *f, struct grstr *gs)
{
    bool error = false;
    int ch;

    for (;;) {
        ch = fgetc(f);
        if (ch == EOF)
            break;

        if (ch != '-') {
            if (!grstr_append_char(gs, ch)) {
                error = true;
                break;
            }
            continue;
        }

        /* Saw one dash; the next character decides whether it closes
         * the comment or belongs to its text. */
        ch = fgetc(f);
        if (ch == '-')
            break;

        if (!grstr_append_char(gs, '-')) {
            error = true;
            break;
        }
        /* Stop at end of input: EOF is not a character to store, and
         * looping on it would never terminate. */
        if (ch == EOF)
            break;
        if (!grstr_append_char(gs, ch)) {
            error = true;
            break;
        }
    }

    if (error || (ch == EOF && ferror(f))) {
        warn("cannot read next character");
        return false;
    }

    return true;
}

/* --------------------------------------------------------------------- */

/*
 * Reads a quoted string from the catalog file, appending every
 * character to 'gs' until the closing double quote or the end of the
 * file is reached.  The opening double quote is assumed to have been
 * consumed by the caller; the closing one is consumed but not stored.
 * Returns false, after printing a warning, if a read error occurs or a
 * character cannot be appended; returns true otherwise.
 */
static bool
read_token_string(FILE *f, struct grstr *gs)
{
    bool failed = false;
    int c;

    for (;;) {
        c = fgetc(f);
        if (c == EOF || c == '"')
            break;
        if (!grstr_append_char(gs, c)) {
            failed = true;
            break;
        }
    }

    if (failed || (c == EOF && ferror(f))) {
        warn("cannot read next character");
        return false;
    }

    return true;
}

/* --------------------------------------------------------------------- */

/*
 * Reads a word from the catalog file, appending its characters to 'gs'
 * until a whitespace character or the end of the file is found.  The
 * first character of the word is assumed to have been read and added to
 * 'gs' by the caller.
 *
 * A single dash is an ordinary character of the word.  Two consecutive
 * dashes start a comment touching the word; that comment is read,
 * reported and discarded, and the word continues after it.
 *
 * Returns true on success.  Returns false, after printing a warning, if
 * a read error occurs or memory cannot be obtained.
 */
static bool
read_token_word(FILE *f, struct grstr *gs)
{
    bool error, res;
    int ch;

    error = false;

    while (!error && (ch = fgetc(f)) != EOF && !isspace(ch)) {
        if (ch != '-') {
            if (!grstr_append_char(gs, ch))
                error = true;
            continue;
        }

        /* Got a dash; the next character tells whether this is a
         * comment or just part of the word. */
        ch = fgetc(f);
        if (ch == '-') {
            struct grstr *tmp;

            tmp = grstr_new();
            if (tmp == NULL)
                error = true;
            else if (read_token_comment(f, tmp)) {
                char *text;

                /* NOTE(review): grstr_to_text is assumed to take over
                 * 'tmp', as read_token relies on the same thing. */
                text = grstr_to_text(tmp);
                warnx("discarding comment touching word `%s'", text);
                free(text);
            } else {
                grstr_free(tmp);
                error = true;
            }
        } else if (!grstr_append_char(gs, '-')) {
            error = true;
        } else if (ch == EOF || isspace(ch)) {
            /* The dash was the last character of the word. */
            break;
        } else if (!grstr_append_char(gs, ch)) {
            error = true;
        }
    }

    if (error || (ch == EOF && ferror(f))) {
        res = false;
        warn("cannot read next character");
    } else
        res = true;

    return res;
}

/* --------------------------------------------------------------------- */

/*
 * Writes the given catalog to the file.  The stream is rewound and
 * truncated to zero bytes before anything is written, so that no
 * leftovers of the previous contents remain after the new ones.
 *
 * Comments are written between '--' delimiters.  Normal entries are
 * written as the keyword followed by its quoted arguments, with a blank
 * line after each entry.
 *
 * This function returns nothing, so failures to truncate or to write
 * are reported with a warning only.
 */
static void
write_catalog(FILE *f, struct entries *catalog)
{
    struct entry *iter;

    rewind(f);
    fflush(f);
    /* Writing still proceeds if truncation fails, but the file may then
     * keep stale data past the end of the new contents. */
    if (ftruncate(fileno(f), 0) == -1)
        warn("cannot truncate catalog file");

    LINKLIST_FOREACH(iter, catalog) {
        if (iter->e_type == ENTRY_COMMENT)
            fprintf(f, "--%s--\n", iter->e_comment);
        else {
            if (iter->e_field3 == NULL)
                fprintf(f, "%s \"%s\"\n\n", iter->e_field1, iter->e_field2);
            else
                fprintf(f, "%s \"%s\"\n\t\"%s\"\n\n", iter->e_field1,
                        iter->e_field2, iter->e_field3);
        }
    }

    /* rewind() cleared the error indicator above, so an error seen here
     * comes from the writes just performed. */
    if (fflush(f) == EOF || ferror(f))
        warn("cannot write catalog file");
}

/*
 * Local Variables: ***
 * mode: c ***
 * c-file-style: "stroustrup" ***
 * End: ***
 * vim: syntax=c:expandtab:shiftwidth=4:softtabstop=4
 */
