/*
 * telega-dat.c --- Bridge between Emacs and TDLib.
 *
 * Copyright (C) 2016-2025 by Zajcev Evgeny
 *
 * Author: Zajcev Evgeny <zevlg@yandex.ru>
 *
 * telega is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * telega is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with telega.  If not, see <http://www.gnu.org/licenses/>.
 *
 ** Commentary:
 *
 * json <--> plist converter without dependencies
 *
 * Uses the recursive descent technique for conversions.
 *
 * JSON syntax from http://json.org
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *   object
 *       {}
 *       { members }
 *   members
 *       pair
 *       pair , members
 *   pair
 *       string : value
 *   array
 *       []
 *       [ elements ]
 *   elements
 *       value
 *       value , elements
 *   value
 *       string
 *       number
 *       object
 *       array
 *       true
 *       false
 *       null
 *
 * Plist syntax
 * ~~~~~~~~~~~~
 *   object
 *       ()
 *       (members)
 *   members
 *       pair
 *       pair <SPACE> members
 *   pair
 *       :keyword value
 *   vector
 *       []
 *       [ elements ]
 *   elements
 *       value
 *       value <SPACE> elements
 *   value
 *       string
 *       number
 *       object
 *       vector
 *       t
 *       :false
 *       nil
 */
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "telega-dat.h"
#include "telega-emoji.h"

/* Forward declaration: tdat_json_value() calls it before the definition */
void tdat_emojify_string(struct telega_dat* src_str, struct telega_dat* props);

/*
 * Make sure TDAT has room for at least ADD_CAP more bytes past its
 * current end, growing the buffer with realloc() if needed.
 *
 * Capacity grows by 1 + max(ADD_CAP, current capacity).
 * If memory can't be allocated, error is reported to stderr and the
 * process is aborted; failed realloc() result never overwrites the
 * buffer pointer.
 * TDAT->data is asserted to be non-NULL on return.
 */
void
tdat_ensure(struct telega_dat* tdat, size_t add_cap)
{
        while (tdat->end + add_cap > tdat->cap) {
                size_t new_cap = tdat->cap
                        + 1 + ((add_cap > tdat->cap) ? add_cap : tdat->cap);

                /* Keep old pointer until realloc() is known to succeed */
                char* grown = (char*)realloc(tdat->data, new_cap);
                if (grown == NULL) {
                        fprintf(stderr, "realloc(%zu) failed\n", new_cap);
                        abort();
                        /* NOT REACHED */
                }

                tdat->data = grown;
                tdat->cap = new_cap;
        }
        assert(tdat->data != NULL);
}

/*
 * Call TDAT's free_data hook on TDAT->data.
 * Does nothing if the hook is not set.
 */
void
tdat_drop(struct telega_dat* tdat)
{
        if (!tdat->free_data)
                return;

        tdat->free_data(tdat->data);
}

/*
 * Return byte located POS bytes after TDAT's start.
 * Return 0 if that position is at or past TDAT's end.
 */
static inline char
tdat_at(struct telega_dat* tdat, size_t pos)
{
        size_t idx = tdat->start + pos;

        if (idx >= tdat->end)
                return 0;
        return tdat->data[idx];
}

/*
 * Return true if TDAT is not exhausted and its current byte is
 * not NUL.
 */
static inline bool
tdat_has_data(struct telega_dat* tdat)
{
        if (tdat->start >= tdat->end)
                return false;

        return tdat_at(tdat, 0) != '\0';
}

/*
 * Move N bytes from the start of SRC to the end of DST.
 * SRC's start and DST's end are both advanced by N.
 * DST could be NULL, in this case N bytes are just drained from SRC.
 *
 * Asserts that SRC holds at least N bytes.
 */
void
tdat_move(struct telega_dat* src, struct telega_dat* dst, size_t n)
{
        if (dst == NULL) {
                tdat_drain(src, n);
                return;
                /* NOT REACHED */
        }

        assert(src->start + n <= src->end);

        /* Nothing to copy.  Also keeps memcpy() away from SRC that
         * has never been allocated (NULL data with zero length) */
        if (n == 0)
                return;

        /* tdat_ensure() expects number of bytes to add past dst->end,
         * not the resulting total size */
        tdat_ensure(dst, n);
        memcpy(&dst->data[dst->end], &src->data[src->start], n);
        dst->end += n;
        src->start += n;
}

/*
 * Append LEN bytes starting at DATA to the end of DST, growing DST
 * if needed.
 */
void
tdat_append(struct telega_dat* dst, const char* data, size_t len)
{
        /* tdat_ensure() expects number of bytes to add past dst->end,
         * not the resulting total size */
        tdat_ensure(dst, len);
        memcpy(&dst->data[dst->end], data, len);
        dst->end += len;
}

/*
 * Shift unread contents of TDAT to the very beginning of its
 * buffer, so start becomes 0 and end becomes the contents length.
 */
void
tdat_rebase(struct telega_dat* tdat)
{
        size_t live = tdat_len(tdat);

        /* Regions may overlap, hence memmove() */
        memmove(tdat->data, &tdat->data[tdat->start], live);
        tdat->end = live;
        tdat->start = 0;
}

#ifdef WITH_ZLIB
#include <zlib.h>

/*
 * Compress contents of SRC with zlib at Z_BEST_SPEED level and
 * append compressed stream to the end of DST.
 * SRC's start/end are not modified.
 *
 * Return number of compressed bytes appended to DST.
 * zlib errors are reported to stderr and trip an assert.
 */
size_t
tdat_zlib_deflate(struct telega_dat* src, struct telega_dat* dst)
{
        z_stream zstr = {
                .zalloc = Z_NULL,
                .zfree  = Z_NULL,
                .opaque = Z_NULL,
                .avail_in = tdat_len(src),
                .next_in = (Bytef*)tdat_start(src),
        };

        int ret = deflateInit(&zstr, Z_BEST_SPEED);
        if (ret != Z_OK) {
                fprintf(stderr, "deflateInit() error: %d\n", ret);
                assert(false);
                /* NOT REACHED */
                return -1;
        }

        /* Reserve room for the whole output, so single
         * deflate(Z_FINISH) call is enough */
        size_t out_len = tdat_len(src) + tdat_len(src)/1000 + 12;
        tdat_ensure(dst, out_len);

        /* Output goes past dst->end, so only the room between end
         * and capacity is writable, not the whole capacity */
        zstr.avail_out = dst->cap - dst->end;
        zstr.next_out = (Bytef*)tdat_end(dst);

        ret = deflate(&zstr, Z_FINISH);
        deflateEnd(&zstr);
        if (ret != Z_STREAM_END) {
                fprintf(stderr, "deflate() error: %d\n", ret);
                assert(false);
                /* NOT REACHED */
                return -1;
        }

        dst->end += zstr.total_out;
        return zstr.total_out;
}
#endif /* WITH_ZLIB */


/* JSON */
/*
 * Skip whitespace characters at the start of SRC.
 */
static void
tdat_json_whitespaces(struct telega_dat* src)
{
        while (tdat_has_data(src)) {
                /* isspace() is undefined for negative values, and
                 * plain char may be signed, so go via unsigned char */
                if (!isspace((unsigned char)tdat_at(src, 0)))
                        break;
                tdat_drain(src, 1);
        }
}

/*
 * Move contents of the double-quoted string at the start of SRC
 * into DST, without surrounding quotes.  Both quotes are consumed
 * from SRC.
 *
 * Backslash escapes are copied verbatim (backslash and the char
 * following it), so escaped quote does not terminate the string.
 * If NO_SPACES is true, then unescaped whitespace inside the string
 * is reported to stderr and trips an assert.
 *
 * Returns silently if SRC ends before the closing quote.
 */
static void
tdat_json_string0(struct telega_dat* src, struct telega_dat* dst, bool no_spaces)
{
        assert(tdat_at(src, 0) == '"');

        tdat_drain(src, 1);     /* " */
        while (tdat_has_data(src)) {
                char c = tdat_at(src, 0);
                if (c == '\"') {
                        tdat_drain(src, 1);
                        return;
                }

                if (c == '\\') {
                        /* Copy backslash now, escaped char is copied
                         * by the tdat_move1() at the loop's end */
                        tdat_move1(src, dst);
                        if (!tdat_has_data(src))
                                break;
                }

                /* isspace() is undefined for negative values, and
                 * plain char may be signed, so go via unsigned char */
                if (no_spaces && isspace((unsigned char)c)) {
                        fprintf(stderr, "Space in string is not allowed\n");
                        assert(false);
                }

                tdat_move1(src, dst);
        }
}

/*
 * Convert JSON object at the start of JSON into plist appended to
 * PLIST: {"key": value, ...} becomes (:key value ...).
 *
 * Depending on flags set in `optimize', pairs whose converted value
 * is exactly nil, [] or "" are removed from the output.
 */
static void
tdat_json_object(struct telega_dat* json, struct telega_dat* plist)
{
        /* Where the most recent ":key" starts in PLIST, needed to
         * discard the whole pair */
        size_t key_pos = 0;

        tdat_append1(plist, "(");

        assert(tdat_at(json, 0) == '{');
        tdat_drain(json, 1);    /* { */
        while (tdat_has_data(json)) {
                tdat_json_whitespaces(json);

                char c = tdat_at(json, 0);
                if (c == '}') {
                        tdat_append1(plist, ")");
                        tdat_drain(json, 1);
                        return;
                }

                if (c == ':') {
                        tdat_drain(json, 1); /* : */
                        tdat_append1(plist, " ");

                        size_t val_pos = plist->end;
                        tdat_json_value(json, plist);

                        /* Take the pointer only after conversion,
                         * PLIST's buffer might have been moved */
                        size_t val_len = plist->end - val_pos;
                        const char* val = &plist->data[val_pos];

                        bool is_nil = (val_len == 3)
                                && !strncmp("nil", val, 3);
                        bool is_empty_vec = (val_len == 2)
                                && !strncmp("[]", val, 2);
                        bool is_empty_str = (val_len == 2)
                                && !strncmp("\"\"", val, 2);

                        if (((optimize & OPTIMIZE_NIL_VALUES) && is_nil)
                            || ((optimize & OPTIMIZE_EMPTY_LISTS)
                                && is_empty_vec)
                            || ((optimize & OPTIMIZE_EMPTY_STRINGS)
                                && is_empty_str))
                        {
                                plist->end = key_pos;
                        }
                        continue;
                }

                if (c == ',') {
                        tdat_append1(plist, " ");
                        tdat_drain(json, 1); /* , */
                        tdat_json_whitespaces(json);
                        /* Key is expected right after the comma */
                } else if (c != '"') {
                        fprintf(stderr, "Unexpected char '%c' in json object\n",
                                tdat_at(json, 0));
                        assert(false);
                        /* NOT REACHED */
                        continue;
                }

                key_pos = plist->end;
                tdat_append1(plist, ":");
                tdat_json_string0(json, plist, true);
        }
}

/*
 * Convert JSON array at the start of JSON into vector appended to
 * PLIST: brackets are kept, commas are replaced with spaces and
 * every element is converted with tdat_json_value().
 */
static void
tdat_json_array(struct telega_dat* json, struct telega_dat* plist)
{
        assert(tdat_at(json, 0) == '[');

        tdat_move1(json, plist); /* [ */
        while (tdat_has_data(json)) {
                tdat_json_whitespaces(json);

                char c = tdat_at(json, 0);
                if (c == ']') {
                        tdat_move1(json, plist); /* ] */
                        return;
                }

                if (c == ',') {
                        tdat_append1(plist, " ");
                        tdat_drain(json, 1);
                } else {
                        tdat_json_value(json, plist);
                }
        }
}

/*
 * Move number at the start of JSON to PLIST as is.
 * Number is the longest run of digits, signs, dots and exponent
 * marks (e/E); the run is not validated in any other way.
 */
static void
tdat_json_number(struct telega_dat* json, struct telega_dat* plist)
{
        while (tdat_has_data(json)) {
                char c = tdat_at(json, 0);
                bool numeric = ((c >= '0') && (c <= '9'))
                        || (c == '-') || (c == '+')
                        || (c == 'e') || (c == 'E')
                        || (c == '.');
                if (!numeric)
                        return;

                tdat_move1(json, plist);
        }
}

void
tdat_json_value(struct telega_dat* json, struct telega_dat* plist)
{
        tdat_json_whitespaces(json);
        switch (tdat_at(json, 0)) {
        case '{':
                tdat_json_object(json, plist);
                break;
        case '[':
                tdat_json_array(json, plist);
                break;
        case '\"': {
                struct telega_dat pstr = TDAT_INIT;
                struct telega_dat props = TDAT_INIT;
                tdat_json_string0(json, &pstr, false);
                tdat_emojify_string(&pstr, &props);
                if (tdat_has_data(&props)) {
                        tdat_append_str(plist, "#(\"");
                        tdat_move(&pstr, plist, tdat_len(&pstr));
                        tdat_append1(plist, "\"");
                        tdat_move(&props, plist, tdat_len(&props));
                        tdat_append1(plist, ")");
                } else {
                        tdat_append1(plist, "\"");
                        tdat_move(&pstr, plist, tdat_len(&pstr));
                        tdat_append1(plist, "\"");
                }
                tdat_drop(&pstr);
                tdat_drop(&props);
                break;
        }
        case '-':
        case '0' ... '9':
                tdat_json_number(json, plist);
                break;
        case 't':               /* true */
                tdat_drain(json, 4);
                tdat_append1(plist, "t");
                break;
        case 'f':               /* false */
                tdat_drain(json, 5);
                tdat_append(plist, "nil", 3);
                break;
        case 'n':               /* null */
                tdat_drain(json, 4);
                tdat_append(plist, "nil", 3);
                break;
        default:
                fprintf(stderr, "Unexpected char '%c' in json value\n",
                        tdat_at(json, 0));
                assert(false);
        }
        tdat_json_whitespaces(json);
}


/* PLIST */
/* Plist parser reuses JSON scanners for strings, numbers and
 * whitespace, these are just aliases for them */
#define tdat_plist_string0 tdat_json_string0
#define tdat_plist_number tdat_json_number
#define tdat_plist_whitespaces tdat_json_whitespaces

/*
 * Convert plist keyword at the start of PLIST (:keyword) into JSON
 * string appended to JSON ("keyword").
 * Keyword extends up to the next whitespace or the end of PLIST.
 */
static void
tdat_plist_keyword(struct telega_dat* plist, struct telega_dat* json)
{
        tdat_append1(json, "\"");

        assert(tdat_at(plist, 0) == ':');
        tdat_drain(plist, 1);   /* : */
        while (tdat_has_data(plist)) {
                /* isspace() is undefined for negative values, and
                 * plain char may be signed, so go via unsigned char */
                if (isspace((unsigned char)tdat_at(plist, 0)))
                        break;
                tdat_move1(plist, json);
        }

        tdat_append1(json, "\"");
}

/*
 * Convert plist object at the start of PLIST into JSON object
 * appended to JSON: (:key value ...) becomes {"key":value,...}.
 *
 * Every single space between members is converted to comma.
 * Anything other than `)', space or keyword at member position is
 * reported to stderr and trips an assert.
 */
static void
tdat_plist_object(struct telega_dat* plist, struct telega_dat* json)
{
        tdat_append1(json, "{");

        assert(tdat_at(plist, 0) == '(');
        tdat_drain(plist, 1); /* ( */
        tdat_plist_whitespaces(plist);
        while (tdat_has_data(plist)) {
                char c = tdat_at(plist, 0);
                if (c == ')') {
                        tdat_append1(json, "}");
                        tdat_drain(plist, 1);
                        return;
                }

                if (c == ' ') {
                        tdat_drain(plist, 1);
                        tdat_append1(json, ",");
                } else if (c == ':') {
                        tdat_plist_keyword(plist, json);
                        tdat_append1(json, ":");
                        tdat_plist_value(plist, json);
                } else {
                        fprintf(stderr, "Invalid plist object at pos=%zu\n",
                                plist->start);
                        assert(false);
                }
        }
}

/*
 * Convert plist vector at the start of PLIST into JSON array
 * appended to JSON: brackets are kept, whitespace between elements
 * is replaced with comma and every element is converted with
 * tdat_plist_value().
 */
static void
tdat_plist_vector(struct telega_dat* plist, struct telega_dat* json)
{
        assert(tdat_at(plist, 0) == '[');
        tdat_move1(plist, json); /* [ */

        tdat_plist_whitespaces(plist);
        while (tdat_has_data(plist)) {
                char c = tdat_at(plist, 0);
                if (c == ']') {
                        tdat_move1(plist, json); /* ] */
                        return;
                }

                if (c == ' ') {
                        tdat_append1(json, ",");
                        tdat_plist_whitespaces(plist);
                } else {
                        tdat_plist_value(plist, json);
                }
        }
}

/*
 * Convert single plist value at the start of PLIST into its JSON
 * form appended to JSON.  Leading whitespace is skipped.
 *
 * Kind of the value is decided by its first char only: `(' starts
 * an object, `[' a vector, `"' a string, `-' or digit a number,
 * `t' is true, `:' is taken as :false and `n' as nil (null).
 * Any other char is reported to stderr and trips an assert.
 */
void
tdat_plist_value(struct telega_dat* plist, struct telega_dat* json)
{
        tdat_plist_whitespaces(plist);

        char c = tdat_at(plist, 0);
        if (c == '(') {
                tdat_plist_object(plist, json);
        } else if (c == '[') {
                tdat_plist_vector(plist, json);
        } else if (c == '\"') {
                tdat_append1(json, "\"");
                tdat_plist_string0(plist, json, false);
                tdat_append1(json, "\"");
        } else if ((c == '-') || ((c >= '0') && (c <= '9'))) {
                tdat_plist_number(plist, json);
        } else if (c == 't') {
                /* t */
                tdat_drain(plist, 1);
                tdat_append(json, "true", 4);
        } else if (c == ':') {
                /* :false */
                tdat_drain(plist, 6);
                tdat_append(json, "false", 5);
        } else if (c == 'n') {
                /* nil */
                tdat_drain(plist, 3);
                tdat_append(json, "null", 4);
        } else {
                fprintf(stderr, "Unexpected char '%c' in plist value\n", c);
                assert(false);
                /* NOT REACHED */
        }
}

/* UTF16 code points */
/*
 * Return numeric value (0..15) of the hexadecimal digit C, both
 * lower and upper case letters are accepted.
 * Non-hex C trips an assert, -1 is returned if asserts are disabled.
 */
static inline int
hexc2int(int c)
{
        if (('0' <= c) && (c <= '9'))
                return c - '0';
        if (('a' <= c) && (c <= 'f'))
                return 10 + (c - 'a');
        if (('A' <= c) && (c <= 'F'))
                return 10 + (c - 'A');

        assert(false);
        return -1;
}

/**
 * Move single UTF16 code point from SRC to DST, source text is
 * copied as is (escapes are not decoded in DST).
 *
 * Code point is either a plain char, a two-char escape such as \n
 * or \", or \uXXXX with four hex digits.
 * Return value of the code point that has been read.
 * Unsupported escape is reported to stderr and trips an assert.
 */
static uint32_t
tdat_move_utf16codepoint(struct telega_dat* src, struct telega_dat* dst)
{
        uint32_t first = (uint32_t)tdat_at(src, 0);
        if (first != '\\') {
                /* Unescaped char */
                tdat_move1(src, dst);
                return first;
                /* NOT REACHED */
        }

        assert(tdat_len(src) > 1);
        char esc = tdat_at(src, 1);
        tdat_move(src, dst, 2);

        if (esc == 'u') {
                /* Fetch digits before moving, tdat_move() advances SRC */
                char digits[4];
                for (size_t i = 0; i < 4; i++)
                        digits[i] = tdat_at(src, i);
                tdat_move(src, dst, 4);

                /* Most significant digit goes first */
                uint32_t code = 0;
                for (size_t i = 0; i < 4; i++)
                        code = (code << 4) | (uint32_t)hexc2int(digits[i]);
                return code;
        }

        switch (esc) {
        case '\\':
        case '\'':
        case '"':
                /* These escapes stand for the char itself */
                return (uint32_t)esc;
        case 'n':
                return (uint32_t)'\n';
        case 'f':
                return (uint32_t)'\f';
        case 'r':
                return (uint32_t)'\r';
        case 't':
                return (uint32_t)'\t';
        case 'b':
                return (uint32_t)'\b';
        default:
                fprintf(stderr, "Unsupported escape char '\\%c'\n", esc);
                assert(false);
        }
        return -1;
}

/**
 * Move single UTF16 char from SRC to DST, combining surrogate pair
 * into one char.  DST could be NULL.
 *
 * For a surrogate pair, text already copied to DST is replaced
 * with the combined char written in \UXXXXXXXX form.
 * Return the char that has been read.
 */
uint32_t
tdat_move_utf16char(struct telega_dat* src, struct telega_dat* dst)
{
        /* Remember DST's end, so both surrogates copied to DST
         * could be thrown away and replaced with single \U escape */
        size_t dst_mark = dst ? dst->end : 0;

        uint32_t lead = tdat_move_utf16codepoint(src, dst);
        if ((lead < 0xD800) || (lead > 0xDBFF))
                return lead;

        uint32_t trail = tdat_move_utf16codepoint(src, dst);
        if ((trail < 0xDC00) || (trail > 0xDFFF))
                return lead;

        uint32_t ch = 0x10000 + ((lead - 0xD800) << 10) + (trail - 0xDC00);
        if (dst) {
                char escaped[11];
                snprintf(escaped, 11, "\\U%08x", ch);

                dst->end = dst_mark;
                tdat_append_str(dst, escaped);
        }
        return ch;
}

/*
 * Return number of utf16 code units needed for char C:
 * 2 for chars above 0xFFFF, 1 for all the others.
 */
static inline int
utf16_clen(uint32_t c)
{
        if (c > 0xFFFF)
                return 2;
        return 1;
}

/*
 * Read emoji sequence from SRC to DST, consuming utf16 chars
 * Return number of utf16 chars placed into DST
 */
size_t
tdat_move_emoji_sequence(struct telega_dat* src, struct telega_dat* dst,
                         const struct emoji_trie* emoji_trie)
{
        if (!tdat_has_data(src))
                return 0;

        struct telega_dat ret = TDAT_INIT;
        size_t start_from = src->start;

        uint32_t ch = tdat_move_utf16char(src, &ret);

        assert(emoji_trie != NULL);
        const struct emoji_trie* et;
        for (et = &emoji_trie[0]; et->match; et++) {
                if ((et->match != EMOJI_MATCH_ANY) && (ch != et->match))
                        continue;

                size_t cn = 0;
                /* Try longest match first */
                if ((et->childs
                     && (cn = tdat_move_emoji_sequence(src, &ret, et->childs)))
                    || et->is_terminal)
                {
                        /* FOUND */
                        tdat_move(&ret, dst, tdat_len(&ret));
                        tdat_drop(&ret);
                        return cn + utf16_clen(ch);
                        /* NOT REACHED */
                }
        }

        /* Reset SRC to initial state */
        src->start = start_from;

        tdat_drop(&ret);
        return 0;
}

/* Smaller of A and B, defined only if MIN is not provided already.
 * NOTE: it is a macro, so an argument may be evaluated twice */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif /* MIN */

/*
 * Return true if TABLE has an entry for CH whose childs is the
 * fe0f2 table.
 */
static bool
emoji_table_has_fe0f_entry(struct emoji_trie* table, uint32_t ch)
{
        for (struct emoji_trie* et = table; et->match; et++) {
                if ((ch == et->match) && (et->childs == fe0f2))
                        return true;
        }
        return false;
}

/*
 * Return true if CH starts basic emoji with FE0F suffix.
 * Tables are searched only if CH is within the range of chars
 * checked for that table.
 */
static bool
utf16_is_fe0f_prefix(uint32_t ch)
{
        if ((0x1f170 <= ch) && (ch <= 0x1f6f3)
            && emoji_table_has_fe0f_entry(fe0f1, ch))
                return true;

        if ((0x1f004 <= ch) && (ch <= 0x1f6bc)
            && emoji_table_has_fe0f_entry(emoji_12_1_basic2_fe0f, ch))
                return true;

        return false;
}

/*
 * Append single text property entry to PROPS in the form:
 *
 *    START END ([telega-emoji-p t] telega-display "DISP")
 *
 * telega-emoji-p part is added only if EMOJI_P is true.
 * Contents of DISP is moved into PROPS.
 */
static void
tdat_emojify_append_props(struct telega_dat* props, size_t start,
                          size_t end, bool emoji_p, struct telega_dat* disp)
{
        char range[33];
        snprintf(range, sizeof(range), " %zu %zu", start, end);
        tdat_append_str(props, range);

        tdat_append_str(props, " (");
        if (emoji_p) {
                tdat_append_str(props, "telega-emoji-p t ");
        }

        tdat_append_str(props, "telega-display \"");
        tdat_move(disp, props, tdat_len(disp));
        tdat_append_str(props, "\")");
}

/*
 * Put START and OFFSET as the most recent entry (index 0) of the
 * backtrack BT, shifting existing entries one slot towards the end.
 * The last entry is discarded by the shift.
 */
static inline void
tdat_backtrack_roll(struct emoji_backtrack* bt, size_t start, size_t offset)
{
        const size_t kept = EMOJI_BACKTRACK_SIZE - 1;

        /* Slots overlap while shifting, hence memmove() */
        memmove(&bt->starts[1], &bt->starts[0],
                kept * sizeof(bt->starts[0]));
        memmove(&bt->offsets[1], &bt->offsets[0],
                kept * sizeof(bt->offsets[0]));

        bt->starts[0] = start;
        bt->offsets[0] = offset;

        if (bt->size < EMOJI_BACKTRACK_SIZE) {
                bt->size++;
        }
}

/*
 * Look for emoji sequence from TABLES->match_trie, trying start
 * positions recorded in the backtrack BT.  At most
 * TABLES->max_backtrack + 1 entries are tried, beginning with the
 * one put into BT earliest, so longer sequence is found first.
 *
 * On match the sequence is placed into DST, SRC is left right
 * after the sequence, RET_START and RET_END get offsets of the
 * sequence, and true is returned.
 * Otherwise SRC's start is restored and false is returned.
 */
static inline bool
tdat_backtrack_emoji_sequence(struct emoji_backtrack* bt,
                              struct emoji_match_table* tables,
                              struct telega_dat* src,
                              struct telega_dat* dst,
                              size_t* ret_start,
                              size_t* ret_end)
{
        size_t resume_at = src->start;

        int depth = MIN(bt->size, tables->max_backtrack + 1);
        for (int idx = depth - 1; idx >= 0; idx--) {
                src->start = bt->starts[idx];

                size_t matched = tdat_move_emoji_sequence(
                        src, dst, tables->match_trie);
                if (!matched)
                        continue;

                /* MATCHED */
                size_t seq_offset = bt->offsets[idx];
                *ret_start = seq_offset;
                *ret_end = seq_offset + matched;
                return true;
        }

        src->start = resume_at;
        return false;
}

/**
 * Find emojis in SRC_STR and append corresponding text properties
 * to PROPS.
 *
 * SRC_STR is scanned through a view, one utf16 char at a time.
 * For every emoji sequence found, a property entry marked with
 * telega-emoji-p is added.  Chars above 0xFFFF that are not part of
 * an emoji sequence get telega-display property as well.
 *
 * Nothing is returned, callers check whether PROPS has got any data.
 */
void
tdat_emojify_string(struct telega_dat* src_str, struct telega_dat* props)
{
        struct telega_dat src_view = TDAT_INIT_VIEW(src_str);
        /* Text for the telega-display property of the current match */
        struct telega_dat disp = TDAT_INIT;

        struct emoji_backtrack bt = {.size = 0 };
        /* Position in the string counted in utf16 code units */
        size_t offset = 0;

        while (tdat_has_data(&src_view)) {
                /* update the backtrack */
                tdat_backtrack_roll(&bt, src_view.start, offset);

                /* Choose match tables by the char just read */
                struct emoji_match_table* tables = NULL;
                uint32_t ch = tdat_move_utf16char(&src_view, NULL);
                offset += utf16_clen(ch);
                if ((0x231A <= ch) && (ch <= 0x2B55))
                        tables = emoji_basic1_tables;
                else if (ch == 0xFE0F)
                        tables = emoji_other_tables; /* fe0f */
                else if ((0x1F1E6 <= ch) && (ch <= 0x1F1FF))
                        tables = emoji_other_tables; /* flags */
                else if (ch == 0x1F3F4)
                        tables = emoji_other_tables; /* tags */
                else if (ch == 0x20E3)
                        tables = emoji_other_tables; /* non-fe0f keycaps */
                else if ((0x1F3FB <= ch) && (ch <= 0x1F3FF))
                        tables = emoji_other_tables; /* modifiers */
                else if (ch == 0x200D)
                        tables = emoji_other_tables; /* zwj */
                else if ((0x1F004 <= ch) && (ch <= 0x1FAD6))
                        tables = emoji_basic2_tables;
                else
                        tables = emoji_null_tables;

                /*
                 * Try tables one by one, table list ends with the
                 * entry having no match_trie.  On success FOUND_POS
                 * and OFFSET are set to offsets of the sequence's
                 * start and end.
                 */
                bool found = false;
                size_t found_pos;
                for (; tables->match_trie; tables++) {
                        if ((ch < tables->min_match) || (ch > tables->max_match))
                                continue;

                        tdat_reset(&disp);
                        if (tdat_backtrack_emoji_sequence(
                                    &bt, tables, &src_view, &disp,
                                    &found_pos, &offset))
                        {
                                found = true;
                                break;
                                /* NOT REACHED */
                        }
                }

                if (found) {
                        /*
                         * NOTE: if emoji sequence is already
                         * fully-qualified, but xfe0f is following it,
                         * then make \xfe0f part of the emoji, so
                         * \xfe0f won't be displayed on its own.
                         */
                        /* Peek ahead using separate view, SRC_VIEW is
                         * advanced only if \xfe0f is really there */
                        struct telega_dat fe0f_view = TDAT_INIT_VIEW(&src_view);
                        while ((0xfe0f == tdat_move_utf16char(&fe0f_view, NULL)))
                        {
                                tdat_move_utf16char(&src_view, NULL);
                                assert(utf16_clen(0xfe0f) == 1);
                                offset += 1;
                        }

                        tdat_emojify_append_props(
                                props, found_pos, offset, true, &disp);

                } else if (utf16_clen(ch) == 2) {
                        /* NOTE: For surrogated pairs that are not
                         * part of the emoji, add only
                         * `telega-display' property, not marking as
                         * emoji.
                         *
                         * With only exception if surrogated pair
                         * codes basic emoji and \xfe0f is missing at
                         * the end.
                         *
                         * See https://github.com/zevlg/telega.el/issues/251
                         */
                        /* Read the same char once again, this time
                         * collecting its text into DISP.
                         * NOTE: this CH shadows the outer one. */
                        src_view.start = bt.starts[0];
                        tdat_reset(&disp);
                        uint32_t ch = tdat_move_utf16char(&src_view, &disp);

                        assert(offset >= 2);
                        tdat_emojify_append_props(
                                props, offset - 2, offset,
                                utf16_is_fe0f_prefix(ch), &disp);
                }
        }

        tdat_drop(&disp);
}
