#include "lexer.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <utils/memory/memory.h>

#include "utils/types.h"

const char *TOKEN_STRINGS[] = {
    "TOKEN_NONE",
    "TOKEN_IDENTIFIER",
    "TOKEN_NUMBER",
    "TOKEN_STRING",
    "TOKEN_OPERATOR",
    "TOKEN_OPERATOR_PARENTHESES_OPEN",
    "TOKEN_OPERATOR_PARENTHESES_CLOSE",
    "TOKEN_OPERATOR_CURLY_BRACKET_OPEN",
    "TOKEN_OPERATOR_CURLY_BRACKET_CLOSE",
    "TOKEN_OPERATOR_ASSIGN",
    "TOKEN_OPERATOR_EQUAL",
    "TOKEN_OPERATOR_COLON",
    "TOKEN_OPERATOR_COMMA",
    "TOKEN_OPERATOR_EOL",
    "TOKEN_OPERATOR_FUNCTION",
    "TOKEN_SYMBOL",
    "TOKEN_KEYWORD_STRUCT",
    "TOKEN_KEYWORD_EXTERNAL",
    "TOKEN_KEYWORD_IMPORT",
    "TOKEN_PARSED",
};

static const char *KEYWORDS_STRINGS[] = {
    "struct",
    "external",
    "import",
};
static const Token KEYWORDS_TOKENS[] = {
    TOKEN_KEYWORD_STRUCT,
    TOKEN_KEYWORD_EXTERNAL,
    TOKEN_KEYWORD_IMPORT,
};
static const size_t KEYWORDS_SIZE = sizeof(KEYWORDS_STRINGS) / sizeof(char *);

static const char *OPERATORS_STRINGS[] = {
    "(", ")", "{", "}", "=", "==", ":", ",", ";", "->",
};
static const Token OPERATORS_TOKENS[] = {
    TOKEN_OPERATOR_PARENTHESES_OPEN,
    TOKEN_OPERATOR_PARENTHESES_CLOSE,
    TOKEN_OPERATOR_CURLY_BRACKET_OPEN,
    TOKEN_OPERATOR_CURLY_BRACKET_CLOSE,
    TOKEN_OPERATOR_ASSIGN,
    TOKEN_OPERATOR_EQUAL,
    TOKEN_OPERATOR_COLON,
    TOKEN_OPERATOR_COMMA,
    TOKEN_OPERATOR_EOL,
    TOKEN_OPERATOR_FUNCTION,
};
static const size_t OPERATORS_SIZE = sizeof(OPERATORS_STRINGS) / sizeof(char *);

void printNodes(Nodes nodes) {
  for (size_t i = 0; i < nodes.size; ++i) {
    const Node node = nodes.nodes[i];
    printf("{'%.*s',%s}\n", (int)(node.strEnd - node.strBegin), node.strBegin,
           TOKEN_STRINGS[node.token]);
  }
}

void deleteNodes(Nodes nodes) { free(nodes.nodes); }

Nodes lexer(char const *const restrict code) {
  size_t nodes_size = 10;
  Node *nodes = a404m_malloc(nodes_size * sizeof(Node));
  size_t nodes_inserted = 0;

  Node node = {
      .strBegin = code,
      .strEnd = code,
      .token = TOKEN_NONE,
  };

  for (int i = 0;; ++i) {
    const char c = code[i];
    if (c == '\0') {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                        TOKEN_NONE);
      break;
    } else if (c == '/') {
      const char follow = code[i + 1];
      if (follow == '/') {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_NONE);
        for (i += 2; code[i] != '\0' && code[i] != '\n'; ++i);
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_NONE);
        if (code[i] == '\0') {
          goto RETURN_SUCCESS;
        }
        continue;
      } else if (follow == '*') {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_NONE);
        int in = 1;
        for (i += 2;; ++i) {
          switch (code[i]) {
            case '\0':
              printError("Expected multi line comment to end", code,
                         node.strBegin, code + i);
              goto RETURN_ERROR;
            case '*':
              ++i;
              if (code[i] == '/') {
                --in;
                if (in == 0) {
                  goto END_OF_BLOCK_COMMENT_LOOP;
                }
              } else if (code[i] == '\0') {
                printError("Expected multi line comment to end", code,
                           node.strBegin, code + i);
                goto RETURN_ERROR;
              }
              break;
            case '/':
              ++i;
              if (code[i] == '*') {
                ++in;
              } else if (code[i] == '\0') {
                printError("Expected multi line comment to end", code,
                           node.strBegin, code + i);
                goto RETURN_ERROR;
              }
              break;
          }
        }
      END_OF_BLOCK_COMMENT_LOOP:
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_NONE);
        if (code[i] == '\0') {
          goto RETURN_SUCCESS;
        }
        continue;
      }
    }
    if (isSpace(c)) {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                        TOKEN_NONE);
    } else if (isIdentifier(c)) {
      if (node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_IDENTIFIER);
      }
    } else if (isIdentifierSymbol(c)) {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                        TOKEN_IDENTIFIER);
      for (++i;; ++i) {
        const char current = code[i];
        if (current == c) {
          break;
        } else if (current == '\0') {
          printError("Expected %c to end", code, node.strBegin, code + i, c);
          goto RETURN_ERROR;
        }
      }
      ++node.strBegin;
      push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node,
                               code, i, TOKEN_NONE);
    } else if (isNumber(c)) {
      if (node.token != TOKEN_NUMBER && node.token != TOKEN_IDENTIFIER &&
          node.token != TOKEN_SYMBOL) {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_NUMBER);
      }
    } else if (isString(c)) {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                        TOKEN_STRING);

      for (++i;; ++i) {
        const char current = code[i];
        if (current == c) {
          break;
        } else if (current == '\\') {
          ++i;
        } else if (current == '\0') {
          printError("Expected %c to end", code, node.strBegin, code + i, c);
          goto RETURN_ERROR;
        }
      }

      ++i;
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                        TOKEN_NONE);
      --i;
    } else if (isOperator(c)) {
      if (node.token == TOKEN_OPERATOR) {
        const Token token = getOperator(node.strBegin, code + i + 1);
        if (token != TOKEN_NONE) {
          continue;
        } else {
          node.token = getOperator(node.strBegin, code + i);
          if (node.token == TOKEN_NONE) {
            printError("Unknown operator '%.*s'", code, node.strBegin,
                       node.strEnd, (int)(code + i - node.strBegin),
                       node.strBegin);
            goto RETURN_ERROR;
          }
          push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node,
                                   code, i, TOKEN_OPERATOR);
        }
      } else {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_OPERATOR);
      }
    } else if (isSymbol(c)) {
      if (node.token != TOKEN_SYMBOL) {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
                          TOKEN_SYMBOL);
      }
    } else {
      printError("Unexpected char '%c'", code, code + i, code + i + 1, c);
      goto RETURN_ERROR;
    }
  }

RETURN_SUCCESS:
  Nodes result = {
      .nodes = a404m_realloc(nodes, nodes_inserted * sizeof(Node)),
      .size = nodes_inserted,
  };

  return result;
RETURN_ERROR:
  free(nodes);
  Nodes error = {
      .nodes = NULL,
      .size = ERROR_SIZE,
  };

  return error;
}

void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size,
                       size_t *restrict nodes_inserted, Node *restrict node,
                       char const *restrict str, int i, Token token) {
  if (node->token != TOKEN_NONE) {
    if (*nodes_size == *nodes_inserted) {
      *nodes_size += *nodes_size / 2 + 1;
      *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node));
    }
    node->strEnd = str + i;
    if (node->token == TOKEN_IDENTIFIER) {
      const Token foundToken = getKeyword(node->strBegin, node->strEnd);
      if (foundToken != TOKEN_NONE) {
        node->token = foundToken;
      }
    } else if (node->token == TOKEN_OPERATOR) {
      const Token foundToken = getOperator(node->strBegin, node->strEnd);
      if (foundToken != TOKEN_NONE) {
        node->token = foundToken;
      }
    }

    (*nodes)[*nodes_inserted] = *node;
    ++*nodes_inserted;
  }
  node->strBegin = str + i;
  node->token = token;
}

void push_clear_without_check(Node **restrict nodes,
                              size_t *restrict nodes_size,
                              size_t *restrict nodes_inserted, Node *node,
                              char const *restrict str, int i, Token token) {
  if (*nodes_size == *nodes_inserted) {
    *nodes_size += *nodes_size / 2 + 1;
    *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node));
  }
  node->strEnd = str + i;
  (*nodes)[*nodes_inserted] = *node;
  ++*nodes_inserted;
  node->strBegin = str + i;
  node->token = token;
}

bool isSpace(char c) { return isspace(c); }

bool isIdentifier(char c) {
  return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_';
}

bool isIdentifierSymbol(char c) { return c == '`'; }

bool isNumber(char c) { return '0' <= c && c <= '9'; }

bool isString(char c) { return c == '"' || c == '\''; }

bool isOperator(char c) {
  switch (c) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case '*':
    case '/':
    case '%':
    case '+':
    case '-':
    case ':':
    case '!':
    case '=':
    case '<':
    case '>':
    case '&':
    case '|':
    case '.':
    case ',':
    case ';':
      return true;
  }
  return false;
}

bool isSymbol(char c) { return c == '#'; }

Token getTokenInStrings(char const *strBegin, char const *strEnd,
                        const char *strings[], const Token tokens[],
                        size_t size) {
  const size_t strSize = strEnd - strBegin;

  for (size_t i = 0; i < size; ++i) {
    const char *search = strings[i];
    // faster than strlen+strncmp
    for (size_t j = 0;; ++j) {
      const char searchChar = search[j];
      if (j == strSize) {
        if (searchChar == '\0') {
          return tokens[i];
        } else {
          break;
        }
      } else if (searchChar == '\0') {
        break;
      } else if (searchChar != strBegin[j]) {
        break;
      }
    }
  }
  return TOKEN_NONE;
}

Token getKeyword(char const *strBegin, char const *strEnd) {
  return getTokenInStrings(strBegin, strEnd, KEYWORDS_STRINGS, KEYWORDS_TOKENS,
                           KEYWORDS_SIZE);
}
Token getOperator(char const *strBegin, char const *strEnd) {
  return getTokenInStrings(strBegin, strEnd, OPERATORS_STRINGS,
                           OPERATORS_TOKENS, OPERATORS_SIZE);
}