/*
  ==============================================================================

   This file is part of the JUCE library - "Jules' Utility Class Extensions"
   Copyright 2004-11 by Raw Material Software Ltd.

  ------------------------------------------------------------------------------

   JUCE can be redistributed and/or modified under the terms of the GNU General
   Public License (Version 2), as published by the Free Software Foundation.
   A copy of the license is included in the JUCE distribution, or can be found
   online at www.gnu.org/licenses.

   JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
   A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

  ------------------------------------------------------------------------------

   To release a closed-source product which uses JUCE, commercial licenses are
   available: visit www.rawmaterialsoftware.com/juce for more information.

  ==============================================================================
*/

namespace CppTokeniser
{
    enum TokenType
    {
        tokenType_error = 0,
        tokenType_comment,
        tokenType_keyword,
        tokenType_operator,
        tokenType_identifier,
        tokenType_integer,
        tokenType_float,
        tokenType_string,
        tokenType_bracket,
        tokenType_punctuation,
        tokenType_preprocessor
    };

    static bool isIdentifierStart (const juce_wchar c) noexcept
    {
        return CharacterFunctions::isLetter (c)
                || c == '_' || c == '@';
    }

    static bool isIdentifierBody (const juce_wchar c) noexcept
    {
        return CharacterFunctions::isLetterOrDigit (c)
                || c == '_' || c == '@';
    }

    static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
    {
        static const char* const keywords2Char[] =
            { "if", "do", "or", "id", 0 };

        static const char* const keywords3Char[] =
            { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };

        static const char* const keywords4Char[] =
            { "bool", "void", "this", "true", "long", "else", "char",
              "enum", "case", "goto", "auto", 0 };

        static const char* const keywords5Char[] =
            {  "while", "bitor", "break", "catch", "class", "compl", "const", "false",
                "float", "short", "throw", "union", "using", "or_eq", 0 };

        static const char* const keywords6Char[] =
            { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
              "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
              "switch", "typeid", "wchar_t", "xor_eq", 0};

        static const char* const keywords7Char[] =
            { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };

        static const char* const keywordsOther[] =
            { "noexcept", "const_cast", "continue", "explicit", "namespace",
              "operator", "protected", "register", "reinterpret_cast", "static_cast",
              "template", "typename", "unsigned", "volatile", "constexpr",
              "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
              "@private", "@property", "@protected", "@class", 0 };

        const char* const* k;

        switch (tokenLength)
        {
            case 2:     k = keywords2Char; break;
            case 3:     k = keywords3Char; break;
            case 4:     k = keywords4Char; break;
            case 5:     k = keywords5Char; break;
            case 6:     k = keywords6Char; break;
            case 7:     k = keywords7Char; break;

            default:
                if (tokenLength < 2 || tokenLength > 16)
                    return false;

                k = keywordsOther;
                break;
        }

        for (int i = 0; k[i] != 0; ++i)
            if (token.compare (CharPointer_ASCII (k[i])) == 0)
                return true;

        return false;
    }

    static int parseIdentifier (CodeDocument::Iterator& source) noexcept
    {
        int tokenLength = 0;
        String::CharPointerType::CharType possibleIdentifier [100];
        String::CharPointerType possible (possibleIdentifier);

        while (isIdentifierBody (source.peekNextChar()))
        {
            const juce_wchar c = source.nextChar();

            if (tokenLength < 20)
                possible.write (c);

            ++tokenLength;
        }

        if (tokenLength > 1 && tokenLength <= 16)
        {
            possible.writeNull();

            if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
                return tokenType_keyword;
        }

        return tokenType_identifier;
    }

    static bool skipNumberSuffix (CodeDocument::Iterator& source)
    {
        const juce_wchar c = source.peekNextChar();
        if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
            source.skip();

        if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
            return false;

        return true;
    }

    static bool isHexDigit (const juce_wchar c) noexcept
    {
        return (c >= '0' && c <= '9')
                || (c >= 'a' && c <= 'f')
                || (c >= 'A' && c <= 'F');
    }

    static bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
    {
        if (source.nextChar() != '0')
            return false;

        juce_wchar c = source.nextChar();
        if (c != 'x' && c != 'X')
            return false;

        int numDigits = 0;
        while (isHexDigit (source.peekNextChar()))
        {
            ++numDigits;
            source.skip();
        }

        if (numDigits == 0)
            return false;

        return skipNumberSuffix (source);
    }

    static bool isOctalDigit (const juce_wchar c) noexcept
    {
        return c >= '0' && c <= '7';
    }

    static bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
    {
        if (source.nextChar() != '0')
            return false;

        if (! isOctalDigit (source.nextChar()))
             return false;

        while (isOctalDigit (source.peekNextChar()))
            source.skip();

        return skipNumberSuffix (source);
    }

    static bool isDecimalDigit (const juce_wchar c) noexcept
    {
        return c >= '0' && c <= '9';
    }

    static bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
    {
        int numChars = 0;
        while (isDecimalDigit (source.peekNextChar()))
        {
            ++numChars;
            source.skip();
        }

        if (numChars == 0)
            return false;

        return skipNumberSuffix (source);
    }

    static bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
    {
        int numDigits = 0;

        while (isDecimalDigit (source.peekNextChar()))
        {
            source.skip();
            ++numDigits;
        }

        const bool hasPoint = (source.peekNextChar() == '.');

        if (hasPoint)
        {
            source.skip();

            while (isDecimalDigit (source.peekNextChar()))
            {
                source.skip();
                ++numDigits;
            }
        }

        if (numDigits == 0)
            return false;

        juce_wchar c = source.peekNextChar();
        const bool hasExponent = (c == 'e' || c == 'E');

        if (hasExponent)
        {
            source.skip();

            c = source.peekNextChar();
            if (c == '+' || c == '-')
                source.skip();

            int numExpDigits = 0;
            while (isDecimalDigit (source.peekNextChar()))
            {
                source.skip();
                ++numExpDigits;
            }

            if (numExpDigits == 0)
                return false;
        }

        c = source.peekNextChar();
        if (c == 'f' || c == 'F')
            source.skip();
        else if (! (hasExponent || hasPoint))
            return false;

        return true;
    }

    static int parseNumber (CodeDocument::Iterator& source)
    {
        const CodeDocument::Iterator original (source);

        if (parseFloatLiteral (source))
            return tokenType_float;

        source = original;

        if (parseHexLiteral (source))
            return tokenType_integer;

        source = original;

        if (parseOctalLiteral (source))
            return tokenType_integer;

        source = original;

        if (parseDecimalLiteral (source))
            return tokenType_integer;

        source = original;
        return tokenType_error;
    }

    static void skipQuotedString (CodeDocument::Iterator& source) noexcept
    {
        const juce_wchar quote = source.nextChar();

        for (;;)
        {
            const juce_wchar c = source.nextChar();

            if (c == quote || c == 0)
                break;

            if (c == '\\')
                source.skip();
        }
    }

    static void skipComment (CodeDocument::Iterator& source) noexcept
    {
        bool lastWasStar = false;

        for (;;)
        {
            const juce_wchar c = source.nextChar();

            if (c == 0 || (c == '/' && lastWasStar))
                break;

            lastWasStar = (c == '*');
        }
    }

    static void skipPreprocessorLine (CodeDocument::Iterator& source) noexcept
    {
        bool lastWasBackslash = false;

        for (;;)
        {
            const juce_wchar c = source.peekNextChar();

            if (c == '"')
            {
                skipQuotedString (source);
                continue;
            }

            if (c == '/')
            {
                CodeDocument::Iterator next (source);
                next.skip();
                const juce_wchar c2 = next.peekNextChar();

                if (c2 == '/' || c2 == '*')
                    return;
            }

            if (c == 0)
                break;

            if (c == '\n' || c == '\r')
            {
                source.skipToEndOfLine();

                if (lastWasBackslash)
                    skipPreprocessorLine (source);

                break;
            }

            lastWasBackslash = (c == '\\');
            source.skip();
        }
    }

    static void skipIfNextCharMatches (CodeDocument::Iterator& source, const juce_wchar c) noexcept
    {
        if (source.peekNextChar() == c)
            source.skip();
    }

    static void skipIfNextCharMatches (CodeDocument::Iterator& source,
                                       const juce_wchar c1, const juce_wchar c2) noexcept
    {
        const juce_wchar c = source.peekNextChar();

        if (c == c1 || c == c2)
            source.skip();
    }
}

//==============================================================================
CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser() {}
CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser() {}

int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
{
    using namespace CppTokeniser;
    int result = tokenType_error;
    source.skipWhitespace();

    const juce_wchar firstChar = source.peekNextChar();

    switch (firstChar)
    {
    case 0:
        source.skip();
        break;

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '.':
        result = parseNumber (source);

        if (result == tokenType_error)
        {
            source.skip();

            if (firstChar == '.')
                result = tokenType_punctuation;
        }

        break;

    case ',':
    case ';':
    case ':':
        source.skip();
        result = tokenType_punctuation;
        break;

    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
        source.skip();
        result = tokenType_bracket;
        break;

    case '"':
    case '\'':
        skipQuotedString (source);
        result = tokenType_string;
        break;

    case '+':
        result = tokenType_operator;
        source.skip();
        skipIfNextCharMatches (source, '+', '=');
        break;

    case '-':
        source.skip();
        result = parseNumber (source);

        if (result == tokenType_error)
        {
            result = tokenType_operator;
            skipIfNextCharMatches (source, '-', '=');
        }
        break;

    case '*':
    case '%':
    case '=':
    case '!':
        result = tokenType_operator;
        source.skip();
        skipIfNextCharMatches (source, '=');
        break;

    case '/':
        result = tokenType_operator;
        source.skip();

        if (source.peekNextChar() == '=')
        {
            source.skip();
        }
        else if (source.peekNextChar() == '/')
        {
            result = tokenType_comment;
            source.skipToEndOfLine();
        }
        else if (source.peekNextChar() == '*')
        {
            source.skip();
            result = tokenType_comment;
            skipComment (source);
        }

        break;

    case '?':
    case '~':
        source.skip();
        result = tokenType_operator;
        break;

    case '<':
    case '>':
    case '|':
    case '&':
    case '^':
        source.skip();
        result = tokenType_operator;
        skipIfNextCharMatches (source, firstChar);
        skipIfNextCharMatches (source, '=');
        break;

    case '#':
        result = tokenType_preprocessor;
        skipPreprocessorLine (source);
        break;

    default:
        if (isIdentifierStart (firstChar))
            result = parseIdentifier (source);
        else
            source.skip();

        break;
    }

    return result;
}

CodeEditorComponent::ColourScheme CPlusPlusCodeTokeniser::getDefaultColourScheme()
{
    struct Type
    {
        const char* name;
        uint32 colour;
    };

    const Type types[] =
    {
        { "Error",              0xffcc0000 },
        { "Comment",            0xff00aa00 },
        { "Keyword",            0xff0000cc },
        { "Operator",           0xff225500 },
        { "Identifier",         0xff000000 },
        { "Integer",            0xff880000 },
        { "Float",              0xff885500 },
        { "String",             0xff990099 },
        { "Bracket",            0xff000055 },
        { "Punctuation",        0xff004400 },
        { "Preprocessor Text",  0xff660000 }
    };

    CodeEditorComponent::ColourScheme cs;

    for (int i = 0; i < sizeof (types) / sizeof (types[0]); ++i)  // (NB: numElementsInArray doesn't work here in GCC4.2)
        cs.set (types[i].name, Colour (types[i].colour));

    return cs;
}

bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) noexcept
{
    return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
}