Source code for pycropml.transpiler.antlr_py.grammars.PythonLexerBase

import sys
from enum import Enum
from pkgutil import get_data
from typing import List, Optional, Dict, TextIO
from antlr4 import Lexer, Token, InputStream, Parser, TokenStream
from antlr4.Token import CommonToken


[docs]
class PythonLexerBase(Lexer):
    tab_size = 4

    def __init__(self, input_stream: InputStream, output: TextIO = sys.stdout):
        """Set up the base lexer and the indentation-tracking state.

        Args:
            input_stream: character stream forwarded to the base ``Lexer``.
            output: text stream forwarded to the base ``Lexer``
                (``sys.stdout`` by default).
        """
        super().__init__(input_stream, output)

        # Nesting counter driven by IncIndentLevel / DecIndentLevel; while it
        # is non-zero no indentation processing takes place.
        self.__opened: int = 0
        # Stack of the indentation widths that are currently open.
        self.__indents: List[int] = []

        # Ring buffer of tokens waiting to be returned by nextToken, together
        # with the indexes of its oldest and newest entries.
        self.__buffer: List[Optional[Token]] = [None] * 32
        self.__first_tokens_ind: int = 0
        self.__last_tokens_ind: int = 0

        # Most recent token passed to emitToken (None until the first one).
        self.__last_token: Optional[Token] = None


[docs]
    def emitToken(self, token: Token) -> None:
        """Queue ``token`` in the pending-token ring buffer.

        The token also becomes the lexer's current token (``self._token``) and
        is remembered as the last emitted token. When the ring is full its
        capacity is doubled before the token is stored.

        Args:
            token: the token to queue.
        """
        self._token = token

        # A vacant head slot means the ring is empty, in which case the tail
        # index already designates the slot to fill.
        ring_has_tokens = self.__buffer[self.__first_tokens_ind] is not None
        if ring_has_tokens:
            self.__last_tokens_ind = self.__inc_token_ind(self.__last_tokens_ind)

            if self.__last_tokens_ind == self.__first_tokens_ind:
                # The tail caught up with the head: the ring is full. Double
                # it, keeping the wrapped-around entries (indexes below the
                # head) in place and moving the entries from the head onwards
                # to the end of the new list, which frees the tail slot.
                old_ring = self.__buffer
                head = self.__first_tokens_ind
                grown: List[Optional[Token]] = [None] * (2 * len(old_ring))
                new_head = len(grown) - (len(old_ring) - head)

                grown[:head] = old_ring[:head]
                grown[new_head:] = old_ring[head:]

                self.__first_tokens_ind = new_head
                self.__buffer = grown

        self.__buffer[self.__last_tokens_ind] = token
        self.__last_token = token



[docs]
    def nextToken(self) -> Token:
        """Return the next token, serving queued tokens before new ones.

        When the end of the input is next and indentation levels are still
        open, a closing ``LINE_BREAK`` (unless the newest queued token already
        is one) and one ``DEDENT`` per open level are queued first. The base
        lexer is then asked for its next token, and the oldest queued token is
        returned.

        Returns:
            The oldest queued token, or the base lexer's token when nothing is
            queued.
        """
        # Check if the end-of-file is ahead and there are still some DEDENTS expected.
        if self._input.LA(1) == Token.EOF and self.__indents:
            newest_queued: Optional[Token] = self.__buffer[self.__last_tokens_ind]

            # The slot is None when the queue has been drained. It must be
            # tested with "is None" *before* reading ".type"; the previous
            # "is not None or ..." test dereferenced None on an empty queue
            # and was always true on a non-empty one.
            if newest_queued is None or newest_queued.type != self.LINE_BREAK:
                # First emit an extra line break that serves as the end of the statement.
                self.__emit_token_type(self.LINE_BREAK)

            # Now emit as many DEDENT tokens as needed.
            while self.__indents:
                self.__emit_token_type(self.DEDENT)
                self.__indents.pop()

        next_token: Token = super().nextToken()

        # Nothing queued: hand back the base lexer's token directly.
        if self.__buffer[self.__first_tokens_ind] is None:
            return next_token

        # Pop the oldest queued token and vacate its slot.
        result: Token = self.__buffer[self.__first_tokens_ind]
        self.__buffer[self.__first_tokens_ind] = None

        # Advance the head only while more tokens remain; when head and tail
        # coincide the queue is now empty and both indexes stay where they are.
        if self.__first_tokens_ind != self.__last_tokens_ind:
            self.__first_tokens_ind = self.__inc_token_ind(self.__first_tokens_ind)

        return result



[docs]
    def HandleNewLine(self) -> None:
        """Emit the matched line terminator as a hidden ``NEWLINE`` token.

        If the following character starts real content at column 0 (it is
        not a blank, a line terminator, or a comment, and no nesting opened
        via ``IncIndentLevel`` is active), indentation processing is run with
        an indent of 0. Nothing more happens at end of input.
        """
        self.__emit_token_type_on_channel(self.NEWLINE, self.HIDDEN, self.text)

        lookahead = self._input.LA(1)
        if lookahead == -1:
            # End of input: there is no next line to measure.
            return

        upcoming: str = chr(lookahead)
        if upcoming == ' ' or upcoming == '\t':
            # Leading blanks are measured in HandleSpaces.
            return

        if self.__is_not_new_line_or_comment(upcoming):
            self.__process_new_line(0)



[docs]
    def HandleSpaces(self) -> None:
        """Emit the matched run of blanks as a hidden ``WS`` token.

        When the run opens a line (no token has been emitted yet, or the last
        emitted token is a ``NEWLINE``) and is followed by real content (not a
        line terminator, a comment, or the end of the input) while no nesting
        opened via ``IncIndentLevel`` is active, the width of the run is
        computed and handed to the indentation processing, which queues
        ``LINE_BREAK`` and ``INDENT``/``DEDENT`` tokens ahead of the ``WS``
        token.
        """
        spaces: str = self.text
        lookahead: int = self._input.LA(1)

        # LA(1) returns Token.EOF at the end of the input, and chr() raises
        # ValueError for that value, so EOF is ruled out before converting.
        # Blanks running up to the end of the input form a whitespace-only
        # line and carry no indentation, mirroring HandleNewLine, which also
        # skips indentation processing at the end of the input.
        starts_line = self.__last_token is None or self.__last_token.type == self.NEWLINE
        if (lookahead != Token.EOF and starts_line
                and self.__is_not_new_line_or_comment(chr(lookahead))):
            # Calculates the indentation of the provided spaces, taking the
            # following rules into account:
            #
            # "Tabs are replaced (from left to right) by one to eight spaces
            #  such that the total number of characters up to and including
            #  the replacement is a multiple of eight [...]"
            #
            #  -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
            #
            # NOTE: this lexer rounds to a multiple of ``tab_size`` instead of
            # the eight columns quoted above.
            tab_size = PythonLexerBase.tab_size
            indent: int = 0

            for blank in spaces:
                if blank == '\t':
                    # Advance to the next tab stop.
                    indent += tab_size - indent % tab_size
                else:
                    indent += 1

            self.__process_new_line(indent)

        self.__emit_token_type_on_channel(self.WS, self.HIDDEN, spaces)



[docs]
    def IncIndentLevel(self) -> None:
        """Enter one more nesting level.

        While the nesting counter is non-zero, line breaks and leading blanks
        do not trigger indentation processing.
        """
        # presumably invoked by the grammar on an opening bracket -- confirm against the .g4 file
        self.__opened = self.__opened + 1



[docs]
    def DecIndentLevel(self) -> None:
        """Leave one nesting level; a counter that is already zero stays zero."""
        if not self.__opened:
            # Nothing is open: ignore the unbalanced close.
            return
        self.__opened -= 1


    def __is_not_new_line_or_comment(self, next_char: str) -> bool:
        """Tell whether ``next_char`` starts content that carries indentation.

        Args:
            next_char: the character following the text just matched.

        Returns:
            True only when no nesting level is open and ``next_char`` is
            neither a line terminator (CR, LF, form feed) nor the start of a
            comment (``#``).
        """
        return self.__opened == 0 and next_char not in ('\r', '\n', '\f', '#')

    def __process_new_line(self, indent: int) -> None:
        """Queue the tokens that open a new line indented by ``indent``.

        A ``LINE_BREAK`` is always queued. It is followed by one ``INDENT``
        when ``indent`` exceeds the innermost open level (0 when none is
        open), otherwise by one ``DEDENT`` for every open level deeper than
        ``indent``.

        Args:
            indent: width of the new line's indentation, in columns.
        """
        self.__emit_token_type(self.LINE_BREAK)

        levels = self.__indents
        innermost: int = levels[-1] if levels else 0

        if indent > innermost:
            levels.append(indent)
            self.__emit_token_type(self.INDENT)
            return

        # Possibly emit more than 1 DEDENT token.
        while levels and levels[-1] > indent:
            self.__emit_token_type(self.DEDENT)
            levels.pop()

    def __inc_token_ind(self, ind: int) -> int:
        """Return the ring-buffer index after ``ind``, wrapping to 0 at the end."""
        capacity = len(self.__buffer)
        return (ind + 1) % capacity

    def __emit_token_type(self, token_type: int) -> None:
        """Queue an empty-text token of ``token_type`` on the default channel."""
        self.__emit_token_type_on_channel(
            token_type,
            channel=self.DEFAULT_TOKEN_CHANNEL,
            text="",
        )

    def __emit_token_type_on_channel(self, token_type: int, channel: int, text: str) -> None:
        """Build a token ending at the current input position and queue it.

        Args:
            token_type: type of the token to create.
            channel: channel the token is placed on.
            text: text of the token; its length fixes the start index.
        """
        stop_index: int = self.getCharIndex()
        start_index: int = stop_index - len(text)
        # NOTE(review): the stop index is the current character index, i.e.
        # one past the last character of ``text`` -- confirm whether consumers
        # expect an inclusive stop index here.
        synthetic: CommonToken = CommonToken(
            self._tokenFactorySourcePair, token_type, channel, start_index, stop_index)

        # Position and text are taken from the lexer's current state.
        synthetic.line = self.line
        synthetic.column = self.column
        synthetic.text = text

        self.emitToken(synthetic)




[docs]
class PythonVersion(Enum):
    """Python language version a ``PythonParserBase`` is configured for."""

    # No version pinned: PythonParserBase._check_version accepts every version.
    Autodetect = 0
    # The values of the pinned versions are compared against the integer
    # passed to PythonParserBase._check_version.
    Python2 = 2
    Python3 = 3




[docs]
class PythonParserBase(Parser):
    def __init__(self, input_stream: TokenStream) -> None:
        """Create the parser over ``input_stream`` with the version set to ``Autodetect``.

        Args:
            input_stream: token stream forwarded to the base ``Parser``.
        """
        super().__init__(input_stream)
        # Starts unpinned; changed through the ``version`` property or ``set_version``.
        self.__version: PythonVersion = PythonVersion.Autodetect

    @property
    def version(self) -> PythonVersion:
        """The ``PythonVersion`` this parser is currently configured for."""
        return self.__version

    @version.setter
    def version(self, version):
        """Set the configured version.

        Args:
            version: a ``PythonVersion`` member, or a value that
                ``PythonVersion(...)`` can convert to one (0, 2 or 3).

        Raises:
            ValueError: if ``version`` is not a valid ``PythonVersion`` value.
        """
        self.__version = (
            version if isinstance(version, PythonVersion) else PythonVersion(version)
        )

    def _check_version(self, version: int) -> bool:
        """Tell whether ``version`` is acceptable for the configured version.

        Args:
            version: Python major version number to test.

        Returns:
            True when the parser is set to ``Autodetect`` or when ``version``
            equals the value of the configured ``PythonVersion`` member.
        """
        configured = self.__version
        if configured == PythonVersion.Autodetect:
            return True
        return version == configured.value


[docs]
    def set_version(self, required_version: int) -> None:
        """Pin the parser to ``required_version``.

        Args:
            required_version: value of a ``PythonVersion`` member (0, 2 or 3).

        Raises:
            ValueError: if ``required_version`` is not a valid
                ``PythonVersion`` value.
        """
        pinned = PythonVersion(required_version)
        self.__version = pinned