Source code for pycropml.transpiler.antlr_py.grammars.PythonLexerBase

import sys
from enum import Enum
from pkgutil import get_data
from typing import List, Optional, Dict, TextIO
from antlr4 import Lexer, Token, InputStream, Parser, TokenStream
from antlr4.Token import CommonToken

class PythonLexerBase(Lexer):
    """Lexer base class that turns leading whitespace into INDENT/DEDENT tokens.

    Tokens produced while handling new lines and leading spaces are queued in
    a growable ring buffer so that a single call into the underlying lexer can
    yield several tokens (e.g. LINE_BREAK followed by one or more DEDENTs).

    The token type constants used here (``NEWLINE``, ``WS``, ``LINE_BREAK``,
    ``INDENT``, ``DEDENT``) are not defined in this class; presumably they are
    supplied by the generated lexer subclass -- TODO confirm.
    """

    # Width of a tab stop used when measuring indentation.
    # NOTE(review): the rule quoted in HandleSpaces speaks of multiples of
    # eight, while this value is 4 -- confirm which one is intended.
    tab_size = 4

    def __init__(self, input_stream: InputStream, output: TextIO = sys.stdout):
        """Create the lexer and initialise the indentation/token-buffer state."""
        super().__init__(input_stream, output)
        # Nesting depth tracked by IncIndentLevel/DecIndentLevel; while it is
        # non-zero, new lines are not treated as significant.
        self.__opened: int = 0
        # Stack of the indentation widths that are currently open.
        self.__indents: List[int] = []
        # Ring buffer of pending tokens: [first, last] are the occupied slots.
        self.__first_tokens_ind: int = 0
        self.__last_tokens_ind: int = 0
        self.__buffer: List[Optional[Token]] = [None for _ in range(32)]
        # Most recently emitted token (on any channel).
        self.__last_token: Optional[Token] = None

    def emitToken(self, token: Token) -> None:
        """Queue ``token`` in the ring buffer and record it as the last token."""
        self._token = token

        if self.__buffer[self.__first_tokens_ind] is not None:
            # Buffer is non-empty: advance the write position.
            self.__last_tokens_ind = self.__inc_token_ind(self.__last_tokens_ind)

            if self.__last_tokens_ind == self.__first_tokens_ind:
                # The write position wrapped onto the read position: the
                # buffer is full, so double it. The part before the read
                # position keeps its place; the part from the read position
                # onwards is moved to the end of the new array.
                old_len = len(self.__buffer)
                new_array: List[Optional[Token]] = [None for _ in range(old_len * 2)]
                tail_len = old_len - self.__first_tokens_ind
                dest_ind = len(new_array) - tail_len

                new_array[0:self.__first_tokens_ind] = self.__buffer[0:self.__first_tokens_ind]
                new_array[dest_ind:dest_ind + tail_len] = \
                    self.__buffer[self.__first_tokens_ind:old_len]

                self.__first_tokens_ind = dest_ind
                self.__buffer = new_array

        self.__buffer[self.__last_tokens_ind] = token
        self.__last_token = token

    def nextToken(self) -> Token:
        """Return the next token, draining the pending-token buffer first.

        When the end of input is ahead and indentation levels are still open,
        a closing LINE_BREAK (unless the newest buffered token already is one)
        and one DEDENT per open level are queued before delegating to the
        base lexer.
        """
        # Check if the end-of-file is ahead and there are still some DEDENTS expected.
        if self._input.LA(1) == Token.EOF and self.__indents:
            newest: Optional[Token] = self.__buffer[self.__last_tokens_ind]
            # The slot is None when the buffer has been fully drained; in that
            # case, or when the newest token is not a LINE_BREAK, emit an
            # extra line break that serves as the end of the statement.
            if newest is None or newest.type != self.LINE_BREAK:
                self.__emit_token_type(self.LINE_BREAK)

            # Now emit as many DEDENT tokens as needed.
            while self.__indents:
                self.__emit_token_type(self.DEDENT)
                self.__indents.pop()

        next_token: Token = super().nextToken()

        if self.__buffer[self.__first_tokens_ind] is None:
            # Nothing was queued: hand back what the base lexer produced.
            return next_token

        result: Token = self.__buffer[self.__first_tokens_ind]
        self.__buffer[self.__first_tokens_ind] = None

        if self.__first_tokens_ind != self.__last_tokens_ind:
            self.__first_tokens_ind = self.__inc_token_ind(self.__first_tokens_ind)

        return result

    def HandleNewLine(self) -> None:
        """Emit a hidden NEWLINE token and, if the next line starts at column 0, process it.

        Lines that start with a space or tab are left to HandleSpaces.
        """
        self.__emit_token_type_on_channel(self.NEWLINE, self.HIDDEN, self.text)

        c = self._input.LA(1)
        if c == Token.EOF:
            return
        next_char: str = chr(c)

        # Leading whitespace is processed in HandleSpaces.
        if next_char != ' ' and next_char != '\t' and self.__is_not_new_line_or_comment(next_char):
            self.__process_new_line(0)

    def HandleSpaces(self) -> None:
        """Emit a hidden WS token, first updating indentation if the spaces start a line."""
        text: str = self.text
        c = self._input.LA(1)
        starts_line = self.__last_token is None or self.__last_token.type == self.NEWLINE

        # At end of input there is no following character (chr(-1) would raise
        # ValueError); as in HandleNewLine, no indentation is processed then.
        if c != Token.EOF and starts_line and self.__is_not_new_line_or_comment(chr(c)):
            # Calculates the indentation of the provided spaces, taking the
            # following rules into account:
            #
            # "Tabs are replaced (from left to right) by one to eight spaces
            #  such that the total number of characters up to and including
            #  the replacement is a multiple of eight [...]"
            #
            #  -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
            tab = PythonLexerBase.tab_size
            indent: int = 0
            for ch in text:
                # A tab advances to the next tab stop; anything else counts as one column.
                indent += (tab - indent % tab) if ch == '\t' else 1

            self.__process_new_line(indent)

        self.__emit_token_type_on_channel(self.WS, self.HIDDEN, text)

    def IncIndentLevel(self) -> None:
        """Increase the nesting counter that suppresses new-line processing."""
        self.__opened += 1

    def DecIndentLevel(self) -> None:
        """Decrease the nesting counter, never letting it drop below zero."""
        if self.__opened:
            self.__opened -= 1

    def __is_not_new_line_or_comment(self, next_char: str) -> bool:
        """Return True when no nesting is open and ``next_char`` does not start a blank or comment line."""
        return (self.__opened == 0 and
                next_char != '\r' and next_char != '\n' and next_char != '\f' and next_char != '#')

    def __process_new_line(self, indent: int) -> None:
        """Emit LINE_BREAK, then INDENT or DEDENT tokens to reach the ``indent`` width."""
        self.__emit_token_type(self.LINE_BREAK)

        previous: int = self.__indents[-1] if self.__indents else 0

        if indent > previous:
            self.__indents.append(indent)
            self.__emit_token_type(self.INDENT)
        else:
            # Possibly emit more than 1 DEDENT token.
            while self.__indents and self.__indents[-1] > indent:
                self.__emit_token_type(self.DEDENT)
                self.__indents.pop()

    def __inc_token_ind(self, ind: int) -> int:
        """Return the ring-buffer index following ``ind``, wrapping at the end."""
        return (ind + 1) % len(self.__buffer)

    def __emit_token_type(self, token_type: int) -> None:
        """Emit an empty-text token of ``token_type`` on the default channel."""
        self.__emit_token_type_on_channel(token_type, self.DEFAULT_TOKEN_CHANNEL, "")

    def __emit_token_type_on_channel(self, token_type: int, channel: int, text: str) -> None:
        """Build a CommonToken ending at the current char index and emit it."""
        char_index: int = self.getCharIndex()
        token: CommonToken = CommonToken(
            self._tokenFactorySourcePair,
            token_type,
            channel,
            char_index - len(text),
            char_index)
        token.line = self.line
        token.column = self.column
        token.text = text
        self.emitToken(token)
class PythonVersion(Enum):
    """Python language version selector; each member's value is its number."""

    # 0 is the value used when no specific version has been chosen.
    Autodetect = 0
    Python2 = 2
    Python3 = 3
class PythonParserBase(Parser):
    """Parser base class that carries the Python version being parsed."""

    def __init__(self, input_stream: TokenStream):
        """Create the parser; the version starts out as ``Autodetect``."""
        super().__init__(input_stream)
        self.__version = PythonVersion.Autodetect

    @property
    def version(self) -> PythonVersion:
        """The currently selected :class:`PythonVersion`."""
        return self.__version

    @version.setter
    def version(self, version):
        # Accept either an enum member or a raw value convertible to one.
        self.__version = version if isinstance(version, PythonVersion) else PythonVersion(version)

    def _check_version(self, version: int) -> bool:
        """Return True if ``version`` matches the selected one, or if the selection is ``Autodetect``."""
        selected = self.__version
        if selected == PythonVersion.Autodetect:
            return True
        return version == selected.value

    def set_version(self, required_version: int) -> None:
        """Select the version whose enum value is ``required_version``."""
        self.__version = PythonVersion(required_version)