Pular para conteúdo

Criando um leitor de JSON

from dataclasses import dataclass
from string import ascii_letters, digits

type JSON = str | int | list["JSON"] | dict[str, "JSON"]

# ASCII letters, digits, the space character and a few punctuation marks.
STRING_CHARS = set(ascii_letters + digits + " !#$%&'()*_")

def read_json(source: str) -> JSON:
    """Parse ``source`` as a single JSON document and return the decoded value.

    Raises:
        ValueError: If the text is not valid for the parser, or if any input
            remains after the top-level value has been read.
    """
    reader = Parser(source)
    result = reader.value()
    # The whole input must have been consumed by the top-level value.
    if reader.position == len(source):
        return result
    raise ValueError("Extra data after JSON value")

@dataclass
class Parser:
    source: str
    position: int = 0

    def value(self) -> JSON:
        self.ws()
        match self.peek():
            case '[':
                value = self.array()
            case '{':
                value = self.object()
            case "t":
                self.literal("true")
                value = True
            case "f":
                self.literal("false")
                value = False
            case "n":
                self.literal("null")
                value = None
            case '"':
                value = self.string()
            case c if c in "0123456789":
                value = self.number()
            case x:
                raise ValueError(f"Unexpected character: {x!r}")

        self.ws()
        return value

    def array(self) -> list[JSON]:
        self.read('[')
        self.ws()
        items = []

        # Lista vazia
        if self.peek() == ']':
            self.position += 1
            return items

        # Le o primeiro elemento
        value = self.value()
        items.append(value)

        # Le os elementos seguintes
        while self.peek() == ',':
            self.position += 1
            value = self.value()
            items.append(value)

        self.read(']')
        return items

    def object(self) -> dict[str, JSON]:
        self.read('{')
        self.ws()
        items = {}

        # Objeto vazio
        if self.peek() == '}':
            self.position += 1
            return items

        # Le o primeiro elemento
        self.ws()
        key = self.string()
        self.ws()
        self.read(':')
        value = self.value()
        items[key] = value

        # Le os elementos seguintes
        while self.peek() == ',':
            self.position += 1
            self.ws()
            key = self.string()
            self.ws()
            self.read(':')
            value = self.value()
            items[key] = value

        self.read('}')
        return items

    def string(self) -> str:
        chars = []
        self.read('"')
        while self.peek() in STRING_CHARS:
            chars.append(self.peek())
            self.position += 1
        self.read('"')
        return "".join(chars)

    def number(self) -> int:
        chars = []
        while self.peek() in "0123456789":
            chars.append(self.peek())
            self.position += 1
        text = "".join(chars)
        return int(text)

    def ws(self) -> None:
        while self.peek() in " \t\n\r":
            self.position += 1

    def peek(self) -> str:
        if self.position < len(self.source):
            return self.source[self.position]
        return "\0"

    def read(self, expected: str) -> None:
        if self.position >= len(self.source):
            raise ValueError("Unexpected end of input")
        if self.source[self.position] != expected:
            raise ValueError(f"Expected {expected!r}, got {self.source[self.position]!r}")
        self.position += 1

    def literal(self, text: str) -> None:
        if self.source.startswith(text, self.position):
            self.position += len(text)
        else:
            raise ValueError(f"Expected {text!r}")

if __name__ == "__main__":
    import sys
    from pprint import pprint

    # Command-line entry point: parse the file named by the first argument
    # and print the type and contents of the decoded value.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} FILE")

    # JSON text is UTF-8 (RFC 8259); do not depend on the locale's default
    # encoding, which differs between platforms.
    with open(sys.argv[1], encoding="utf-8") as f:
        source = f.read()
    result = read_json(source)
    print(type(result))
    pprint(result)