"""
Context: We are working with a customer that lets us download internal Ad reports in json format. The format is a dictionary (key-value data) with the following properties.

Keys:
* The dictionary keys are alphanumeric strings.
* The maximum length of a single key is 10 alphanumeric characters, the minimum is 1 character (there are no empty keys).
* All keys are unique.

Values:
* Every value is a list of unsigned 32-bit integers.
* The maximum length of the value list is 5 items.

The customer’s server always returns valid json.

Example:
{
  "a": [],
  "abc123": [1,2,3,4,5],
  "xyz": [4294967295, 0]
}

Q1: Let’s download the report with HTTP GET “<url_1>/report.json” and save it into a database.

Q2: We’ve signed a contract with another customer. They are
willing to provide us with the same kind of reporting in the same
format. Let’s download it with HTTP GET “<url_2>/report.json”. We
also don’t want to spend time waiting for data, so let's download
both reports at the same time next time we need them.

Q3: We’ve signed a contract with a big new customer! They agreed
to provide us with similar ad reports in the same format. They also
expect us to process and analyse the entire historical dataset they
have. The format is the same as the first two customers’ reports.
The new customer says that the dictionary for download contains
10 Billion (B!) unique attributes (keys), and every value is a list
of unsigned integers, like in the previous reports, with the same max
lengths.  Let’s download it via HTTP GET “<url_3>/report.json” and save
it into the DB. The report endpoint is not paginated.

Q4: The client says that next time we should skip ingesting all
keys that start with the symbol “a”.
"""

import asyncio
import re
from collections.abc import AsyncIterable, AsyncIterator, Callable
from typing import Optional

import aiorequests

async def generate_key_values_from_json_stream_take3(
    url,
    *,
    chunk_source: Optional[Callable[..., AsyncIterable[str]]] = None,
) -> AsyncIterator[tuple[str, list[int]]]:
    """Stream ``(key, values)`` pairs out of a flat JSON report.

    The report is a single JSON object mapping alphanumeric keys to short
    lists of unsigned integers. The body is scanned character by character
    and parser state is carried across chunks, so a record split over a
    chunk boundary is still parsed and memory use does not grow with the
    size of the report.

    Args:
        url: Address of the report, passed through to ``chunk_source``.
        chunk_source: Callable invoked as ``chunk_source(url, size_bytes=1000)``
            that returns an async iterable of text chunks. Defaults to
            ``aiorequests.stream_get``.

    Yields:
        ``(key, values)`` tuples in the order they appear in the report.

    NOTE(review): assumes every chunk is a ``str``, not ``bytes`` -- confirm
    against ``aiorequests.stream_get``.
    """
    if chunk_source is None:
        chunk_source = aiorequests.stream_get

    ascii_digits = "0123456789"
    key_chars: list[str] = []
    digit_chars: list[str] = []
    values: list[int] = []
    in_key = False
    in_list = False

    async for text_chunk in chunk_source(url, size_bytes=1000):
        for character in text_chunk:
            if in_key:
                if character == '"':
                    in_key = False
                else:
                    key_chars.append(character)
            elif in_list:
                if character in ascii_digits:
                    digit_chars.append(character)
                    continue
                # Any non-digit (",", "]" or whitespace) terminates the
                # number that was being accumulated, if there was one.
                if digit_chars:
                    values.append(int("".join(digit_chars)))
                    digit_chars = []
                if character == "]":
                    in_list = False
                    yield "".join(key_chars), values
                    # Rebind rather than clear: the consumer now owns the
                    # list that was just yielded.
                    key_chars, values = [], []
            elif character == '"':
                in_key = True
            elif character == "[":
                in_list = True
            # Everything else outside a key or list ("{", "}", ":", ",",
            # whitespace) carries no information for this format.
            
            

async def generate_key_values_from_json_stream_take2(
    url,
    key_filter_predicate: Callable[[str], bool],
    *,
    chunk_source: Optional[Callable[..., AsyncIterable[str]]] = None,
) -> AsyncIterator[tuple[str, list[int]]]:
    """Stream the ``(key, values)`` pairs whose key passes a filter.

    The report is a single JSON object mapping alphanumeric keys to short
    lists of unsigned integers. The body is scanned character by character
    with state carried across chunks, so records split over a chunk boundary
    are parsed correctly. The values of a rejected key are skipped without
    being converted to integers.

    Args:
        url: Address of the report, passed through to ``chunk_source``.
        key_filter_predicate: Called once per key, in report order; the
            record is yielded only when the result is truthy.
        chunk_source: Callable invoked as ``chunk_source(url, size_bytes=1000)``
            that returns an async iterable of text chunks. Defaults to
            ``aiorequests.stream_get``.

    Yields:
        ``(key, values)`` tuples for accepted keys, in report order.

    NOTE(review): assumes every chunk is a ``str``, not ``bytes`` -- confirm
    against ``aiorequests.stream_get``.
    """
    if chunk_source is None:
        chunk_source = aiorequests.stream_get

    seeking, reading_key, reading_list = range(3)
    state = seeking
    key_chars: list[str] = []
    number_chars: list[str] = []
    values: list[int] = []
    key = ""
    keep = False

    async for text_chunk in chunk_source(url, size_bytes=1000):
        for character in text_chunk:
            if state == reading_key:
                if character != '"':
                    key_chars.append(character)
                    continue
                # Closing quote: the key is complete, decide its fate now so
                # the list of a rejected key never has to be parsed.
                key = "".join(key_chars)
                key_chars = []
                keep = key_filter_predicate(key)
                state = seeking
            elif state == reading_list:
                if not keep:
                    if character == "]":
                        state = seeking
                    continue
                if "0" <= character <= "9":
                    number_chars.append(character)
                    continue
                # A non-digit ends the number being accumulated, if any.
                if number_chars:
                    values.append(int("".join(number_chars)))
                    number_chars = []
                if character == "]":
                    state = seeking
                    yield key, values
                    # Rebind rather than clear: the consumer now owns the
                    # list that was just yielded.
                    values = []
            elif character == '"':
                state = reading_key
            elif character == "[":
                state = reading_list
            # Other characters between records ("{", "}", ":", ",",
            # whitespace) carry no information for this format.

async def generate_key_values_from_json_stream(
    url,
    *,
    chunk_source: Optional[Callable[..., AsyncIterable[str]]] = None,
) -> AsyncIterator[tuple[str, list[int]]]:
    """Stream ``(key, values)`` pairs out of a flat JSON report using a regex.

    Incoming chunks are appended to a small buffer. Every complete
    ``"key": [ ... ]`` record found in the buffer is yielded, and whatever
    follows the last complete record is kept for the next chunk, so records
    split over a chunk boundary are not lost.

    Args:
        url: Address of the report, passed through to ``chunk_source``.
        chunk_source: Callable invoked as ``chunk_source(url, size_bytes=1000)``
            that returns an async iterable of text chunks. Defaults to
            ``aiorequests.stream_get``.

    Yields:
        ``(key, values)`` tuples in the order they appear in the report.

    NOTE(review): assumes every chunk is a ``str``, not ``bytes`` -- confirm
    against ``aiorequests.stream_get``.
    """
    if chunk_source is None:
        chunk_source = aiorequests.stream_get

    # Keys are alphanumeric, so neither quotes nor brackets can occur inside
    # a key and "]" reliably marks the end of a record.
    record_pattern = re.compile(r'"([A-Za-z0-9]+)"\s*:\s*\[([^\]]*)\]')
    pending = ""

    async for text_chunk in chunk_source(url, size_bytes=1000):
        pending += text_chunk
        consumed = 0
        for match in record_pattern.finditer(pending):
            key, raw_values = match.groups()
            # An empty list gives one blank item after split(); drop it.
            yield key, [int(item) for item in raw_values.split(",") if item.strip()]
            consumed = match.end()
        # Keep only the unfinished tail; it is at most one partial record.
        pending = pending[consumed:]

async def retrieve_json(url):
    """Request ``url`` via ``aiorequests.get`` and return its awaited ``json()`` result.

    NOTE(review): the ``get()`` call itself is not awaited, only ``json()``
    is -- confirm that ``aiorequests.get`` returns the response object
    directly rather than an awaitable.
    """
    return await aiorequests.get(url).json()


async def main():
    """Download both customer reports concurrently.

    Both ``retrieve_json`` calls are scheduled together and awaited as a
    group, so neither download waits for the other to finish.

    NOTE(review): both URLs are currently the same placeholder; replace them
    with the real per-customer endpoints.
    """
    url1 = "https://whatever.com/report.json"
    url2 = "https://whatever.com/report.json"
    # gather() takes the awaitables as separate positional arguments;
    # handing it a single list raises TypeError.
    await asyncio.gather(retrieve_json(url1), retrieve_json(url2))


# Run the downloads only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
