#!/usr/bin/env python
import argparse
import sys
import markdown_to_json
import json
import re
import requests
from overrides import language_overrides, voice_overrides

def read_input(source):
    """Return the full text of the Markdown input.

    Args:
        source: Path of the file to read, or ``'-'`` to read everything
            from standard input.

    Returns:
        The complete contents as a single string.

    Raises:
        OSError: If ``source`` names a file that cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    if source == '-':
        return sys.stdin.read()
    # Decode explicitly as UTF-8 rather than the platform default so that
    # non-ASCII text in the Markdown reads the same on every system.
    with open(source, 'r', encoding='utf-8') as file:
        return file.read()

# Line shapes recognised in the JSON tree produced from the Markdown input.
# Language heading with a back-quoted locale followed by a comma-separated part.
_LANGUAGE_WITH_DETAIL_RE = re.compile(r'(\w+)\s+\(\`(\w+)\`,\s+(.+?)\)')
# Language heading carrying only a bare locale in parentheses.
_LANGUAGE_LOCALE_ONLY_RE = re.compile(r'(\w+)\s+\((\w+)\)')
# "<quality> - [[model](url)] [[config](url)]" entry listed beneath a voice.
_QUALITY_LINE_RE = re.compile(r'(\w+)\s-\s\[\[model\]\((https://.+)\)\]\s+\[\[config\]\((https://.+)\)\]')
# Splits a model URL into its base URL and the file name before "?download=true".
_MODEL_URL_RE = re.compile(r'(https://.+)/([^/].+?)\?download=true')
# Seconds to wait for a remote model config before giving up.
_CONFIG_TIMEOUT = 30


def _warn(message):
    """Write a warning to stderr so it never mixes with the generated output."""
    sys.stderr.write("warning: {}\n".format(message))


def _parse_language(heading):
    """Return ``(locale, label)`` for a language heading, or ``None`` if unrecognised."""
    match = _LANGUAGE_WITH_DETAIL_RE.search(heading)
    if match:
        return match.group(2), "{} ({})".format(match.group(1), match.group(3))
    match = _LANGUAGE_LOCALE_ONLY_RE.search(heading)
    if match:
        # Only a locale is available: label with what follows the underscore.
        suffix = re.sub(r'\w+?_', "", match.group(2))
        return match.group(2), "{} ({})".format(match.group(1), suffix)
    return None


def _display_name(voice):
    """Return the override for ``voice`` if one exists, else a title-cased name."""
    if voice in voice_overrides:
        return voice_overrides[voice]
    return voice.replace("_", " ").title()


def _build_entry(line, locale):
    """Return ``(quality, fields)`` for one quality line, or ``None`` if unusable.

    Downloads the model's config to read its sample rate and speaker map.
    """
    match = _QUALITY_LINE_RE.search(line)
    if not match:
        return None
    model_url = match.group(2)
    config_url = match.group(3)
    url_parts = _MODEL_URL_RE.search(model_url)
    if url_parts is None:
        _warn("cannot derive a file name from model URL: {}".format(model_url))
        return None
    base_url = url_parts.group(1)
    model_filename = url_parts.group(2)

    # Bound the wait and fail loudly on HTTP errors instead of hanging or
    # trying to parse an error page as JSON.
    response = requests.get(config_url, timeout=_CONFIG_TIMEOUT)
    response.raise_for_status()
    remote_config = response.json()

    quality = match.group(1)
    if quality == "x_low":
        quality = "Very Low"
    fields = [
        "{}".format(remote_config["audio"]["sample_rate"]),
        locale,
        model_url,
        config_url,
        model_filename,
        "{}{}".format(base_url, r"/samples/speaker_:speaker_id:.mp3?download=true"),
        "{}{}".format(base_url, "/MODEL_CARD?download=true"),
        remote_config["speaker_id_map"],
    ]
    return quality.title(), fields


def _collect_voices(entries, locale, target):
    """Add every voice/quality found in ``entries`` to the ``target`` dict."""
    voice = None
    for item in entries:
        if isinstance(item, str):
            print("      {}".format(item))
            voice = item
            continue
        if not isinstance(item, list):
            continue
        if voice is None:
            _warn("quality list without a preceding voice name; skipped")
            continue
        # Resolve the display name once from the raw voice name. Re-resolving
        # an already converted name per quality line could miss the override
        # and re-title-case it, giving inconsistent keys for the same voice.
        display_name = _display_name(voice)
        for line in item:
            print("        {}".format(line))
            if not isinstance(line, str):
                continue
            entry = _build_entry(line, locale)
            if entry is None:
                continue
            quality, fields = entry
            target["{} - {}".format(display_name, quality)] = fields


def main():
    """Convert the Markdown voice list into Dart map source for Pied.

    Command-line arguments (both optional, both default to ``'-'``):
        input:  Markdown file to read, or ``'-'`` for stdin.
        output: Dart file to write, or ``'-'`` to print to stdout.

    Progress information is printed to stdout while parsing. For every
    recognised model entry the referenced config is downloaded over HTTP.

    Raises:
        requests.RequestException: If a model config cannot be downloaded.
        KeyError: If a downloaded config lacks the expected keys.
        OSError: If the input or output file cannot be opened.
    """
    parser = argparse.ArgumentParser(description="Convert Piper project VOICES.md Markdown to a Pied JSON compatible data structures.")
    parser.add_argument('input', nargs='?', default='-', help="Input markdown file or '-' for stdin")
    parser.add_argument('output', nargs='?', default='-', help="Output dart file or '-' for stdout")
    args = parser.parse_args()

    input_text = read_input(args.input)
    output_filename = args.output

    voices = {}
    language_codes = {}

    jsonified = json.loads(markdown_to_json.jsonify(input_text))

    language = None
    locale = None
    for key in jsonified:
        print("==== Informational status ====")
        print("Languages:")
        for value in jsonified[key]:
            if isinstance(value, str):
                print("  {}".format(value))
                parsed = _parse_language(value)
                if parsed is None:
                    # Unrecognised heading: forget the current language so the
                    # voices below it are not filed under the previous one.
                    _warn("unrecognised language heading: {}".format(value))
                    language = None
                    locale = None
                    continue
                locale, label = parsed
                if locale in language_overrides:
                    label = language_overrides[locale]
                language_codes[locale] = label
                language = label
                # setdefault keeps voices already collected should two
                # headings resolve to the same label.
                voices.setdefault(language, {})
            elif isinstance(value, list):
                print("    Voices:")
                if language is None:
                    _warn("voice list without a recognised language; skipped")
                    continue
                _collect_voices(value, locale, voices[language])

    output_data = "Map<String, Map<String, List<dynamic>>> voices = %s;\n\nMap<String, String> languageCodes =%s;" % (json.dumps(voices, indent=2), json.dumps(language_codes, indent=2))
    if output_filename == "-":
        print("\nPied output:\n")
        print(output_data)
    else:
        # The context manager guarantees the file is flushed and closed even
        # if a write fails part-way through.
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(
"""// WARNING: This file is auto-generated by scripts/md-voices-to-map.py
// Any manual changes are likely to be overwritten.

// Format:
// Voice Name: [Sample Rate, Language Code, Model URL, Model Metadata URL, Filename, Sample URL, Model card, Speaker Mapping]

""")
            f.write(output_data)
        print("Written to:", output_filename)

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
