﻿using HtmlAgilityPack;
using LMKit.Model;
using LMKit.TextGeneration;
using LMKit.TextGeneration.Sampling;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;

namespace web_content_info_extractor_to_json
{
    internal class Program
    {
        // Download URLs of the predefined models offered in the selection menu shown by Main.
        // Despite the _PATH suffix, each value is a full https URL; Main wraps the selected one in a Uri.
        static readonly string DEFAULT_LLAMA3_1_8B_MODEL_PATH = @"https://huggingface.co/lm-kit/llama-3.1-8b-instruct-gguf/resolve/main/Llama-3.1-8B-Instruct-Q4_K_M.gguf?download=true";
        static readonly string DEFAULT_GEMMA2_9B_MODEL_PATH = @"https://huggingface.co/lm-kit/gemma-2-9b-gguf/resolve/main/gemma-2-9B-Q4_K_M.gguf?download=true";
        static readonly string DEFAULT_PHI3_5_MINI_3_8B_MODEL_PATH = @"https://huggingface.co/lm-kit/phi-3.5-mini-3.8b-instruct-gguf/resolve/main/Phi-3.5-mini-Instruct-Q4_K_M.gguf?download=true";
        static readonly string DEFAULT_QWEN2_5_7B_MODEL_PATH = @"https://huggingface.co/lm-kit/qwen-2.5-7b-instruct-gguf/resolve/main/Qwen-2.5-7B-Instruct-Q4_K_M.gguf?download=true";
        static readonly string DEFAULT_QWEN2_5_05B_MODEL_PATH = @"https://huggingface.co/lm-kit/qwen-2.5-0.5b-instruct-gguf/resolve/main/Qwen-2.5-0.5B-Instruct-Q4_K_M.gguf?download=true";
        static readonly string DEFAULT_MISTRAL_NEMO_12_2B_MODEL_PATH = @"https://huggingface.co/lm-kit/mistral-nemo-2407-12.2b-instruct-gguf/resolve/main/Mistral-Nemo-2407-12.2B-Instruct-Q4_K_M.gguf?download=true";
        static readonly string DEFAULT_LLAMA_3_2_1B_MODEL_PATH = @"https://huggingface.co/lm-kit/llama-3.2-1b-instruct.gguf/resolve/main/Llama-3.2-1B-Instruct-Q4_K_M.gguf?download=true";

        // Set to true by ModelDownloadingProgress and reset by ModelLoadingProgress, which clears the
        // console once when it observes the flag (so the download line is wiped before loading output).
        static bool _isDownloading;

        /// <summary>
        /// Download progress callback: rewrites the current console line with the download state
        /// and records that a download has taken place.
        /// </summary>
        /// <param name="path">Location of the model being downloaded (not used by this callback).</param>
        /// <param name="contentLength">Total size in bytes when known; <c>null</c> otherwise.</param>
        /// <param name="bytesRead">Number of bytes received so far.</param>
        /// <returns>Always <c>true</c>.</returns>
        private static bool ModelDownloadingProgress(string path, long? contentLength, long bytesRead)
        {
            // Lets ModelLoadingProgress know it has to clear the download line first.
            _isDownloading = true;

            string status;

            if (!contentLength.HasValue)
            {
                // Unknown total size: only the raw byte count can be reported.
                status = $"\rDownloading model {bytesRead} bytes";
            }
            else
            {
                double percent = Math.Round((double)bytesRead / contentLength.Value * 100, 2);
                status = $"\rDownloading model {percent:0.00}%";
            }

            // The leading '\r' moves the cursor back so the line is overwritten in place.
            Console.Write(status);

            return true;
        }

        /// <summary>
        /// Loading progress callback: prints the loading percentage on a single console line.
        /// The console is cleared once if a download was reported beforehand.
        /// </summary>
        /// <param name="progress">Loading progress; multiplied by 100 to display a percentage.</param>
        /// <returns>Always <c>true</c>.</returns>
        private static bool ModelLoadingProgress(float progress)
        {
            bool downloadWasShown = _isDownloading;

            if (downloadWasShown)
            {
                // Wipe the leftover download progress line before the first loading update.
                Console.Clear();
                _isDownloading = false;
            }

            double percent = Math.Round(progress * 100);
            Console.Write($"\rLoading model {percent}%");

            return true;
        }

        /// <summary>
        /// Entry point of the demo. Lets the user pick a model (predefined or custom URI), loads it,
        /// then repeatedly asks for a web page URI, downloads the page, extracts its text and asks the
        /// model for a JSON summary restricted to four fields. An empty URI entry ends the loop.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        private static void Main(string[] args)
        {
            LMKit.Licensing.LicenseManager.SetLicenseKey(""); //set an optional license key here if available.
            Console.InputEncoding = Encoding.UTF8;
            Console.OutputEncoding = Encoding.UTF8;

            Console.Clear();

            WriteColor("*******************************************************************************************************\n" +
                       "* In this demo, we are extracting and summarizing web content into a JSON formatted output.           *\n" +
                       "* For each provided web page URI, the agent will output the following information, formatted in JSON: *\n" +
                       "* - 'Primary Topic': The main subject or theme of the content.                                        *\n" +
                       "* - 'Domain or Field': The area of knowledge or industry the content belongs to.                      *\n" +
                       "* - 'Language': The language in which the content is written.                                         *\n" +
                       "* - 'Audience': The intended or target audience for the content.                                      *\n" +
                       "*******************************************************************************************************\n", ConsoleColor.Blue);

            Console.WriteLine("Please select the model you want to use:\n");
            Console.WriteLine("0 - Mistral Nemo 2407 12.2B (requires approximately 7.7 GB of VRAM)");
            Console.WriteLine("1 - Meta Llama 3.1 8B (requires approximately 6 GB of VRAM)");
            Console.WriteLine("2 - Google Gemma2 9B Medium (requires approximately 7 GB of VRAM)");
            Console.WriteLine("3 - Microsoft Phi-3.5 3.82B Mini (requires approximately 3.3 GB of VRAM)");
            Console.WriteLine("4 - Alibaba Qwen-2.5 7.6B (requires approximately 5.6 GB of VRAM)");
            Console.WriteLine("5 - Alibaba Qwen-2.5 0.5B (requires approximately 0.8 GB of VRAM)");
            Console.WriteLine("6 - Meta Llama 3.2 1B (requires approximately 1 GB of VRAM)");
            Console.Write("Other entry: A custom model URI\n\n> ");

            // Console.ReadLine() returns null when the input stream is exhausted (e.g. redirected stdin).
            string input = (Console.ReadLine() ?? string.Empty).Trim();
            string modelLink;

            switch (input)
            {
                case "0":
                    modelLink = DEFAULT_MISTRAL_NEMO_12_2B_MODEL_PATH;
                    break;
                case "1":
                    modelLink = DEFAULT_LLAMA3_1_8B_MODEL_PATH;
                    break;
                case "2":
                    modelLink = DEFAULT_GEMMA2_9B_MODEL_PATH;
                    break;
                case "3":
                    modelLink = DEFAULT_PHI3_5_MINI_3_8B_MODEL_PATH;
                    break;
                case "4":
                    modelLink = DEFAULT_QWEN2_5_7B_MODEL_PATH;
                    break;
                case "5":
                    modelLink = DEFAULT_QWEN2_5_05B_MODEL_PATH;
                    break;
                case "6":
                    modelLink = DEFAULT_LLAMA_3_2_1B_MODEL_PATH;
                    break;
                default:
                    // Custom entry: tolerate a value pasted with surrounding double quotes.
                    modelLink = input.Trim('"');
                    break;
            }

            //Loading model
            // Validate the entry instead of letting 'new Uri' crash the demo with an unhandled exception.
            if (!Uri.TryCreate(modelLink, UriKind.Absolute, out Uri modelUri))
            {
                WriteColor("Error: '" + modelLink + "' is not a valid model URI.", ConsoleColor.Red);
                return;
            }

            LLM model = new LLM(modelUri,
                                    downloadingProgress: ModelDownloadingProgress,
                                    loadingProgress: ModelLoadingProgress);


            Console.Clear();

            SingleTurnConversation chat = new SingleTurnConversation(model)
            {
                MaximumCompletionTokens = 256,
                MaximumInputTokens = model.GpuLayerCount > 0 ? 3840 : 1024,
                SamplingMode = new GreedyDecoding(),
                SystemPrompt = @"You are an expert in extracting and summarizing web content. When provided with the content of a web page, respond with a JSON formatted output that always and only includes the following fields:

'Primary Topic': The main subject or theme of the content.
'Domain or Field': The area of knowledge or industry the content belongs to.
'Language': The language in which the content is written.
'Audience': The intended or target audience for the content."
            };


            // Constrain the generated output to a JSON object made of the four expected text fields.
            chat.Grammar = Grammar.CreateJsonGrammarFromTextFields(new string[] { "Primary Topic", "Domain or Field", "Language", "Audience" });

            // Echo generated text to the console as completion events are raised.
            chat.AfterTextCompletion += Chat_AfterTextCompletion;

            while (true)
            {
                WriteColor($"\nEnter webpage page URI to be analyzed: ", ConsoleColor.Green, addNL: false);

                string uri = Console.ReadLine();

                // A null (end of input) or blank entry ends the session.
                if (string.IsNullOrWhiteSpace(uri))
                {
                    break;
                }

                // Surrounding whitespace would otherwise defeat both the "www." shortcut and the validation.
                uri = uri.Trim();

                if (uri.StartsWith("www.", StringComparison.OrdinalIgnoreCase))
                {
                    uri = "https://" + uri;
                }

                if (!Uri.IsWellFormedUriString(uri, UriKind.Absolute))
                {
                    Console.Write($"\nThe provided URI is not correctly formatted.");
                    continue;
                }

                try
                {
                    string pageText = ExtractHtmlText(DownloadContent(new Uri(uri)));

                    WriteColor("Assistant: ", ConsoleColor.Green);

                    // Bound each request to two minutes; dispose the source to release its timer.
                    using (CancellationTokenSource timeout = new CancellationTokenSource(TimeSpan.FromMinutes(2)))
                    {
                        TextGenerationResult result = chat.Submit(pageText, timeout.Token);

                        Console.Write($"\n(gen. tokens: {result.GeneratedTokens.Count} - stop reason: {result.TerminationReason} - quality score: {Math.Round(result.QualityScore, 2)} - speed: {Math.Round(result.TokenGenerationRate, 2)} tok/s - ctx usage: {result.ContextTokens.Count}/{result.ContextSize})");
                    }
                }
                catch (Exception e)
                {
                    // Best effort: report the failure and keep prompting for the next URI.
                    WriteColor("Error: " + e.Message, ConsoleColor.Red);
                }
            }

            Console.WriteLine("The chat ended. Press any key to exit the application.");
            _ = Console.ReadKey();
        }

        /// <summary>
        /// Handler for the conversation's AfterTextCompletion event: writes the text carried by
        /// the event arguments to the console, without a trailing newline.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data whose <c>Text</c> is printed.</param>
        private static void Chat_AfterTextCompletion(object sender, LMKit.TextGeneration.Events.AfterTextCompletionEventArgs e)
            => Console.Write(e.Text);

        /// <summary>
        /// Normalizes the whitespace of extracted text: line endings are unified to '\n', runs of
        /// spaces, tabs and non-breaking spaces collapse to a single space, spaces around line
        /// breaks are removed, consecutive (blank) lines collapse to one line break, and the
        /// result is trimmed.
        /// </summary>
        /// <param name="text">Text to normalize; may be null or empty.</param>
        /// <returns>The normalized text, or an empty string when <paramref name="text"/> is null or empty.</returns>
        private static string NormalizeSpacings(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            // Unify Windows (\r\n) and lone carriage-return line endings.
            text = text.Replace("\r\n", "\n").Replace('\r', '\n');

            // Tabs and non-breaking spaces separate words just like spaces do: turn every run into
            // a single space rather than deleting it, so "a\tb" does not become "ab".
            text = Regex.Replace(text, "[ \\t\\u00A0]+", " ");

            // Remove the (already collapsed) space before a line break, any spaces after it,
            // and fold blank lines into a single line break.
            text = Regex.Replace(text, " ?\\n[ \\n]*", "\n");

            return text.Trim();
        }

        /// <summary>
        /// Extracts the readable text of an HTML document: concatenates every text node that is not
        /// a direct child of a script or style element, decodes HTML entities, and normalizes the
        /// whitespace of the result.
        /// </summary>
        /// <remarks>
        /// This approach may not be optimal, but it appears to be effective for the task at hand.
        /// </remarks>
        /// <param name="html">HTML markup to process; may be null or empty.</param>
        /// <returns>The extracted text, or an empty string when there is no input.</returns>
        private static string ExtractHtmlText(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);
            StringBuilder result = new StringBuilder();

            IEnumerable<HtmlNode> nodes = doc.DocumentNode.Descendants().Where(n =>
                                          n.NodeType == HtmlNodeType.Text &&
                                          n.ParentNode.Name != "script" &&
                                          n.ParentNode.Name != "style");

            foreach (HtmlNode node in nodes)
            {
                // InnerText of a text node keeps entities such as "&amp;" or "&nbsp;" encoded;
                // decode them so the model receives the actual characters.
                result.Append(HtmlEntity.DeEntitize(node.InnerText));
            }

            return NormalizeSpacings(result.ToString());
        }

        /// <summary>
        /// Synchronously downloads the content located at <paramref name="uri"/> as a string,
        /// sending a "User-Agent: Other" request header.
        /// </summary>
        /// <param name="uri">Absolute address of the resource to download.</param>
        /// <returns>The response body.</returns>
        /// <exception cref="InvalidOperationException">The response body is empty or whitespace only.</exception>
        /// <exception cref="HttpRequestException">The request failed (surfaced directly, not wrapped).</exception>
        private static string DownloadContent(Uri uri)
        {
            using (var client = new HttpClient())
            {
                client.DefaultRequestHeaders.Add("User-Agent", "Other");

                // GetAwaiter().GetResult() rethrows the original exception, whereas .Result wraps it
                // in an AggregateException whose message hides the actual cause from the caller.
                string content = client.GetStringAsync(uri).GetAwaiter().GetResult();

                if (string.IsNullOrWhiteSpace(content))
                {
                    throw new InvalidOperationException("an empty response has been received from: " + uri.AbsoluteUri);
                }

                return content;
            }
        }

        /// <summary>
        /// Writes <paramref name="text"/> to the console using the given foreground color, then
        /// restores the default console colors.
        /// </summary>
        /// <param name="text">Text to print.</param>
        /// <param name="color">Foreground color applied while printing.</param>
        /// <param name="addNL">When <c>true</c> (default) a line terminator follows the text.</param>
        private static void WriteColor(string text, ConsoleColor color, bool addNL = true)
        {
            Console.ForegroundColor = color;

            if (!addNL)
            {
                Console.Write(text);
            }
            else
            {
                Console.WriteLine(text);
            }

            Console.ResetColor();
        }
    }
}