﻿// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Reflection;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Text.Matcher;

namespace Microsoft.Recognizers.Text.Sequence
{
    /// <summary>
    /// Sequence extractor for URLs. Candidate matches come from the two regexes supplied by the
    /// <see cref="URLConfiguration"/> plus the shared <c>BaseURL.UrlRegex2</c> pattern, and are then
    /// filtered by <see cref="IsValidMatch(Match)"/>.
    /// </summary>
    public class BaseURLExtractor : BaseSequenceExtractor
    {
        // Only assigned in the constructor; kept so the configuration stays reachable from the instance.
        private readonly URLConfiguration config;

        /// <summary>
        /// Initializes a new instance of the <see cref="BaseURLExtractor"/> class.
        /// </summary>
        /// <param name="config">Configuration providing <c>UrlRegex</c> and <c>IpUrlRegex</c>.</param>
        public BaseURLExtractor(URLConfiguration config)
        {
            this.config = config;

            // All three patterns are reported under the same URL_REGEX type name.
            var regexes = new Dictionary<Regex, string>
            {
                {
                    config.UrlRegex,
                    Constants.URL_REGEX
                },
                {
                    config.IpUrlRegex,
                    Constants.URL_REGEX
                },
                {
                    // Built with the same timeout as AmbiguousTimeTerm below so that matching
                    // this pattern is bounded as well.
                    new Regex(BaseURL.UrlRegex2, RegexOptions.Compiled, RegexTimeOut),
                    Constants.URL_REGEX
                },
            };

            Regexes = regexes.ToImmutableDictionary();
            AmbiguousTimeTerm = new Regex(BaseURL.AmbiguousTimeTerm, RegexOptions.Compiled, RegexTimeOut);

            TldMatcher = new StringMatcher();
            TldMatcher.Init(BaseURL.TldList);
        }

        /// <summary>
        /// Gets the regexes used to find URL candidates, each mapped to its type name.
        /// </summary>
        internal override ImmutableDictionary<Regex, string> Regexes { get; }

        /// <summary>
        /// Gets the regex match timeout, resolved through <c>SequenceRecognizer.GetTimeout</c>
        /// for the type that declares this property.
        /// </summary>
        protected static TimeSpan RegexTimeOut => SequenceRecognizer.GetTimeout(MethodBase.GetCurrentMethod().DeclaringType);

        /// <summary>
        /// Gets the extraction type name reported for URL results.
        /// </summary>
        protected sealed override string ExtractType { get; } = Constants.SYS_URL;

        // Matcher initialized with BaseURL.TldList; used to validate the "Tld" capture group.
        private StringMatcher TldMatcher { get; }

        // Pattern for matches that look like URLs but are more likely time terms.
        private Regex AmbiguousTimeTerm { get; }

        /// <summary>
        /// Decides whether a regex match should be kept as a URL.
        /// </summary>
        /// <param name="match">A match produced by one of the <see cref="Regexes"/>.</param>
        /// <returns>
        /// <c>false</c> when the matched text hits the ambiguous time-term pattern; otherwise
        /// <c>true</c> when the "IPurl" group matched or the whole "Tld" group value is an entry
        /// found by the TLD matcher.
        /// </returns>
        public override bool IsValidMatch(Match match)
        {
            // For cases like "7.am" or "8.pm" which are more likely time terms.
            // Checked first so rejected matches skip the TLD lookup entirely.
            if (AmbiguousTimeTerm.IsMatch(match.Value))
            {
                return false;
            }

            // IP-based URLs carry no TLD to validate.
            if (match.Groups["IPurl"].Success)
            {
                return true;
            }

            var tldString = match.Groups["Tld"].Value;
            var tldMatches = TldMatcher.Find(tldString);

            // Only accept a TLD hit that spans the entire captured value, not a partial one.
            return tldMatches.Any(o => o.Start == 0 && o.End == tldString.Length);
        }
    }
}
