from hazm import tree2brackets


class TestChunker:
    """Tests driven by the ``chunker`` fixture: parse, data_maker and evaluate."""

    def test_parse(self: "TestChunker", chunker):
        """The string form of ``chunker.parse`` must equal the expected tree text."""
        tagged_sentence = [
            ("نامه", "NOUN,EZ"),
            ("ایشان", "PRON"),
            ("را", "ADP"),
            ("دریافت", "NOUN"),
            ("داشتم", "VERB"),
            (".", "PUNCT"),
        ]
        # Adjacent literals are concatenated into one multi-line string.
        expected_tree = (
            "(S\n"
            "  (NP نامه/NOUN,EZ ایشان/PRON)\n"
            "  (POSTP را/ADP)\n"
            "  (VP دریافت/NOUN داشتم/VERB)\n"
            "  ./PUNCT)"
        )

        rendered = str(chunker.parse(tagged_sentence))

        assert rendered == expected_tree

    def test_data_maker(self: "TestChunker", chunker):
        """``chunker.data_maker`` must return the expected feature dict per token."""
        sentences = [
            [
                ("من", "PRON"),
                ("به", "ADP"),
                ("مدرسه", "NOUN,EZ"),
                ("ایران", "NOUN"),
                ("رفته_بودم", "VERB"),
                (".", "PUNCT"),
            ],
        ]
        expected_features = [
            [
                {
                    "word": "من",
                    "is_first": True,
                    "is_last": False,
                    "prefix-1": "م",
                    "prefix-2": "من",
                    "prefix-3": "من",
                    "suffix-1": "ن",
                    "suffix-2": "من",
                    "suffix-3": "من",
                    "prev_word": "",
                    "two_prev_word": "",
                    "next_word": "به",
                    "two_next_word": "مدرسه",
                    "is_numeric": False,
                    "prev_is_numeric": "",
                    "next_is_numeric": False,
                    "is_punc": False,
                    "prev_is_punc": "",
                    "next_is_punc": False,
                    "pos": "PRON",
                    "prev_pos": "",
                    "next_pos": "ADP",
                },
                {
                    "word": "به",
                    "is_first": False,
                    "is_last": False,
                    "prefix-1": "ب",
                    "prefix-2": "به",
                    "prefix-3": "به",
                    "suffix-1": "ه",
                    "suffix-2": "به",
                    "suffix-3": "به",
                    "prev_word": "من",
                    # NOTE(review): "." is the sentence's last token, not a
                    # predecessor of this one — possibly index wraparound in
                    # data_maker; confirm this expectation is intended.
                    "two_prev_word": ".",
                    "next_word": "مدرسه",
                    "two_next_word": "ایران",
                    "is_numeric": False,
                    "prev_is_numeric": False,
                    "next_is_numeric": False,
                    "is_punc": False,
                    "prev_is_punc": False,
                    "next_is_punc": False,
                    "pos": "ADP",
                    "prev_pos": "PRON",
                    "next_pos": "NOUN,EZ",
                },
                {
                    "word": "مدرسه",
                    "is_first": False,
                    "is_last": False,
                    "prefix-1": "م",
                    "prefix-2": "مد",
                    "prefix-3": "مدر",
                    "suffix-1": "ه",
                    "suffix-2": "سه",
                    "suffix-3": "رسه",
                    "prev_word": "به",
                    "two_prev_word": "من",
                    "next_word": "ایران",
                    "two_next_word": "رفته_بودم",
                    "is_numeric": False,
                    "prev_is_numeric": False,
                    "next_is_numeric": False,
                    "is_punc": False,
                    "prev_is_punc": False,
                    "next_is_punc": False,
                    "pos": "NOUN,EZ",
                    "prev_pos": "ADP",
                    "next_pos": "NOUN",
                },
                {
                    "word": "ایران",
                    "is_first": False,
                    "is_last": False,
                    "prefix-1": "ا",
                    "prefix-2": "ای",
                    "prefix-3": "ایر",
                    "suffix-1": "ن",
                    "suffix-2": "ان",
                    "suffix-3": "ران",
                    "prev_word": "مدرسه",
                    "two_prev_word": "به",
                    "next_word": "رفته_بودم",
                    "two_next_word": ".",
                    "is_numeric": False,
                    "prev_is_numeric": False,
                    "next_is_numeric": False,
                    "is_punc": False,
                    "prev_is_punc": False,
                    "next_is_punc": False,
                    "pos": "NOUN",
                    "prev_pos": "NOUN,EZ",
                    "next_pos": "VERB",
                },
                {
                    "word": "رفته_بودم",
                    "is_first": False,
                    "is_last": False,
                    "prefix-1": "ر",
                    "prefix-2": "رف",
                    "prefix-3": "رفت",
                    "suffix-1": "م",
                    "suffix-2": "دم",
                    "suffix-3": "ودم",
                    "prev_word": "ایران",
                    "two_prev_word": "مدرسه",
                    "next_word": ".",
                    "two_next_word": "",
                    "is_numeric": False,
                    "prev_is_numeric": False,
                    "next_is_numeric": False,
                    "is_punc": False,
                    "prev_is_punc": False,
                    "next_is_punc": True,
                    "pos": "VERB",
                    "prev_pos": "NOUN",
                    "next_pos": "PUNCT",
                },
                {
                    "word": ".",
                    "is_first": False,
                    "is_last": True,
                    "prefix-1": ".",
                    "prefix-2": ".",
                    "prefix-3": ".",
                    "suffix-1": ".",
                    "suffix-2": ".",
                    "suffix-3": ".",
                    "prev_word": "رفته_بودم",
                    "two_prev_word": "ایران",
                    "next_word": "",
                    "two_next_word": "",
                    "is_numeric": False,
                    "prev_is_numeric": False,
                    "next_is_numeric": "",
                    "is_punc": True,
                    "prev_is_punc": False,
                    "next_is_punc": "",
                    "pos": "PUNCT",
                    "prev_pos": "VERB",
                    "next_pos": "",
                },
            ],
        ]

        features = chunker.data_maker(tokens=sentences)

        assert features == expected_features

    def test_evaluate(self: "TestChunker", chunker):
        """Evaluating the chunker on trees from its own ``parse_sents`` gives 1.0."""
        tagged_sentences = [
            [
                ("نامه", "NOUN,EZ"),
                ("ایشان", "PRON"),
                ("را", "ADP"),
                ("دریافت", "NOUN"),
                ("داشتم", "VERB"),
                (".", "PUNCT"),
            ],
        ]
        # Materialise the result of parse_sents before handing it to evaluate.
        gold_trees = list(chunker.parse_sents(tagged_sentences))

        score = chunker.evaluate(gold_trees)

        assert score == 1.0

class TestRuleBasedChunker:
    """Tests driven by the ``rull_based_chunker`` fixture."""

    # NOTE(review): the fixture name is spelled "rull_based_chunker"; it must
    # match the fixture definition, which is not visible in this file.
    def test_parse(self: "TestRuleBasedChunker", rull_based_chunker):
        """``tree2brackets`` of the parsed sentence must equal the expected text."""
        tagged_sentence = [
            ("نامه", "Ne"),
            ("۱۰", "NUMe"),
            ("فوریه", "Ne"),
            ("شما", "PRO"),
            ("را", "POSTP"),
            ("دریافت", "N"),
            ("داشتم", "V"),
            (".", "PUNC"),
        ]
        expected_brackets = "[نامه ۱۰ فوریه شما NP] [را POSTP] [دریافت داشتم VP] ."

        parsed_tree = rull_based_chunker.parse(tagged_sentence)
        bracketed = tree2brackets(parsed_tree)

        assert bracketed == expected_brackets

