#!/usr/bin/env python3

"""
Minimize an input JSON file for frontend use.

Implementation is based on the JavaScript code at:
`preprint-similarity-search/frontend/public/data/filter-plot.html`

The output JSON file of this script is slightly larger than the one
generated by the above HTML.  As an example, when the input file is
2021-04-26 auto_updater `pmc_square_plot.json`, the output size of this
script is *1,215,003* bytes, and the output size of HTML implementation
is *1,214,718* bytes. Part of the difference is due to the different
formatting of floating-point values (`-6.36783e-05` vs. `-0.0000636783`).

However, after both output files are "prettified" with the `jq` command,
their contents are confirmed to be identical.
"""

import json
import sys

from utils import set_read_only


def minimize_entry(input_entry):
    """
    Build the reduced record for a single input entry.

    The following keys are read from `input_entry`:
      * 'x', 'y': numbers, rounded to 2 decimal places in the output.
      * 'count': copied to the output unchanged.
      * 'journal': mapping of name -> count.
      * 'pc': iterable of dicts with 'pc' and 'score' keys.
      * 'bin_odds': iterable of dicts with 'lemma' and 'odds_ratio' keys.

    The returned dict has the keys 'x', 'y', 'count', 'journals', 'pcs'
    and 'lemmas' (in that order).  A missing input key raises `KeyError`.
    """

    x_rounded = round(input_entry['x'], 2)
    y_rounded = round(input_entry['y'], 2)
    entry_count = input_entry['count']

    # Journals ranked by descending count; the sort is stable, so ties
    # keep the order they have in the input mapping.
    ranked_journals = sorted(
        (
            {'name': name, 'count': count}
            for name, count in input_entry['journal'].items()
        ),
        key=lambda journal: journal['count'],
        reverse=True,
    )

    # Principal components ranked by descending magnitude of the score.
    ranked_pcs = sorted(
        (
            {'name': item['pc'], 'score': item['score']}
            for item in input_entry['pc']
        ),
        key=lambda pc: abs(pc['score']),
        reverse=True,
    )

    # Lemmas ranked by descending magnitude of the odds ratio.
    ranked_lemmas = sorted(
        (
            {'name': item['lemma'], 'score': item['odds_ratio']}
            for item in input_entry['bin_odds']
        ),
        key=lambda lemma: abs(lemma['score']),
        reverse=True,
    )

    return {
        'x': x_rounded,
        'y': y_rounded,
        'count': entry_count,
        'journals': ranked_journals[:5],   # only keep top 5
        'pcs': ranked_pcs,                 # keep all
        'lemmas': ranked_lemmas[:10],      # only keep top 10
    }


def minimize_json(input_filename, output_filename):
    """
    Convert an input JSON object in `input_filename` and save the new
    JSON object in `output_filename`.

    The object loaded from `input_filename` is iterated element by
    element; each element is converted by `minimize_entry()`.  The
    converted entries are sorted by their `(y, x)` values and written to
    `output_filename` as compact JSON (no whitespace after separators).
    Finally `set_read_only()` is called on the output file.

    Both files are opened explicitly as UTF-8 so that the result does not
    depend on the platform's default locale encoding.
    """

    # JSON text is UTF-8 by convention; do not rely on the locale default.
    with open(input_filename, encoding='utf-8') as ifh:
        input_obj = json.load(ifh)

    output_obj = list(map(minimize_entry, input_obj))
    output_obj.sort(
        key=lambda entry: (entry['y'], entry['x'])
    )

    with open(output_filename, 'w', encoding='utf-8') as ofh:
        # Compact separators keep the output file as small as possible.
        json.dump(output_obj, ofh, separators=(',', ':'))

    set_read_only(output_filename)  # set output file read-only


# Test harness
if __name__ == '__main__':
    # Exactly two positional arguments are required: input and output paths.
    if len(sys.argv) != 3:
        # Report misuse on stderr and exit with a non-zero status so that
        # calling scripts can detect the failure.
        print(
            f"Usage: {sys.argv[0]} [input_json_filename] [output_json_filename]",
            file=sys.stderr,
        )
        sys.exit(1)

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]

    minimize_json(input_filename, output_filename)
