///////////////////////////////////////////////////////////////////////////////
//Copyright (C) 2012 Assaf Urieli
//
//This file is part of Jochre.
//
//Jochre is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//Jochre is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU Affero General Public License for more details.
//
//You should have received a copy of the GNU Affero General Public License
//along with Jochre.  If not, see <http://www.gnu.org/licenses/>.
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.jochre.lexicon;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.joliciel.jochre.utils.JochreException;

/**
 * Constructs a lexicon from a tab-separated text file resource, organised as:
 * word tab frequency. If there is no tab and frequency, the word will be
 * assumed to have a frequency of 1. Lines starting with a # will be ignored,
 * as will blank lines and lines whose word column is empty.
 * <p>
 * The lexicon can also be written back out as text ({@link #writeFile(Writer)})
 * or stored and reloaded using Java serialization
 * ({@link #serialize(File)}, {@link #deserialize(File)}).
 * 
 * @author Assaf Urieli
 *
 */
public class TextFileLexicon implements Lexicon, Serializable {

  private static final long serialVersionUID = 1278484873657866572L;
  private static final Logger LOG = LoggerFactory.getLogger(TextFileLexicon.class);

  /** Word to frequency. Part of the serialized form. */
  private Map<String, Integer> entries = new HashMap<>();

  /**
   * Creates an empty lexicon.
   */
  public TextFileLexicon() {
  }

  /**
   * Creates a lexicon backed directly by the supplied map: the map is not
   * copied, so later changes made through this lexicon are visible in the map
   * and vice versa.
   * 
   * @param entries
   *          word to frequency map to use as the backing store
   */
  public TextFileLexicon(Map<String, Integer> entries) {
    this.entries = entries;
  }

  /**
   * Reads a lexicon from a text file using the platform default charset.
   * 
   * @param textFile
   *          the tab-separated file to read
   * @throws JochreException
   *           if the file cannot be opened or read
   */
  public TextFileLexicon(File textFile) {
    this(textFile, Charset.defaultCharset());
  }

  /**
   * Reads a lexicon from a text file, one entry per line in the form
   * <code>word[TAB frequency]</code>. If a word occurs several times, the last
   * occurrence wins.
   * 
   * @param textFile
   *          the tab-separated file to read
   * @param charset
   *          the charset used to decode the file
   * @throws JochreException
   *           if the file cannot be opened or read
   * @throws NumberFormatException
   *           if a frequency column is not a valid integer
   */
  public TextFileLexicon(File textFile, Charset charset) {
    try (Scanner scanner = new Scanner(
        new BufferedReader(new InputStreamReader(new FileInputStream(textFile), charset)))) {
      while (scanner.hasNextLine()) {
        String line = scanner.nextLine();
        if (line.isEmpty() || line.startsWith("#")) {
          continue;
        }

        String[] parts = line.split("\t");
        // A line made only of tabs splits into zero parts; a leading tab gives
        // an empty word. Neither is a usable entry.
        if (parts.length == 0 || parts[0].isEmpty()) {
          continue;
        }

        int frequency = 1;
        if (parts.length > 1) {
          frequency = Integer.parseInt(parts[1].trim());
        }
        entries.put(parts[0], frequency);
      }

      // Scanner swallows read errors and simply reports end of input, so the
      // failure has to be retrieved explicitly to avoid a silently truncated
      // lexicon.
      IOException readError = scanner.ioException();
      if (readError != null) {
        throw readError;
      }
    } catch (IOException e) {
      throw new JochreException(e);
    }
  }

  /**
   * Writes every entry as <code>word TAB frequency NEWLINE</code>, in the
   * iteration order of the underlying map. The writer is neither flushed nor
   * closed.
   * 
   * @param writer
   *          destination for the text
   * @throws JochreException
   *           if the writer fails
   */
  public void writeFile(Writer writer) {
    try {
      for (Entry<String, Integer> entry : entries.entrySet()) {
        writer.write(entry.getKey() + "\t" + entry.getValue() + "\n");
      }
    } catch (IOException e) {
      throw new JochreException(e);
    }
  }

  /**
   * Adds one to the frequency of a word, registering it with a frequency of 1
   * if it is not yet in the lexicon.
   * 
   * @param word
   *          the word to count
   */
  public void incrementEntry(String word) {
    Integer current = entries.get(word);
    if (current == null) {
      entries.put(word, 1);
    } else {
      entries.put(word, current.intValue() + 1);
    }
  }

  /**
   * Sets the frequency of a word, replacing any previous value.
   * 
   * @param word
   *          the word to set
   * @param frequency
   *          the frequency to store for it
   */
  public void setEntry(String word, int frequency) {
    entries.put(word, frequency);
  }

  /**
   * Returns the frequency stored for a word, or 0 if the word is not in this
   * lexicon.
   */
  @Override
  public int getFrequency(String word) {
    Integer frequency = entries.get(word);
    return frequency == null ? 0 : frequency.intValue();
  }

  /**
   * Writes this lexicon to a file using Java serialization. If the file name
   * ends with ".zip", the serialized object is stored in a zip archive as a
   * single entry named "lexicon.obj"; otherwise it is written directly.
   * 
   * @param memoryBaseFile
   *          the file to create or overwrite
   * @throws JochreException
   *           if the file cannot be written
   */
  public void serialize(File memoryBaseFile) {
    LOG.debug("serialize");
    boolean isZip = memoryBaseFile.getName().endsWith(".zip");

    // The file stream is opened in its own resource slot so that it is closed
    // even if building one of the wrapping streams fails.
    try (FileOutputStream fos = new FileOutputStream(memoryBaseFile)) {
      if (isZip) {
        try (ZipOutputStream zos = new ZipOutputStream(fos)) {
          zos.putNextEntry(new ZipEntry("lexicon.obj"));
          ObjectOutputStream out = new ObjectOutputStream(zos);
          out.writeObject(this);
          // Push buffered object data into the entry before the zip stream
          // finishes the archive on close.
          out.flush();
          zos.closeEntry();
        }
      } else {
        try (ObjectOutputStream out = new ObjectOutputStream(fos)) {
          out.writeObject(this);
        }
      }
    } catch (IOException ioe) {
      throw new JochreException(ioe);
    }
  }

  /**
   * Reads every entry of a zip stream as a lexicon. Entries whose name ends
   * with ".zip" are treated as nested archives and read recursively; all other
   * entries are read as a serialized {@link TextFileLexicon}.
   * <p>
   * The stream is not closed by this method.
   * <p>
   * NOTE: this uses Java native deserialization and should only be applied to
   * archives from a trusted source.
   * 
   * @param zis
   *          the zip stream to read, positioned before its first entry
   * @return the single lexicon found if there is exactly one, otherwise a
   *         {@link LexiconMerger} holding all lexicons found (none if the
   *         archive has no entries)
   * @throws JochreException
   *           if the stream cannot be read or a serialized class is missing
   */
  public static Lexicon deserialize(ZipInputStream zis) {
    List<Lexicon> lexicons = new ArrayList<>();
    try {
      ZipEntry zipEntry;
      while ((zipEntry = zis.getNextEntry()) != null) {
        LOG.debug("Scanning zip entry {}", zipEntry.getName());
        // The wrapping streams below are intentionally left open: closing them
        // would close zis and prevent reading the remaining entries.
        if (zipEntry.getName().endsWith(".zip")) {
          ZipInputStream innerZis = new ZipInputStream(zis);
          lexicons.add(TextFileLexicon.deserialize(innerZis));
        } else {
          ObjectInputStream in = new ObjectInputStream(zis);
          lexicons.add((TextFileLexicon) in.readObject());
        }
        zis.closeEntry();
      }
    } catch (IOException ioe) {
      throw new JochreException(ioe);
    } catch (ClassNotFoundException cnfe) {
      throw new JochreException(cnfe);
    }

    if (lexicons.size() == 1) {
      return lexicons.get(0);
    }
    LexiconMerger lexiconMerger = new LexiconMerger();
    for (Lexicon lexicon : lexicons) {
      lexiconMerger.addLexicon(lexicon);
    }
    return lexiconMerger;
  }

  /**
   * Reads a lexicon previously stored with {@link #serialize(File)}. If the
   * file name ends with ".zip" the file is read as a zip archive via
   * {@link #deserialize(ZipInputStream)}; otherwise it is read as a single
   * serialized {@link TextFileLexicon}. The file is always closed before this
   * method returns.
   * <p>
   * NOTE: this uses Java native deserialization and should only be applied to
   * files from a trusted source.
   * 
   * @param memoryBaseFile
   *          the file to read
   * @return the lexicon read from the file
   * @throws JochreException
   *           if the file cannot be read or a serialized class is missing
   */
  public static Lexicon deserialize(File memoryBaseFile) {
    LOG.debug("deserializeMemoryBase");
    boolean isZip = memoryBaseFile.getName().endsWith(".zip");

    // The file stream is opened in its own resource slot so that it is closed
    // even if building one of the wrapping streams fails.
    try (FileInputStream fis = new FileInputStream(memoryBaseFile)) {
      if (isZip) {
        try (ZipInputStream zis = new ZipInputStream(fis)) {
          return TextFileLexicon.deserialize(zis);
        }
      }
      try (ObjectInputStream in = new ObjectInputStream(fis)) {
        return (TextFileLexicon) in.readObject();
      }
    } catch (IOException ioe) {
      throw new JochreException(ioe);
    } catch (ClassNotFoundException cnfe) {
      throw new JochreException(cnfe);
    }
  }

  /**
   * Returns an iterator over the words of this lexicon, backed directly by the
   * underlying map's key set.
   */
  @Override
  public Iterator<String> getWords() {
    return entries.keySet().iterator();
  }
}
