import sys
import pdfminer.settings
import six
import pdfminer.high_level
import pdfminer.layout
from pdfminer.image import ImageWriter
from regex import P

# Module-level side effect: switch off pdfminer's STRICT setting as soon as
# this file is imported, before any extraction runs.
# NOTE(review): presumably this makes parsing tolerant of malformed PDFs
# instead of raising -- confirm against the pdfminer.settings documentation.
pdfminer.settings.STRICT = False


def extract_text(files=None, outfile='-',
                 _py2_no_more_posargs=None,
                 no_laparams=False, all_texts=None, detect_vertical=None,
                 word_margin=None, char_margin=None, line_margin=None, boxes_flow=None,
                 output_type='text', codec='utf-8', strip_control=False,
                 maxpages=0, page_numbers=None, password="", scale=1.0, rotation=0,
                 layoutmode='normal', output_dir=None, debug=False,
                 disable_caching=False, **other):
    """Run pdfminer's extraction over each input file into one output stream.

    Args:
        files: Paths of the PDF files to read; each is opened in binary mode
            and processed in order.
        outfile: Output path, or ``'-'`` to write to ``sys.stdout``. Any other
            value is opened with mode ``"wb"``.
        _py2_no_more_posargs: Guard slot that must stay ``None``; it catches
            callers passing too many positional arguments.
        no_laparams: When true, no ``LAParams`` object is built and
            ``laparams=None`` is forwarded.
        all_texts, detect_vertical, word_margin, char_margin, line_margin,
        boxes_flow: Optional overrides; every one that is not ``None`` is set
            as the same-named attribute on the ``LAParams`` instance.
        output_type: Output format name. If left at ``"text"`` while
            ``outfile`` ends in ``.htm``/``.html``/``.xml``/``.tag`` it is
            switched to ``"html"``/``"html"``/``"xml"``/``"tag"``.
        codec: Output codec; forced to ``'utf-8'`` when writing to a
            ``sys.stdout`` that reports an encoding.
        strip_control, maxpages, page_numbers, password, scale, rotation,
        layoutmode, output_dir, debug, disable_caching: Forwarded unchanged
            to ``pdfminer.high_level.extract_text_to_fp``.
        **other: Accepted so callers may pass extra keywords (for example a
            whole argparse namespace); not used by this function.

    Returns:
        The output stream (``sys.stdout`` or the file opened for ``outfile``),
        still open. The caller is responsible for closing it.

    Raises:
        ValueError: If ``_py2_no_more_posargs`` is not ``None`` or ``files``
            is empty.
    """
    if files is None:
        files = []
    if _py2_no_more_posargs is not None:
        raise ValueError("Many args")
    if not files:
        raise ValueError("Enter Filename")

    laparams = None
    if not no_laparams:
        laparams = pdfminer.layout.LAParams()
        # Explicit name/value table instead of reading values back out of
        # locals(); only overrides the caller actually supplied are applied.
        overrides = (
            ("all_texts", all_texts),
            ("detect_vertical", detect_vertical),
            ("word_margin", word_margin),
            ("char_margin", char_margin),
            ("line_margin", line_margin),
            ("boxes_flow", boxes_flow),
        )
        for attr_name, attr_value in overrides:
            if attr_value is not None:
                setattr(laparams, attr_name, attr_value)

    imagewriter = None
    if output_dir:
        imagewriter = ImageWriter(output_dir)

    # Infer the output format from the file suffix when the caller kept the
    # default "text" type.
    if output_type == "text" and outfile != "-":
        for override, alttype in ((".htm", "html"), (".html", "html"), (".xml", "xml"), (".tag", "tag")):
            if outfile.endswith(override):
                output_type = alttype

    if outfile == "-":
        outfp = sys.stdout
        if outfp.encoding is not None:
            codec = 'utf-8'
    else:
        outfp = open(outfile, "wb")

    try:
        for fname in files:
            # The with-statement closes the input file; no explicit close().
            with open(fname, "rb") as fp:
                # Explicit keywords replace the former ``**locals()`` call so
                # loop temporaries and unrelated locals are no longer leaked
                # into the callee.
                # NOTE(review): ``imagewriter`` is still forwarded because the
                # original call passed it; confirm whether extract_text_to_fp
                # consumes it or rebuilds its own writer from ``output_dir``.
                pdfminer.high_level.extract_text_to_fp(
                    fp,
                    outfp=outfp,
                    output_type=output_type,
                    codec=codec,
                    laparams=laparams,
                    maxpages=maxpages,
                    page_numbers=page_numbers,
                    password=password,
                    scale=scale,
                    rotation=rotation,
                    layoutmode=layoutmode,
                    output_dir=output_dir,
                    strip_control=strip_control,
                    debug=debug,
                    disable_caching=disable_caching,
                    imagewriter=imagewriter,
                )
    except BaseException:
        # On failure the stream is never returned, so nobody else can close
        # a file opened here. sys.stdout is left alone.
        if outfp is not sys.stdout:
            outfp.close()
        raise
    return outfp


def main(args=None):
    """Parse command-line arguments, run ``extract_text`` and return 0.

    Args:
        args: Argument list handed to ``P.parse_args``; ``None`` is passed
            through as-is.
            NOTE(review): ``P`` comes from ``from regex import P`` at the top
            of the file but is used here like an argparse-style parser --
            confirm that import is what was intended.

    Returns:
        ``0`` once extraction has finished and the output stream is closed.
    """
    options = P.parse_args(args=args)

    # Page numbers arrive 1-based; they are shifted to 0-based before being
    # passed on. A comma-separated ``pagenos`` string takes precedence
    # because it is applied last.
    if options.page_numbers:
        options.page_numbers = {x - 1 for x in options.page_numbers}
    if options.pagenos:
        options.page_numbers = {int(x) - 1 for x in options.pagenos.split(",")}

    if six.PY2 and sys.stdin.encoding:
        options.password = options.password.decode(sys.stdin.encoding)

    # extract_text() infers the output type from the file suffix, picks the
    # codec for stdout, builds the ImageWriter and opens the output stream
    # itself. Doing the same here opened the output file a second time and
    # leaked that first handle, so none of it is repeated.
    outfp = extract_text(**vars(options))
    # extract_text() returns sys.stdout when outfile is "-", so this closes
    # stdout in that case.
    outfp.close()
    return 0


# Script entry point: when the file is executed directly, run main() and use
# its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
