#!/usr/bin/env python
#
# Copyright (c) 2014 Microsoft Corporation. All rights reserved.
# Released under Apache 2.0 license as described in the file LICENSE.
#
# Author: Soonho Kong

# Python 2/3 compatibility
from __future__ import print_function

import argparse
import fnmatch
import os
import sys
import codecs

def get_ilean_files(directory):
    """Return ilean files under directory (recursively)"""
    ilean_files = []
    if not os.path.isdir(directory):
        raise IOError("%s is not a directory" % directory)

    for root, dirnames, filenames in os.walk(directory):
        for filename in fnmatch.filter(filenames, '*.ilean'):
            ilean_files.append(os.path.join(root, filename))
    return ilean_files

def parse_arg(argv):
    """ Parse arguments """
    parser = argparse.ArgumentParser(description='leantags: tag generator for Lean theorem prover.')
    parser.add_argument('--etags', '-e', action='store_true', default=True,  help="Generate etags TAGS file.")
    # parser.add_argument('--gtags', '-g', action='store_true', default=False, help="Generate gtags files (GTAGS, GRTAGS, GPATH).")
    parser.add_argument('--relative', action='store', default=None, const=os.getcwd(), nargs='?', help="Generate relative paths based on the parameter.")
    parser.add_argument('files', nargs='*')
    args = parser.parse_args(argv)
    return args

def get_short_name(item):
    category = item['category']
    declname = item['declname']
    names    = declname.split(".")
    if category == "d":
        kind = item['kind']
        # For instance, for the recursor "nat.rec", this function should return "nat"
        if kind == "recursor" and len(names) >= 2:
            return names[-2]
    # Normally, it returns the last part of hierarchical name
    return names[-1]

def build_line_to_byteoffset_map(ilean_filename):
    """Given a .ilean filename, return a mapping from a line number to the
    byte offset of the beginning of the line. Note that line number
    starts with 0 in our convention.
    """
    map = [0, 0]
    lines = [""]
    pos = 0
    with codecs.open(ilean_filename, "r", "utf-8") as f:
        while True:
            line = f.readline()
            if not line:
                break
            pos = pos + len(line)
            map.append(pos + 1)
            lines.append(line)
    return (map, lines)

def extract_filename_from_info(info):
    return next((item['filename'] for item in info if 'filename' in item), None)

def convert_position_to_etag_style(info):
    """Convert the position format from (line, col) to (line, byteoffset)"""
    filename = extract_filename_from_info(info)
    if filename != None:
        (line_to_byteoffset, contents) = build_line_to_byteoffset_map(filename)
        for item in info:
            if item['category'] != "a":
                linenum = item['linenum']
                col     = item['col']
                item['offset'] = line_to_byteoffset[linenum] + col
                item['prefix'] = contents[linenum][:col] + get_short_name(item)

def extract_info_from_ilean(ilean_file, decl_dict, args):
    info = []
    with codecs.open(ilean_file, "r", "utf-8") as f:
        for line in f:
            line = line.replace("\r\n", "\n")
            array = line[:-1].split("|")
            item = {}
            item['category'] = array[0]
            if item['category'] == 'a':
                item['abbrname'] = array[1]
                item['declname'] = array[2]
            elif item['category'] == 'd':
                item['filename'] = array[1]
                item['linenum']  = int(array[2])
                item['col']      = int(array[3])
                item['declname'] = array[4]
                item['kind']     = array[5]
                item['type']     = array[6]
                decl_dict[item['declname']] = item
            elif item['category'] == 'r':
                item['filename'] = array[1]
                item['linenum']  = int(array[2])
                item['col']      = int(array[3])
                item['declname'] = array[4]
            if args.relative and "filename" in item:
                item['filename'] = os.path.relpath(item['filename'], args.relative)
            info.append(item)
    return info

def get_etag_def_header(filename, len):
    result = "\x0c\n%s,%d\n" % (filename, len)
    return result

def get_etag_def_item(item):
    result = "%s\x7f%s\x01%d,%d\n" \
             % (item['prefix'], item['declname'], item['linenum'], item['offset'])
    return result

def get_etag_def_items(items):
    result = ""
    for item in items:
        if item['category'] == 'd':
            result += get_etag_def_item(item)
    return result

def get_etag_def(info):
    body_str = get_etag_def_items(info)
    filename = extract_filename_from_info(info)
    if filename:
        header = get_etag_def_header(filename, len(body_str))
        return header + body_str
    else:
        return ""

def print_item(item):
    print("\t%s\t%-60s:%4d:%4d:%6d - %s" \
        % (item['category'], item['filename'], item['linenum'], item['col'], item['offset'], item['declname']))

def print_items(items):
    for item in items:
        print(get_etag_def_entry(item), end=' ')

def find_makefile(path, makefile_names):
    """ Find makefile in a given directory.

    Args:
        path: a string of path to look up
        makefile_names: a list of strings to search

    Return:
        When found, return the full path of a makefile
        Otherwise, return False.
    """
    for makefile in makefile_names:
        makefile_pathname = os.path.join(path, makefile)
        if os.path.isfile(makefile_pathname):
            return makefile_pathname
    return False

def find_makefile_upward(path, makefile_names):
    """ Strating from a given directory, search upward to find
    a makefile

    Args:
        path: a string of path to start the search

    Return:
        When found, return the full path of a makefile
        Otherwise, return False.
    """
    makefile = find_makefile(path, makefile_names)
    if makefile:
        return makefile
    up = os.path.dirname(path)
    if up != path:
        return find_makefile_upward(up, makefile_names)
    return False

def filter_ilean_files(ilean_files):
    """Remove .ilean file if a corresponding .lean/.hlean file does not exist."""
    result = []
    for ilean_file in ilean_files:
        lean_file = ilean_file[:-5] + "lean"
        hlean_file = ilean_file[:-5] + "hlean"
        if (os.path.isfile(lean_file) or os.path.isfile(hlean_file)) and os.path.isfile(ilean_file):
            result.append(ilean_file)
    return result

def resolve_abbr(info, decl_dict):
    for item in info:
        if item['category'] == 'a':
            abbrname = item['abbrname']
            declname = item['declname']
            item = decl_dict[declname]
            item['declname'] = abbrname

def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = parse_arg(argv)

    directory = os.getcwd()
    if args.files:
        ilean_files = fnmatch.filter(args.files, '*.ilean')
    else:
        makefile_names = ["GNUmakefile", "makefile", "Makefile", "build.ninja"]
        makefile = find_makefile_upward(directory, makefile_names)
        if makefile:
            directory = os.path.dirname(makefile)
        ilean_files = get_ilean_files(directory)

    ilean_files = filter_ilean_files(ilean_files)

    if not ilean_files:
        return 0

    with codecs.open(os.path.join(directory, "TAGS"), 'w', 'utf-8') as tag_file:
        info_list = []
        decl_dict = {}
        for ilean_file in ilean_files:
            info = extract_info_from_ilean(ilean_file, decl_dict, args)
            if info:
                convert_position_to_etag_style(info)
                info_list.append(info)

        for info in info_list:
            resolve_abbr(info, decl_dict)
            tag_file.write(get_etag_def(info))

if __name__ == "__main__":
    sys.exit(main())
