"""Parser for tag files and XML generated by Doxygen."""

# This file is part of Cantera. See License.txt in the top-level directory or
# at https://cantera.org/license.txt for license and copyright information.

import sys
from pathlib import Path
import re
from sys import version_info

if version_info.minor < 11:
    from typing import Sequence, Iterable
    from typing_extensions import Self
else:
    from typing import Sequence, Iterable, Self

import logging
from dataclasses import dataclass
import xml.etree.ElementTree as ET

from ..dataclasses import ArgList, Param, Func
from .._helpers import with_unpack_iter


# Module-level logger, named after this module
_LOGGER = logging.getLogger(__name__)


@dataclass(frozen=True)
@with_unpack_iter
class TagInfo:
    """
    Represents information parsed from a Doxygen tag file.

    May represent a member function or a variable. An instance evaluates as
    ``True`` only if type, name, argument list, anchor file and anchor are all
    non-empty.
    """

    base: str = ""  #: Qualified scope (skipping Cantera namespace)
    type: str = ""  #: Return type
    name: str = ""  #: Function/variable name
    arglist: str = ""  #: Function argument list (original XML string)
    anchorfile: str = ""  #: Doxygen anchor file
    anchor: str = ""  #: Doxygen anchor
    kind: str = ""  #: Member kind

    @classmethod
    def from_xml(cls: type[Self], qualified_name: str, xml: str) -> Self:
        """
        Create tag information based on XML data.

        The scope preceding the first ``::`` of ``qualified_name`` is stored as
        ``base``; ``xml`` is the text of a single ``<member>`` element of the tag
        file. Child elements that are missing or empty yield empty strings, which
        keeps all fields consistent with their ``str`` annotations and defaults.
        The anchor file is stored with ``.html`` replaced by ``.xml``.
        """
        base, separator, _ = qualified_name.partition("::")
        if not separator:
            # Unqualified name: there is no enclosing scope
            base = ""

        xml_tree = ET.fromstring(xml)

        def text_of(tag: str) -> str:
            # findtext returns the default for a missing element and an empty
            # string for an element without text; never propagate None
            return xml_tree.findtext(tag, default="") or ""

        return cls(base,
                   text_of("type"),
                   text_of("name"),
                   text_of("arglist"),
                   text_of("anchorfile").replace(".html", ".xml"),
                   text_of("anchor"),
                   xml_tree.attrib.get("kind", ""))

    def __bool__(self) -> bool:
        """Return ``True`` if all fields taken from the XML tag are non-empty."""
        return all([self.type, self.name, self.arglist, self.anchorfile, self.anchor])

    @property
    def signature(self) -> str:
        """Generate function signature based on tag information."""
        return f"{self.type} {self.name}{self.arglist}"

    @property
    def id(self) -> str:
        """Generate Doxygen id from anchor file (without ``.xml``) and anchor."""
        return f"{self.anchorfile.replace('.xml', '')}_1{self.anchor}"

    @property
    def qualified_name(self) -> str:
        """Return qualified name (``base::name``, or the plain name without base)."""
        if self.base:
            return f"{self.base}::{self.name}"
        return self.name


@dataclass(frozen=True)
@with_unpack_iter
class TagDetails(TagInfo):
    """
    Tag information extended by details of the Doxygen description.

    Adds the file location, the brief description, the annotated parameter list
    and an optional deprecation message to the fields inherited from `TagInfo`.
    """

    location: str = ""  #: File containing Doxygen description
    briefdescription: str = ""  #: Brief Doxygen description
    parameterlist: list[Param] | None = None  #: Annotated Doxygen parameter list
    deprecated: str | None = None  #: Deprecation message (if applicable)


class TagFileParser:
    """
    Class handling contents of Doxygen tag file.

    On construction, the tag file ``Cantera.tag`` is read from ``build/doc`` below
    the given root. Functions and variables of the ``Cantera`` namespace and of
    the requested classes are registered by (qualified) name.
    """

    _known: dict[str, list[str]]  #: Known members and their XML tags (per overload)

    def __init__(self, root: str, bases: Iterable[str]) -> None:
        """
        Locate and parse the Doxygen tag file.

        ``root`` is used as is if it is an existing directory; otherwise it is
        taken relative to the current working directory. ``bases`` lists the
        names of the classes whose members should be registered. The process
        exits with status 1 if the tag file does not exist.
        """
        if Path(root).is_dir():
            self._tag_path = Path(root) / "build" / "doc"
        else:
            self._tag_path = Path.cwd() / root / "build" / "doc"
        self._xml_path = self._tag_path / "doxygen" / "xml"

        tag_file = self._tag_path / "Cantera.tag"

        if not tag_file.exists():
            msg = (f"Tag file does not exist at expected location:\n    {tag_file}\n"
                "Run 'scons doxygen' to generate.")
            _LOGGER.critical(msg)
            sys.exit(1)

        # Use the module logger (not the root logger) like all other messages
        _LOGGER.info("Parsing Doxygen tags...")
        doxygen_tags = tag_file.read_text(encoding="utf-8")
        self._parse_doxyfile(doxygen_tags, bases)

    def _parse_doxyfile(self, doxygen_tags: str, bases: Iterable[str]) -> None:
        """
        Retrieve class and function information from Cantera namespace.

        Populates ``self._known`` with the members of the ``Cantera`` namespace
        and, prefixed by ``<class>::``, the members of all classes in ``bases``.
        The process exits with status 1 if the namespace is absent or if a
        non-empty class name in ``bases`` is not listed in the namespace.
        """

        def xml_compounds(kind: str, names: Iterable[str]) -> dict[str, str]:
            # Map unqualified compound names to the XML text of the compound
            regex = re.compile(rf'<compound kind="{kind}"[\s\S]*?</compound>')
            wanted = set(names)
            compounds = {}
            for compound in regex.findall(doxygen_tags):
                qualified_name = ET.fromstring(compound).find("name").text
                compound_name = qualified_name.split(":")[-1]
                if compound_name in wanted:
                    compounds[compound_name] = compound
                    if len(compounds) == len(wanted):
                        # Everything requested was located: stop scanning
                        return compounds

            missing = wanted.difference(compounds)
            if missing:
                found = list(compounds)
                missing = '", "'.join(missing)
                msg = f"Missing {kind!r} compound(s):\n    {missing!r}\nusing regex "
                msg += f"{regex}. Continuing with remaining compounds: \n    {found!r}"
                _LOGGER.error(msg)
            # Always hand back what was found so that the caller can continue
            return compounds

        def xml_members(kind: str, text: str, prefix: str = "") -> dict[str, list[str]]:
            # Map (prefixed) member names to the XML text of all their overloads
            regex = re.compile(rf'<member kind="{kind}"[\s\S]*?</member>')
            members = {}
            for member in regex.findall(text):
                member_name = f'{prefix}{ET.fromstring(member).find("name").text}'
                overloads = members.setdefault(member_name, [])
                # tag file may contain duplicates
                if member not in overloads:
                    overloads.append(member)
            return members

        # Parse content of namespace Cantera
        namespace = xml_compounds("namespace", ["Cantera"]).get("Cantera")
        if namespace is None:
            _LOGGER.critical("Namespace 'Cantera' is missing from tag file.")
            sys.exit(1)
        qualified_names = [
            element.text for element in ET.fromstring(namespace).findall("class")
            if element.attrib.get("kind", "") == "class"]
        class_names = [_.split(":")[-1] for _ in qualified_names]

        # Handle exceptions for unknown/undocumented classes; a lone empty base
        # name joins to an empty string and is therefore not reported
        unknown = "', '".join(set(bases) - set(class_names))
        if unknown:
            msg = ("Class(es) in configuration file are missing "
                   f"from tag file: {unknown!r}")
            _LOGGER.critical(msg)
            sys.exit(1)

        # Parse content of classes that are specified by the configuration file
        class_names = set(bases) & set(class_names)
        classes = xml_compounds("class", class_names)

        # Get known functions from namespace and methods from classes
        self._known = xml_members("function", namespace)
        self._known.update(xml_members("variable", namespace))
        for name, class_xml in classes.items():
            prefix = f"{name}::"
            self._known.update(xml_members("function", class_xml, prefix))
            self._known.update(xml_members("variable", class_xml, prefix))

    def exists(self, cxx_member: str) -> bool:
        """Check whether Doxygen tag exists."""
        return cxx_member in self._known

    def detect(self, name: str, bases: Iterable[str], permissive: bool = True) -> str:
        """
        Detect qualified method name.

        Returns the first ``<base>::<name>`` that is known, falling back to the
        unqualified ``name``. If nothing matches, an empty string is returned in
        permissive mode; otherwise the process exits with status 1.
        """
        for base in bases:
            name_ = f"{base}::{name}"
            if self.exists(name_):
                return name_
        if self.exists(name):
            return name
        if permissive:
            return ""
        msg = f"Unable to detect {name!r} in Doxygen tags."
        _LOGGER.critical(msg)
        sys.exit(1)

    def tag_info(self, func_string: str) -> TagInfo:
        """
        Look up tag information based on (partial) function signature.

        The member name is the last space-separated token preceding the first
        opening parenthesis. Overloads are disambiguated by the text between
        parentheses, which has to be a prefix of a known argument list. The
        process exits with status 1 if no unique candidate can be selected.
        """
        cxx_member = func_string.split("(")[0].split(" ")[-1]
        if cxx_member not in self._known:
            msg = f"Could not find {cxx_member!r} in Doxygen tag file."
            _LOGGER.critical(msg)
            sys.exit(1)

        candidates = self._known[cxx_member]
        ix = 0
        if len(candidates) > 1:
            # Disambiguate functions with same name
            # TODO: current approach does not use information on default arguments
            known_args = [ET.fromstring(xml).find("arglist").text
                          for xml in candidates]
            known_args = [ArgList.from_xml(al).short_str() for al in known_args]
            args = re.findall(r"(?<=\().*(?=\))", func_string)
            if not args and "()" in known_args:
                # Candidate function without arguments exists
                ix = known_args.index("()")
            elif not args:
                # Function does not use arguments
                known = "\n - ".join([""] + known_args)
                msg = (f"Need argument list to disambiguate {func_string!r}.\n"
                       f"Possible matches are:{known}")
                _LOGGER.critical(msg)
                sys.exit(1)
            else:
                args = f"({args[0]}"
                ix = -1
                for i, known in enumerate(known_args):
                    if known.startswith(args):
                        # Detected argument list that uses default arguments
                        ix = i
                        break
                if ix < 0:
                    msg = f"Unable to match {func_string!r} to known functions."
                    _LOGGER.critical(msg)
                    sys.exit(1)

        return TagInfo.from_xml(cxx_member, candidates[ix])

    def cxx_member(self, func_string: str, setter: bool = False) -> Func | Param:
        """
        Generate annotated C++ function/variable specification.

        Variables are returned as a ``Param`` whose direction is ``"in"`` for
        setters and ``"out"`` otherwise. Functions are returned as a ``Func``
        whose arguments combine the signature with the Doxygen annotations.
        """
        details = tag_lookup(self._xml_path, self.tag_info(func_string))
        ret_param = Param.from_xml(details.type)

        if details.kind == "variable":
            direction = "in" if setter else "out"
            return Param(ret_param.p_type, details.name,
                         details.briefdescription, direction, None, details.base)

        # Merge attributes from Doxygen signature and Doxygen annotations
        args = ArgList.from_xml(details.arglist).params  # from signature
        # from documentation; None if the lookup of details was unsuccessful
        args_annotated = details.parameterlist or []
        args_merged = []
        for arg in args:
            # The first annotation with a matching name takes precedence
            desc = next((_ for _ in args_annotated if _.name == arg.name), None)
            if desc is None:
                args_merged.append(Param(arg.p_type, arg.name, "Undocumented."))
            else:
                args_merged.append(
                    Param(arg.p_type, arg.name,
                          desc.description, desc.direction, arg.default))

        return Func(ret_param.p_type, details.name, ArgList(args_merged),
                    details.briefdescription, None, ret_param.description,
                    details.base, [], details.deprecated)


def tag_lookup(xml_path: Path, tag_info: TagInfo) -> TagDetails:
    """
    Retrieve tag details from Doxygen tree.

    Reads the XML file named by the anchor file of ``tag_info`` below
    ``xml_path`` and extracts the ``<memberdef>`` entry matching kind and id.
    Returns an empty ``TagDetails`` object (after logging an error) if the file
    or the entry cannot be found; otherwise the tag information is extended by
    location, brief description, parameter list and deprecation message.
    """
    xml_file = xml_path / tag_info.anchorfile
    if not xml_file.exists():
        msg = f"Tag file does not exist at expected location:\n    {xml_file}"
        _LOGGER.error(msg)
        return TagDetails()

    xml_text = xml_file.read_text(encoding="utf-8")
    # Escape kind and id so that they are matched literally
    regex = re.compile(
        rf'<memberdef kind="{re.escape(tag_info.kind)}" '
        rf'id="{re.escape(tag_info.id)}"[\s\S]*?</memberdef>')
    matches = regex.findall(xml_text)

    if not matches:
        msg = f"No XML matches found for {tag_info.qualified_name!r}"
        _LOGGER.error(msg)
        return TagDetails()
    if len(matches) != 1:
        # Continue with the first match
        msg = f"Inconclusive XML matches found for {tag_info.qualified_name!r}"
        _LOGGER.warning(msg)

    def replace_xml_tags(entry: str) -> str:
        # Replace stray XML markup that causes problems for doxygen parsing.

        # Remove ref tags but keep content between them
        entry = re.sub(r"<ref\b[^>]*>(.*?)</ref>", r"\1", entry, flags=re.DOTALL)

        # Replace XML tags with markdown equivalents
        replacements = {
            "computeroutput": "`",
            "emphasis": "*",
            "bold": "**",
        }

        for tag, markdown in replacements.items():
            entry = re.sub(rf"</?{tag}\b[^>]*>", markdown, entry, flags=re.DOTALL)

        return entry

    def xml_parameterlist(item: ET.Element) -> list[Param]:
        # Resolve/flatten parameter list: all names of one item share a description
        name_list = item.find("parameternamelist")
        if name_list is None:
            return []
        # Missing or empty descriptions result in an empty string
        description = item.findtext("parameterdescription/para", default="") or ""
        description = description.strip()
        return [Param("", element.text, description,
                      element.attrib.get("direction", ""))
                for element in name_list]

    def xml_deprecated(xml_tree: ET.Element) -> str | None:
        # Extract deprecation message if applicable
        for xrefsect in xml_tree.iter("xrefsect"):
            xreftitle = xrefsect.find("xreftitle")
            if xreftitle is not None and xreftitle.text == "Deprecated":
                xrefdescription = xrefsect.find("xrefdescription")
                if xrefdescription is not None:
                    return "".join(xrefdescription.itertext()).strip()
        return None

    xml_tree = ET.fromstring(replace_xml_tags(matches[0]))

    par_list = []
    detailed = xml_tree.find("detaileddescription")
    # Compare against None: the truth value of an element only reflects whether
    # it has children, and testing it is deprecated
    paragraphs = detailed.findall("para") if detailed is not None else []
    if paragraphs:
        # TODO: confirm that this is always the last "para" entry
        xml_list = paragraphs[-1].find("parameterlist")
        if xml_list is not None:
            for item in xml_list.findall("parameteritem"):
                par_list.extend(xml_parameterlist(item))

    location = xml_tree.find("location").attrib["file"]
    try:
        brief = xml_tree.find("briefdescription").find("para").text.strip()
    except AttributeError:
        msg = f"Unable to retrieve brief description for {tag_info.qualified_name!r}."
        _LOGGER.warning(msg)
        # Fall back to an empty description so that the result can still be built
        brief = ""

    deprecated = xml_deprecated(xml_tree)

    return TagDetails(*tag_info, location, brief, par_list, deprecated)
