# python3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""The gen module implements the gen action.

The gen action will generate the dependency graph of from crates rooted in
`third_party/rust/third_party.toml` including transitive dependencies which are
all found in `third_party/rust/`, and then write the BUILD.gn files required to
build each such crate."""

from __future__ import annotations

from typing import Iterator
from contextlib import contextmanager
from datetime import datetime
from pprint import pprint
import argparse
import tempfile
import os
import re
import sys

from lib.build_rule import BuildRule
from lib import common
from lib import compiler
from lib import consts
from lib import cargo


class BuildData:
    """The root of the crate dependency tree.

    This holds information collected from the dependency tree that is used to
    generate BUILD.gn rules. This data needs to be collected and merged from
    across many crates, as each crate `X` depending on `Z` will contribute to
    data about what is needed from `Z`."""

    def __init__(self):
        self.for_normal = DataForUsage()
        self.for_buildrs = DataForUsage()
        self.for_tests = DataForUsage()

    @contextmanager
    def for_usage(self, usage: cargo.CrateUsage) -> Iterator[DataForUsage]:
        """A ContextManager for accessing the nested `DataForUsage` structure.

        The `DataForUsage` contains data which is different depending how
        crates are being used (for building a library/executable, for building
        a build script, or for building tests).
        """
        try:
            if usage == cargo.CrateUsage.FOR_NORMAL:
                yield self.for_normal
            elif usage == cargo.CrateUsage.FOR_BUILDRS:
                yield self.for_buildrs
            elif usage == cargo.CrateUsage.FOR_TESTS:
                yield self.for_tests
            else:
                assert False  # Unhandled CrateUsage
        finally:
            pass

    def has_crate_for_usage(self, usage: cargo.CrateUsage,
                            crate: cargo.CrateKey) -> bool:
        """Whether a crate has been seen yet as a dependency."""
        with self.for_usage(usage) as for_usage:
            return for_usage.has_crate(crate)

    @contextmanager
    def per_crate_for_usage(self, usage: cargo.CrateUsage,
                            crate: cargo.CrateKey) -> Iterator[PerCrateData]:
        """A shortcut to PerCrateData without going through `DataForUsage`.

        This is equivalent to using `for_usage(usage)` and then
        `per_crate(crate)`."""
        assert crate
        with self.for_usage(usage) as for_usage:
            if not crate in for_usage._map:
                for_usage._map[crate] = PerCrateData()
            try:
                yield for_usage._map[crate]
            finally:
                pass

    def get_per_crate_data_copy(self, usage: cargo.CrateUsage,
                                crate: cargo.CrateKey) -> PerCrateData:
        with self.per_crate_for_usage(usage, crate) as crate_data:
            return crate_data


class DataForUsage:
    """Data that differs depending on how crates are being consumed.

    Crates can be built up to 3 times, for use in building different types of
    targets.
    """

    def __init__(self):
        self._map: dict[cargo.CrateKey, PerCrateData] = {}

    @contextmanager
    def per_crate(self, crate: cargo.CrateKey) -> Iterator[PerCrateData]:
        """A ContextManager to access the PerCrateData structure.

        This contains data about how the crate should be build, how other crates
        depend on it and what it depends on.
        """
        assert crate
        if not crate in self._map:
            self._map[crate] = PerCrateData()
        try:
            yield self._map[crate]
        finally:
            pass

    def has_crate(self, crate: cargo.CrateKey) -> bool:
        """Whether a crate has been seen as a dependency yet.

        Note this includes crates depended on by our fictional top-level
        third_party.toml crate.
        """
        return crate in self._map

    def all_crates(self) -> set[cargo.CrateKey]:
        """All crates that appear as a dependency in the dependency tree.

        Note this includes crates depended on by our fictional top-level
        third_party.toml crate.
        """
        return set(self._map.keys())

    def all_crates_data(self) -> list[tuple[cargo.CrateKey, PerCrateData]]:
        """All crates along with their PerCrateData in the dependency tree.

        Note this includes crates depended on by our fictional top-level
        third_party.toml crate.
        """
        return list(self._map.items())


class PerCrateData:
    """The set of data about a Crate for a particular usage of that crate.

    This defines how other crates depend on this crate, as well as what it
    depends on."""

    def __init__(self):
        # Whether the crate is allowed to be used from first-party code. This is
        # the set of crates depended on by our fictional third_party.toml
        # crate.
        self.for_first_party = False
        # The set of features requested by other crates, and the architectures
        # that they requested them on.
        self.features = FeatureSet()
        # Dependencies of this crate, with different sets for the normal
        # (library or executable) output, the build script, or tests.
        self.deps = {o: DepSet() for o in cargo.CrateBuildOutput}
        # On what architectures this crate is depended on by another crate
        # (including by our fictional top-level third_party.toml). If this is
        # empty, the crate is not used at all (at least under a particular usage
        # aka DataForUsage).
        self.used_on_archs = compiler.ArchSet.EMPTY()
        # Whether the crate is considered "architecture-specific". When true,
        # the crate may have different dependencies or feature-requirements on
        # different architectures, and we must check each architecture for them.
        # As a result, any dependencies of this crate would also be considered
        # as "architecture-specific" and would get this flag set eventually as
        # well.
        self.arch_specific = False
        # The set of files generated from the crate's build.rs build script.
        self.build_script_outputs: set[str] = set()
        # The map of full version numbers that Cargo has requested for the crate
        # + epoch to the path where Cargo said it has found them. The path will
        # be missing if there's no local crate that matches.
        self.cargo_full_versions: map[str, str] = {}


class DepSet:
    """The set of dependencies, and what architectures they are needed on."""

    class Data:
        def __init__(self):
            # The architectures where the dependency is needed.
            self.archset = compiler.ArchSet.EMPTY()

    def __init__(self):
        self._map: dict[cargo.CrateKey, DepSet.Data] = {}

    @contextmanager
    def dep_data(self, dep: cargo.CrateKey) -> Iterator[DepSet.Data]:
        """A ContextManager to access the dependency data of a crate."""
        assert dep
        if not dep in self._map:
            self._map[dep] = DepSet.Data()
        try:
            yield self._map[dep]
        finally:
            pass

    def all_deps(self) -> set[cargo.CrateKey]:
        """The set of all crates which are dependencies, on any architecture."""
        return {k for k in self._map.keys()}

    def all_deps_data(self) -> list[tuple[cargo.CrateKey, DepSet.Data]]:
        """The set of all crates and their dependency data."""
        return [p for p in self._map.items()]


class FeatureSet:
    """The set of features requested of a crate.

    This contains the set of requested features, and the archictures they are
    requested for.
    """

    def __init__(self):
        self._map: dict[str, compiler.ArchSet] = {}

    def has_feature(self, feature: str) -> bool:
        """Whether the named feature is requested for any architecture."""
        return feature in self._map

    @contextmanager
    def archset_for_feature(self, feature: str) -> Iterator[compiler.ArchSet]:
        """A ContextManager to access the ArchSet for anamed feature.

        This is the set of architectures where the feature is requested."""
        assert feature
        if not feature in self._map:
            self._map[feature] = compiler.ArchSet.EMPTY()
        try:
            yield self._map[feature]
        finally:
            pass

    def all_archsets(self) -> list[tuple[str, compiler.ArchSet]]:
        """A set of all features and their feature data."""
        return [p for p in self._map.items()]


def run(args: argparse.Namespace):
    """Entry point for the 'gen' action."""
    # This step constructs a BuildData which has all the information we need to
    # construct build rules in GN. This is slow as it has to query `cargo tree`
    # a lot.
    build_data_set = _construct_build_data_from_3p_crates(args)

    NAME_PRINT_LEN = 30
    num_done = 0
    num_total = len(build_data_set.for_normal.all_crates())
    last_printed = 0
    for crate_key in build_data_set.for_normal.all_crates():
        last_printed = common.print_same_line(
            "Generating BUILD.gn for crates {}/{} {} v{}".format(
                1 + num_done, num_total, crate_key.name, crate_key.epoch),
            last_printed)
        # This uses the entire BuildData structure to construct a BuildRule for
        # each crate, one at at time. The BuildRule is a typed representation of
        # what will end up in the crate's BUILD.gn file.
        build_rule = _gen_build_rule(args, build_data_set, crate_key)
        assert build_rule.normal_usage.used_on_archs or \
            build_rule.buildrs_usage.used_on_archs or \
            build_rule.test_usage.used_on_archs

        copyright_year = _get_copyright_year(
            common.os_crate_version_dir(crate_key.name,
                                        crate_key.epoch,
                                        rel_path=["BUILD.gn"]))
        with open(
                common.os_crate_version_dir(crate_key.name,
                                            crate_key.epoch,
                                            rel_path=["BUILD.gn"]),
                "w") as build_file:
            build_file.write(
                _run_gn_format(build_rule.generate_gn(args, copyright_year)))
        num_done += 1
    common.print_same_line("Generating BUILD.gn for crates {}/{} ".format(
        num_done, num_total),
                           last_printed,
                           done=True)


def _get_copyright_year(path: str) -> str:
    try:
        with open(path, "r") as file:
            top_line = file.readline()
            m = consts.GN_HEADER_YEAR_REGEX.search(top_line)
            if m:
                return m.group("year")
    except FileNotFoundError:
        pass
    return str(datetime.now().year)


def _construct_build_data_from_3p_crates(args: argparse.Namespace) -> BuildData:
    """Read the `third_party.toml` data and populate a BuildData from it.

    Returns a dictionary containing information about all third-party crate
    dependencies, including transitive dependencies, build dependencies and
    dev (test-only) dependencies. They are all generated from the set of
    first-party dependencies (and their features) declared in
    `${THIRD_PARTY}/third_party.toml`.
    """
    toml_3p = common.load_toml(
        common.os_third_party_dir(rel_path=["third_party.toml"]))
    assert "dependencies" in toml_3p, \
        "third_party.toml must specify 'dependencies'"

    # Validate the dependencies listed in third_party.toml.
    TOML_DEPS_KEYS = ["dependencies", "build-dependencies", "dev-dependencies"]
    for toml_key in TOML_DEPS_KEYS:
        if not toml_key in toml_3p:
            continue
        for (dep, details) in toml_3p[toml_key].items():
            version = details["version"] if isinstance(details,
                                                       dict) else details
            # Verify that the version is specified as an epoch.
            epoch = common.version_epoch_dots(version)
            assert version == epoch, \
                "third_party.toml must specify version as an epoch for " \
                " '{}', " "found '{}', did you mean '{}'?".format(
                    dep, version, epoch)

            # Verify that a crate is only listed once.
            for other_toml_key in [k for k in TOML_DEPS_KEYS if k != toml_key]:
                if other_toml_key not in toml_3p:
                    continue
                assert not dep in toml_3p[other_toml_key], \
                    "A crate may only appear in one section of " \
                    "third_party.toml, but '{}' appears more than " \
                    "once.".format(dep)

    # For every crate in third_party, we will generate a patch to redirect
    # crates.io to that directory, so that if we have local changes to the
    # Cargo.toml files, running `cargo tree` will see them. To do this we
    # save a list of all 3p crates.
    list_of_3p_cargo_toml = cargo.gen_list_of_3p_cargo_toml()

    # We can't use these to see the deps tree, as it will put everything as a
    # dep of the root target, but it's helpful for failing faster on missing
    # crates.
    transitive_normal_build_data_set = BuildData()

    # The set of info needed to make a GN target for the crate, in each
    # CrateUsage.
    build_data_set = BuildData()

    last_printed = 0

    # We will take the third-party dependencies, specified in a toml format,
    # and build a Cargo.toml file for running `cargo tree` against.
    with tempfile.TemporaryDirectory() as workdir:
        # First we make a copy of the third-party deps input as-is, but with
        # additional required fields added. We can use that with `cargo tree` to
        # get a full set of all dependencies, including transitive ones.
        cargo_toml_path = cargo.write_cargo_toml_in_tempdir(
            workdir,
            list_of_3p_cargo_toml,
            orig_toml_parsed=cargo.add_required_cargo_fields(toml_3p),
            verbose=args.verbose)

        # This collects the direct dependencies from first-party code. For
        # normal dependencies, cargo can output them all here, which we collect
        # up front to try print out all the missing ones at once. This would
        # confuse our actual dependency calculations though, since we would
        # have all the deps on a single build target (the third-party root). So
        # we will have to throw them away.
        last_printed = common.print_same_line(
            "Collecting all transitive normal dependencies.", last_printed)
        _collect_deps_for_crate(args, cargo_toml_path,
                                transitive_normal_build_data_set,
                                cargo.CrateUsage.FOR_NORMAL, True)
        # We don't look for blocked stuff here since we won't have the data to
        # say what's depending on them yet anyways.
        _look_for_missing_or_blocked_deps(transitive_normal_build_data_set,
                                          find_blocked=False)

        last_printed = common.print_same_line(
            "Collecting top-level normal dependencies.", last_printed)
        _collect_deps_for_crate(args,
                                cargo_toml_path,
                                build_data_set,
                                cargo.CrateUsage.FOR_NORMAL,
                                True,
                                depth=1)
        last_printed = common.print_same_line(
            "Collecting top-level build-dependencies.", last_printed)
        _collect_deps_for_crate(args,
                                cargo_toml_path,
                                build_data_set,
                                cargo.CrateUsage.FOR_BUILDRS,
                                True,
                                depth=1)

        last_printed = common.print_same_line(
            "Collecting top-level dev-dependencies.", last_printed)
        _collect_deps_for_crate(args,
                                cargo_toml_path,
                                build_data_set,
                                cargo.CrateUsage.FOR_TESTS,
                                True,
                                depth=1)

    # Now we have enough to build our first-party code, from prebuilts of the
    # dependencies. But we don't have enough to build each dependency. We have
    # to query `cargo tree` for each one to learn what's needed to build it.
    #
    # So here we walk through each crate and query its Cargo.toml for its build
    # and dev dependencies.
    #
    # This step is slow...
    crate_keys_left = build_data_set.for_normal.all_crates()
    num_done = 0
    # TODO: Cache this work somewhere in `consts.THIRD_PARTY`.
    while crate_keys_left:
        crate_key = crate_keys_left.pop()
        crate_name = crate_key.name
        crate_epoch = crate_key.epoch

        orig_cargo_toml_path = common.os_crate_cargo_dir(
            crate_name, crate_epoch, rel_path=["Cargo.toml"])

        # Skip missing dependencies. We will print them all at the end.
        if not os.path.exists(orig_cargo_toml_path):
            num_done += 1
            continue

        with tempfile.TemporaryDirectory() as workdir:
            tmp_cargo_toml_path = cargo.write_cargo_toml_in_tempdir(
                workdir,
                list_of_3p_cargo_toml,
                orig_toml_path=orig_cargo_toml_path,
                verbose=args.verbose)

            last_printed = common.print_same_line(
                "Collecting normal dependencies: {}/{} {} v{}".format(
                    1 + num_done, 1 + num_done + len(crate_keys_left),
                    crate_name, crate_epoch), last_printed)
            new_keys = _collect_deps_for_crate(args,
                                               tmp_cargo_toml_path,
                                               build_data_set,
                                               cargo.CrateUsage.FOR_NORMAL,
                                               False,
                                               crate_key=crate_key,
                                               depth=1)
            crate_keys_left.update(new_keys)

            last_printed = common.print_same_line(
                "Collecting build dependencies: {}/{} {} v{}".format(
                    1 + num_done, 1 + num_done + len(crate_keys_left),
                    crate_name, crate_epoch), last_printed)
            new_keys = _collect_deps_for_crate(args,
                                               tmp_cargo_toml_path,
                                               build_data_set,
                                               cargo.CrateUsage.FOR_BUILDRS,
                                               False,
                                               crate_key=crate_key,
                                               depth=1)
            crate_keys_left.update(new_keys)
            find_test_deps = args.with_tests
            with build_data_set.for_tests.per_crate(crate_key) as per_crate:
                if per_crate.for_first_party:
                    find_test_deps = True
            if find_test_deps:
                last_printed = common.print_same_line(
                    "Collecting test dependencies: {}/{} {} v{}".format(
                        1 + num_done, 1 + num_done + len(crate_keys_left),
                        crate_name, crate_epoch), last_printed)
                new_keys = _collect_deps_for_crate(args,
                                                   tmp_cargo_toml_path,
                                                   build_data_set,
                                                   cargo.CrateUsage.FOR_TESTS,
                                                   False,
                                                   crate_key=crate_key,
                                                   depth=1)
                crate_keys_left.update(new_keys)

        num_done += 1
    common.print_same_line("Collecting dependencies: {}/{} ".format(
        num_done, num_done),
                           last_printed,
                           done=True)

    if args.verbose:
        pprint(build_data_set)
        print()

    # If anything was missing, we will abort here and print them out all at
    # once.
    _look_for_missing_or_blocked_deps(build_data_set)

    return build_data_set


def _run_gn_format(build_gn: str) -> str:
    """Formats the input GN file contents, and returns the formatted output."""
    import subprocess
    try:
        return subprocess.check_output(["gn", "format", "--stdin"],
                                       stderr=subprocess.PIPE,
                                       text=True,
                                       input=build_gn)
    except subprocess.CalledProcessError as e:
        print()
        print("stdout")
        print(e.stdout)
        print()
        print("stderr")
        print(e.stderr)
        raise e


def _look_for_missing_or_blocked_deps(build_data_set: BuildData,
                                      find_blocked: bool = True):
    """Find crate dependencies for which we have no local copy downloaded.

    If `find_blocked` is true, this also looks for dependencies listed in
    Cargo.toml files to crates that are considered blocked, and thus should not
    be used.

    If a problem is found, this function will terminate the program with
    `exit(1)`.
    """
    missing: list[tuple[str, str]] = []
    missing_version: list[tuple[str, str, str]] = []
    blocked: list[tuple[str, str]] = []
    # Every crate appears in all 3 CrateUsage entries, so we can just look at
    # one.
    for crate_key in build_data_set.for_normal.all_crates():
        name = crate_key.name
        epoch = crate_key.epoch
        if name in consts.BLOCKED_CRATES:
            if find_blocked:
                reason = consts.BLOCKED_CRATES[name]
                blocked += [(name, reason)]
            continue

        with build_data_set.for_normal.per_crate(crate_key) as data:
            for (ver, dir) in data.cargo_full_versions.items():
                if dir:
                    continue  # If cargo gave a path then it was found locally.
                epoch_path = common.os_crate_cargo_dir(name,
                                                       epoch,
                                                       rel_path=["Cargo.toml"])
                if not os.path.exists(epoch_path):
                    missing += [(name, ver, epoch)]
                else:
                    missing_version += [(name, ver, epoch)]

    if missing or missing_version or blocked:
        print(file=sys.stderr)
        print(file=sys.stderr)

    if missing_version:
        print("Missing correct version for {} dependencies:".format(
            len(missing_version)),
              file=sys.stderr)
        for (name, ver, epoch) in missing_version:
            print("Cargo wants version \"{}\" for crate \"{}\". But a different"
                  " version is downloaded for epoch v{}.".format(
                      ver, name, epoch),
                  file=sys.stderr)
        print(
            "To resolve these errors, remove the epoch and download the exact"
            " version by specifying it in third_party.toml. If multiple exact"
            " versions are required, we will need some sort of extension to"
            " third_party.toml to support this.",
            file=sys.stderr)
        print(file=sys.stderr)
    if missing:
        print("Missing {} dependencies. To download:".format(len(missing)),
              file=sys.stderr)
        for (name, ver, epoch) in missing:
            print("{} download {} {} --security-critical=yes_or_no".format(
                sys.argv[0], name, ver),
                  file=sys.stderr)
        print(file=sys.stderr)
    if blocked:
        for (name, reason) in blocked:
            found_from = []
            for usage in cargo.CrateUsage:
                with build_data_set.for_usage(usage) as for_usage:
                    for (from_key,
                         per_crate_data) in for_usage.all_crates_data():
                        for (usage, depset) in per_crate_data.deps.items():
                            for dep_key in depset.all_deps():
                                if (dep_key.name in consts.BLOCKED_CRATES
                                        and not from_key in found_from):
                                    found_from += [from_key]
            print("Found dependency on blocked crate \"{}\" from:".format(name),
                  file=sys.stderr)
            for crate_key in found_from:
                print("    {} v{}".format(
                    crate_key.name,
                    common.version_epoch_normalized(crate_key.epoch)),
                      file=sys.stderr)
            print("  Reason: {}".format(reason), file=sys.stderr)
        print("Remove blocked crates from any Cargo.toml files "
              "in {} before proceeding.".format(
                  os.path.join(*consts.THIRD_PARTY)),
              file=sys.stderr)
        print(
            "Note: You will also need to remove usage of the blocked "
            "crate from the dependent crate(s) as well.",
            file=sys.stderr)
        print(file=sys.stderr)

    if missing or missing_version or blocked:
        exit(1)


class ArchSpecific:
    def __init__(self, archset: compiler.ArchSet):
        self.archset = archset
        # Whether the crate has arch-specific dependencies. That is to say,
        # outgoing edges are arch-dependent.
        self.arch_specific_deps: list[cargo.CrateKey] = []
        # Whether the crate is an arch-specific dep from some other crate. That
        # is to say, incoming edges are arch-dependent.
        self.is_used_as_arch_specific_dep = False

    def archs_to_test(self) -> set[str]:
        """The ArchSet to test against as a list of target strings.

        These are strings we can pass to Cargo or rustc, and can build other
        ArchSets from.
        """
        return self.archset.as_strings()

    def is_single_arch(self) -> bool:
        """If we can consider only one architecture for accurate results."""
        return (not self.arch_specific_deps
                and not self.is_used_as_arch_specific_dep)

    def apply_results_to(self, target_arch: str) -> compiler.ArchSet:
        """Return an ArchSet to use for any new outgoing dependency edges."""
        if self.is_single_arch():
            # If we're applying results to other architectures, we expect that
            # we are only testing one.
            assert (len(self.archs_to_test()) == 1)
            # We're only checking one arch but if a dep is found, it should
            # be enabled for all archs. We're only checking one arch because
            # we have no arch-specific rules.
            return compiler.ArchSet.ALL()
        assert (target_arch in self.archs_to_test())
        # We need to check each arch, so only add deps to the arch they
        # are found on.
        return compiler.ArchSet(initial={target_arch})


def _get_archs_of_interest(cargo_toml: dict, crate_usage_data: PerCrateData,
                           user_target_arch: str) -> ArchSpecific:
    """Determine which archs to check dependencies for a crate.

    Args:
        cargo_toml: The Cargo.toml file of the crate as a dict.
        crate_usage_data: Data about how the crate is used by other crates. It
            can be None, in which case, it's assumed to not be depended on by
            any other crates.
        target: The user-specific target (one of the keys from
            compiler.RUSTC_ARCH_TO_BUILD_CONDITION) to generate build files
            for. If specified, only that target will be considered. Otherwise,
            all possible targets would be considered.
    """
    if user_target_arch:
        return ArchSpecific(compiler.ArchSet(initial={user_target_arch}))

    # Whether the crate has arch-specific dependencies. That is to say,
    # outgoing edges are arch-dependent.
    has_arch_specific_deps = False
    if "target" in cargo_toml:
        for target_data in cargo_toml["target"].values():
            if "dependencies" in target_data:
                has_arch_specific_deps = True
                break

    # Whether the crate is an arch-specific dep from some other crate. That
    # is to say, incoming edges are arch-dependent.
    is_used_as_arch_specific_dep = crate_usage_data.arch_specific \
        if crate_usage_data else False

    # When there's no arch-specific incoming or outgoing edges, we can just
    # choose any target architecture and collect its dependencies there,
    # applying them too all architectures.
    if not is_used_as_arch_specific_dep and not has_arch_specific_deps:
        # We can just test one arch even though it'll apply to all.
        return ArchSpecific(compiler.ArchSet.ONE())

    # This crate has architecture-specific dependencies, or is part of
    # an architecture-specific dependency chain.

    if crate_usage_data:
        a = ArchSpecific(crate_usage_data.used_on_archs)
    else:
        a = ArchSpecific(compiler.ArchSet.ALL())
    if has_arch_specific_deps:
        # A list of (crate name, dependency data) for each dependency that is
        # found under a "target" rule in the Cargo.toml.
        targetted_deps = []
        for (target_name, target_data) in cargo_toml["target"].items():
            targetted_deps += list(
                target_data.get("dependencies", dict()).items())
        # Convert the list of (crate name, dependency data) into a more useful
        # list of `cargo.CrateKey`s.

        def version_from_maybe_dict(maybe_dict: dict | str) -> str:
            if type(maybe_dict) is dict:
                return maybe_dict["version"]
            else:
                return maybe_dict

        a.arch_specific_deps = [
            cargo.CrateKey(crate_name, version_from_maybe_dict(dep_data))
            for (crate_name, dep_data) in targetted_deps
        ]
    a.is_used_as_arch_specific_dep = is_used_as_arch_specific_dep
    return a


def _crate_features_on_target_arch(crate_usage_data: PerCrateData,
                                   target_arch: str) -> list[str]:
    """Returns the subset of features requested of the crate for the target.

    A crate has a set of features that are enabled by the union of all other
    crates that depend upon it. The incoming dependency edges specify which
    features are requested.

    But some of those edges may be arch-specific. We return the set of features
    that are requested for the specific architecture named in `target_arch`.
    """
    all_features = crate_usage_data.features.all_archsets()
    return [f for (f, archset) in all_features if archset.has_arch(target_arch)]


class CargoTreeDependency:
    def __init__(self,
                 key: cargo.CrateKey,
                 full_version: str = None,
                 crate_path: str = None,
                 features: list[str] = [],
                 is_for_first_party_code: bool = False,
                 build_script_outputs: set[str] = set()):
        self.key = key
        self.full_version = full_version
        self.crate_path = crate_path
        self.features = features
        self.is_for_first_party_code = is_for_first_party_code
        self.build_script_outputs = build_script_outputs

    def __eq__(self, other: CargoTreeDependency) -> bool:
        return (self.key == other.key
                and self.full_version == other.full_version
                and self.crate_path == other.crate_path
                and self.features == other.features and
                self.is_for_first_party_code == other.is_for_first_party_code
                and self.build_script_outputs == other.build_script_outputs)

    def __repr__(self) -> str:
        return "CargoTreeDependency(key={}, full_version={}, crate_path={}, " \
            "features={}, " \
            "is_for_first_party_code={}, " \
            "build_script_outputs={})".format(
                self.key, self.full_version, self.crate_path, self.features,
                self.is_for_first_party_code,
                self.build_script_outputs)


def _parse_cargo_tree_dependency_line(args: argparse.Namespace,
                                      cargo_toml: dict,
                                      is_third_party_toml: bool,
                                      line: str) -> CargoTreeDependency:
    m = re.search(consts.CARGO_DEPS_REGEX, line)
    if not m:
        return None
    if args.verbose:
        print(m.group(0))

    dep_name = m.group("name")
    dep_version = m.group("version")
    dep_path = m.group("path") if "path" in m.groupdict() else None
    dep_key = cargo.CrateKey(dep_name, dep_version)

    dep_features = m.group("features").split(",") if m.group("features") else []
    dep_isprocmacro = bool(m.group("isprocmacro"))

    parse_ext_key = None
    if is_third_party_toml:
        parse_ext_key = "dependencies" if dep_name in cargo_toml.get(
            "dependencies",
            {}) else "dev-dependencies" if dep_name in cargo_toml.get(
                "dev-dependencies", {}) else None

    if not parse_ext_key:
        # Extensions from third_party.toml that aren't in normal Cargo.toml,
        # these are the defaults for stuff outside of third_party.toml.
        for_first_party_code = False
        build_script_outputs = set()
    else:
        for_first_party_code = True
        build_script_outputs = set()
        # Usually the dependency value is just a version number, but if it
        # is a dict, then it can declare values for extensions.
        extensions = cargo_toml[parse_ext_key][dep_name]
        if type(extensions) is dict:
            build_script_outputs = set(
                extensions.get("build-script-outputs", build_script_outputs))
            for_first_party_code = extensions.get("allow-first-party-usage",
                                                  for_first_party_code)
    return CargoTreeDependency(dep_key,
                               full_version=dep_version,
                               crate_path=dep_path,
                               features=dep_features,
                               is_for_first_party_code=for_first_party_code,
                               build_script_outputs=build_script_outputs)


def _add_edges_for_dep_on_target_arch(
        build_data_set: BuildData, parent_crate_key: cargo.CrateKey,
        parent_arch_specific: ArchSpecific, parent_usage: cargo.CrateUsage,
        parent_output: cargo.CrateBuildOutput, dep: CargoTreeDependency,
        target_arch: str, new_keys: set[cargo.CrateKey]) -> set[cargo.CrateKey]:
    """Adds edges between a parent crate and a dependency crate.

    Args:
        build_data_set: The data structure holding all the dependency edges.
        parent_crate_key: The parent crate. Can be None if the parent is our
            third_party.toml which is not actually a crate at all.
        parent_arch_specific: Data about the parent crate's relationship to
            target architectures, which can apply transitively to dependencies
            as well.
        parent_usage: The parent crate may be building a library for use by
            other crates. Those (ancestor) crates build different outputs such
            as normal binaries, build scripts, and tests. The `parent_usage` is
            which mode of the parent crate we are adding edges to dependencies
            from. The usage controls what features are enabled in the parent
            crate, which can influence its dependencies or configuration.
        parent_output: The parent crate itself also has these various build
            outputs and we're currently adding edges for one of those outputs.
            The `parent_output` specifies which build artifact from the parent
            we're adding dependency edges for.
        dep: Information about the dependency crate.
        target_arch: The target architecture we're currently evaluating. We may
            apply the edges more generally to multiple architectures
        new_keys: An out-param of crates that will need to be (re)visited to
            examine for dependencies.

    """
    archset_for_new_edges = parent_arch_specific.apply_results_to(target_arch)

    # First ensure this dependency crate exists in the `build_data_set` under
    # all CrateUsages.
    for u in cargo.CrateUsage:
        with build_data_set.per_crate_for_usage(u, dep.key) as crate_data:
            crate_data.for_first_party |= dep.is_for_first_party_code
            crate_data.build_script_outputs |= dep.build_script_outputs
            crate_data.cargo_full_versions.update(
                {dep.full_version: dep.crate_path})

    # The parent crate may use a dependency in various ways: to produce normal
    # output, a build script, or tests. We examine just one in this function.
    # Based on that usage from the parent, from the perspective of the
    # dependency, this is how it is being used.
    dep_usage = parent_output.as_dep_usage()

    # Determine if the dependency crate should be considered arch-specific.
    if parent_arch_specific.is_used_as_arch_specific_dep:
        # Firstly, arch-specific-ness is transitive. So if this crate is
        # arch-specific (is used as an arch-specific dependency), then its
        # dependencies are also. But if not, then we look for arch-specific
        # dependency edges come out of this crate to the dependency.
        dep_is_arch_specific = True
    else:
        # For this dependency of the crate, we need to see if a "target" rule
        # affects it, in which case it is considered arch-specific and we will
        # need to compute its deps/features for all configs.
        dep_is_arch_specific = \
            dep.key in parent_arch_specific.arch_specific_deps

    # Update the PreCrateData of the dependency crate. Any changes in here mean
    # we will need to (re)visit the dependency crate to determine its transitive
    # dependencies. This data is about how to build the dependency crate based
    # on its incoming edges from the parent.
    changed = False
    with build_data_set.per_crate_for_usage(dep_usage,
                                            dep.key) as dep_crate_data:
        # This marks the dependency crate as being used on the given target
        # architectures.
        changed |= dep_crate_data.used_on_archs.add_archset(
            archset_for_new_edges)

        if dep_is_arch_specific and not dep_crate_data.arch_specific:
            dep_crate_data.arch_specific = True
            changed = True

        for f in dep.features:
            with dep_crate_data.features.archset_for_feature(f) as archset:
                changed |= archset.add_archset(archset_for_new_edges)

    if changed:
        new_keys.add(dep.key)

    # Add outgoing edges from the parent crate to the dependency crate. If the
    # parent is the virtual third_party.toml crate, then the crate key will not
    # exist, and we don't need to set up any edges.
    if not parent_crate_key:
        return

    with build_data_set.for_usage(parent_usage) as for_usage:
        assert parent_crate_key in for_usage.all_crates()
        with for_usage.per_crate(parent_crate_key) as parent_data:
            with parent_data.deps[parent_output].dep_data(dep.key) as dep_data:
                # Adds the edge from parent to dependency for all given target
                # architectures.
                dep_data.archset.add_archset(archset_for_new_edges)
    return


def _collect_deps_for_crate(args: argparse.Namespace,
                            cargo_toml_path: str,
                            build_data_set: BuildData,
                            usage: cargo.CrateUsage,
                            is_third_party_toml: bool,
                            crate_key: cargo.CrateKey = None,
                            depth: int = None) -> set[cargo.CrateKey]:
    """Runs `cargo tree` and collects all dependency data for a specific crate.

    Args:
        args: The command line args that were given to the crates.py tool.
        cargo_toml_path: The Cargo.toml file for the crate to collect from.
        build_data_set: The BuildData structure to collect data into.
        usage: Dependencies are different depending how the crate is being used,
          this specifies to look for dependencies for a particular usage.
        is_third_party_toml: True if the crate being collected from is the
          synthetic top level crate in `third_party.toml`.
        crate_key: The CrateKey of the crate at cargo_toml_path. It will be None
          if `is_third_party_toml` is true, since there's no real crate there.
        depth: How deep to look for dependencies with `cargo tree`. Defaults to
          None, which will look through the whole tree.

    Returns:
        A set of crates for which we will need to run this function on. Either
        they have never been seen before, or the data we collected for the
        crate in the past has been diritied.
    """
    assert is_third_party_toml != bool(crate_key)

    # New dependencies, or ones with new features, that need to be visited
    # by this function.
    new_keys: set[cargo.CrateKey] = set()

    cargo_toml = common.load_toml(cargo_toml_path)

    arch_specific = _get_archs_of_interest(
        cargo_toml,
        build_data_set.get_per_crate_data_copy(usage, crate_key)
        if crate_key else None, args.target)

    # The crate can have a few different types of output: normal output like a
    # library, its build script, and its tests. We walk over and construct
    # the dependency graph for each.
    first = True
    for output_type in cargo.CrateBuildOutput:
        if output_type == cargo.CrateBuildOutput.TESTS:
            if not args.with_tests and not is_third_party_toml:
                continue
        for target_arch in arch_specific.archs_to_test():
            if is_third_party_toml:
                # For the root third_party.toml crate, nothing can depend on it,
                # so there's no features (and no crate_key).
                crate_derived_features = []
            else:
                crate_derived_features = _crate_features_on_target_arch(
                    build_data_set.get_per_crate_data_copy(usage, crate_key),
                    target_arch)

            if args.verbose:
                if first:
                    first = False
                    print()
                print("output: {} target: {}".format(output_type, target_arch))
            cargo_tree_output = cargo.run_cargo_tree(cargo_toml_path,
                                                     output_type, target_arch,
                                                     depth,
                                                     crate_derived_features)
            for line in cargo_tree_output:
                tree_dependency = _parse_cargo_tree_dependency_line(
                    args, cargo_toml, is_third_party_toml, line)
                if not tree_dependency:
                    # Some lines of `cargo tree` output may not point at a
                    # dependency.
                    continue
                _add_edges_for_dep_on_target_arch(build_data_set, crate_key,
                                                  arch_specific, usage,
                                                  output_type, tree_dependency,
                                                  target_arch, new_keys)
    return new_keys


def _gen_build_rule(args: argparse.Namespace, build_data_set: BuildData,
                    crate_key: cargo.CrateKey) -> BuildRule:
    """Generate a BuildRule for a single crate from the BuildData structure.

    Args:
        args: The command-line arguments.
        build_data_set: The fully-generated dependency tree data structure.
        crate_key: The crate to generate a BuildRule for, which will be used to
          generate its BUILD file.

    Returns:
        A BuildRule for the crate, which can be used to generate its BUILD file.
    """
    crate_name = crate_key.name
    crate_epoch = crate_key.epoch

    # We need the contents of the Cargo.toml to get a few pieces of data that
    # aren't part of the BuildData structure.
    cargo_toml = common.load_toml(
        common.os_crate_cargo_dir(crate_name,
                                  crate_epoch,
                                  rel_path=["Cargo.toml"]))

    build_rule = BuildRule(crate_name, crate_epoch)

    cargo_toml_has_lib_key = "lib" in cargo_toml
    cargo_toml_has_bin_key = "bin" in cargo_toml

    # Cargo defaults to 2015 for backward compatibility if it's not specified.
    build_rule.edition = cargo_toml["package"].get("edition", "2015")

    # Gather some cargo metadata which we will want to pass to rustc using
    # environment variables in case the crates use 'crate_authors!' or similar.
    build_rule.cargo_pkg_authors = cargo_toml["package"].get("authors")
    build_rule.cargo_pkg_version = cargo_toml["package"].get("version")
    build_rule.cargo_pkg_name = cargo_toml["package"].get("name")
    build_rule.cargo_pkg_description = cargo_toml["package"].get("description")

    # Prefix from the BUILD.gn file to the crate's source files.
    path_prefix = ("/".join(consts.CRATE_INNER_DIR) +
                   "/") if consts.CRATE_INNER_DIR else ""

    # We compute the lib type, but don't store it in BuildRule until we know
    # there is a lib target in the crate.
    lib_type = "rlib"
    if cargo_toml_has_lib_key:
        lib_type = "proc-macro" if cargo_toml["lib"].get("proc-macro",
                                                         False) else "rlib"

    default_lib_root_os_path = common.os_crate_cargo_dir(
        crate_name, crate_epoch, rel_path=["src", "lib.rs"])
    if cargo_toml_has_lib_key and "path" in cargo_toml["lib"]:
        build_rule.lib_root = path_prefix + cargo_toml["lib"]["path"]
    elif os.path.exists(default_lib_root_os_path):
        build_rule.lib_root = path_prefix + "src/lib.rs"

    if build_rule.lib_root:
        build_rule.lib_type = lib_type
        # TODO(danakj): Find all the sources.
        build_rule.lib_sources = [build_rule.lib_root]

    if cargo_toml_has_bin_key:
        for toml_bin in cargo_toml["bin"]:
            name = toml_bin["name"]

            default_bin_root_os_path = common.os_crate_cargo_dir(
                crate_name, crate_epoch, rel_path=["src", "main.rs"])
            named_bin_root_os_path = common.os_crate_cargo_dir(
                crate_name, crate_epoch, rel_path=["src", "bin", name + ".rs"])

            if "path" in toml_bin:
                path = path_prefix + toml_bin["path"]
            elif os.path.exists(named_bin_root_os_path):
                path = path_prefix + "src/bin/{}.rs".format(name)
            else:
                assert os.path.exists(default_bin_root_os_path)
                path = path_prefix + "src/main.rs"

            build_rule.bins += [{
                "name": name,
                "root": path,
                # TODO(danakj): Find all the sources.
                "sources": [path],
            }]

    build_script_root = None
    if "build" in cargo_toml["package"]:
        if cargo_toml["package"]["build"]:  # May be `false`
            build_script_root = path_prefix + cargo_toml["package"]["build"]
    elif os.path.exists(
            common.os_crate_cargo_dir(crate_name,
                                      crate_epoch,
                                      rel_path=["build.rs"])):
        build_script_root = path_prefix + "build.rs"

    if build_script_root:
        build_rule.build_root = build_script_root
        # TODO(danakj): Find all the sources.
        build_rule.build_sources = [build_rule.build_root]
        # The third_party.toml specifies this, as it has to come from a human.
        with build_data_set.for_normal.per_crate(crate_key) as crate_data:
            with build_data_set.for_buildrs.per_crate(
                    crate_key) as for_build_data:
                with build_data_set.for_tests.per_crate(
                        crate_key) as for_tests_data:
                    # We assume that the build.rs has the same outputs no matter
                    # what features are going to be enabled, but if that's not
                    # true, then we need a way to specify in the
                    # third_party.toml which outputs to expect depending on how
                    # the crate's being used. And then these asserts could fail.
                    # Then we would need to store these on the BuildRuleUsage
                    # instead of on the BuildRule.
                    assert crate_data.build_script_outputs == \
                        for_build_data.build_script_outputs
                    assert crate_data.build_script_outputs == \
                        for_tests_data.build_script_outputs
            build_rule.build_script_outputs = \
                list(crate_data.build_script_outputs)

    # Each usage can define its own set of features/deps and will turn into a
    # separate GN target.
    for usage in cargo.CrateUsage:
        with build_data_set.per_crate_for_usage(usage, crate_key) as crate_data:
            if not crate_data.used_on_archs:
                continue

            normal_deps: DepSet = crate_data.deps[cargo.CrateBuildOutput.NORMAL]
            build_deps: DepSet = crate_data.deps[cargo.CrateBuildOutput.BUILDRS]
            if usage == cargo.CrateUsage.FOR_NORMAL:
                dev_deps: DepSet = crate_data.deps[cargo.CrateBuildOutput.TESTS]
            else:
                # We only care to run tests for the featureset used at runtime.
                dev_deps = DepSet()

            build_rule_usage = build_rule.get_usage(usage)
            build_rule_usage.for_first_party = crate_data.for_first_party
            build_rule_usage.used_on_archs = compiler.BuildConditionSet(
                crate_data.used_on_archs)
            build_rule_usage.features = [
                (name, compiler.BuildConditionSet(archset))
                for (name, archset) in crate_data.features.all_archsets()
            ]

            build_rule_usage.deps = [{
                "deppath":
                common.gn_crate_path(k.name, k.epoch) + ":" +
                cargo.CrateBuildOutput.NORMAL.gn_target_name_for_dep(),
                "compile_modes":
                compiler.BuildConditionSet(depdata.archset),
            } for (k, depdata) in normal_deps.all_deps_data()]

            build_rule_usage.build_deps = [{
                "deppath":
                common.gn_crate_path(k.name, k.epoch) + ":" +
                cargo.CrateBuildOutput.BUILDRS.gn_target_name_for_dep(),
                "compile_modes":
                compiler.BuildConditionSet(depdata.archset),
            } for (k, depdata) in build_deps.all_deps_data()]

            build_rule_usage.dev_deps = [{
                "deppath":
                common.gn_crate_path(k.name, k.epoch) + ":" +
                cargo.CrateBuildOutput.TESTS.gn_target_name_for_dep(),
                "compile_modes":
                compiler.BuildConditionSet(depdata.archset),
            } for (k, depdata) in dev_deps.all_deps_data()]

    build_rule.sort_internals()
    if args.verbose:
        print()
        print("build_rule for {}", crate_key)
        pprint(vars(build_rule))
        print("for {normal}: ")
        pprint(vars(build_rule.normal_usage))
        print("for buildrs: ")
        pprint(vars(build_rule.buildrs_usage))
        print("for test: ")
        pprint(vars(build_rule.test_usage))
    return build_rule