summaryrefslogtreecommitdiffstats
path: root/chromium/tools/crates/lib/gen.py
blob: 3b6c57d0ed9fd20552d89970a3a5fc3a87f63eaf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
# python3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""The gen module implements the gen action.

The gen action will generate the dependency graph of from crates rooted in
`third_party/rust/third_party.toml` including transitive dependencies which are
all found in `third_party/rust/`, and then write the BUILD.gn files required to
build each such crate."""

from __future__ import annotations

from typing import Iterator
from contextlib import contextmanager
from datetime import datetime
from pprint import pprint
import argparse
import tempfile
import os
import re
import sys

from lib.build_rule import BuildRule
from lib import common
from lib import compiler
from lib import consts
from lib import cargo


class BuildData:
    """The root of the crate dependency tree.

    This holds information collected from the dependency tree that is used to
    generate BUILD.gn rules. This data needs to be collected and merged from
    across many crates, as each crate `X` depending on `Z` will contribute to
    data about what is needed from `Z`."""

    def __init__(self):
        self.for_normal = DataForUsage()
        self.for_buildrs = DataForUsage()
        self.for_tests = DataForUsage()

    @contextmanager
    def for_usage(self, usage: cargo.CrateUsage) -> Iterator[DataForUsage]:
        """A ContextManager for accessing the nested `DataForUsage` structure.

        The `DataForUsage` contains data which is different depending how
        crates are being used (for building a library/executable, for building
        a build script, or for building tests).
        """
        try:
            if usage == cargo.CrateUsage.FOR_NORMAL:
                yield self.for_normal
            elif usage == cargo.CrateUsage.FOR_BUILDRS:
                yield self.for_buildrs
            elif usage == cargo.CrateUsage.FOR_TESTS:
                yield self.for_tests
            else:
                assert False  # Unhandled CrateUsage
        finally:
            pass

    def has_crate_for_usage(self, usage: cargo.CrateUsage,
                            crate: cargo.CrateKey) -> bool:
        """Whether a crate has been seen yet as a dependency."""
        with self.for_usage(usage) as for_usage:
            return for_usage.has_crate(crate)

    @contextmanager
    def per_crate_for_usage(self, usage: cargo.CrateUsage,
                            crate: cargo.CrateKey) -> Iterator[PerCrateData]:
        """A shortcut to PerCrateData without going through `DataForUsage`.

        This is equivalent to using `for_usage(usage)` and then
        `per_crate(crate)`."""
        assert crate
        with self.for_usage(usage) as for_usage:
            if not crate in for_usage._map:
                for_usage._map[crate] = PerCrateData()
            try:
                yield for_usage._map[crate]
            finally:
                pass

    def get_per_crate_data_copy(self, usage: cargo.CrateUsage,
                                crate: cargo.CrateKey) -> PerCrateData:
        with self.per_crate_for_usage(usage, crate) as crate_data:
            return crate_data


class DataForUsage:
    """Data that differs depending on how crates are being consumed.

    Crates can be built up to 3 times, for use in building different types of
    targets.
    """

    def __init__(self):
        self._map: dict[cargo.CrateKey, PerCrateData] = {}

    @contextmanager
    def per_crate(self, crate: cargo.CrateKey) -> Iterator[PerCrateData]:
        """A ContextManager to access the PerCrateData structure.

        This contains data about how the crate should be build, how other crates
        depend on it and what it depends on.
        """
        assert crate
        if not crate in self._map:
            self._map[crate] = PerCrateData()
        try:
            yield self._map[crate]
        finally:
            pass

    def has_crate(self, crate: cargo.CrateKey) -> bool:
        """Whether a crate has been seen as a dependency yet.

        Note this includes crates depended on by our fictional top-level
        third_party.toml crate.
        """
        return crate in self._map

    def all_crates(self) -> set[cargo.CrateKey]:
        """All crates that appear as a dependency in the dependency tree.

        Note this includes crates depended on by our fictional top-level
        third_party.toml crate.
        """
        return set(self._map.keys())

    def all_crates_data(self) -> list[tuple[cargo.CrateKey, PerCrateData]]:
        """All crates along with their PerCrateData in the dependency tree.

        Note this includes crates depended on by our fictional top-level
        third_party.toml crate.
        """
        return list(self._map.items())


class PerCrateData:
    """The set of data about a Crate for a particular usage of that crate.

    This defines how other crates depend on this crate, as well as what it
    depends on."""

    def __init__(self):
        # Whether the crate is allowed to be used from first-party code. This is
        # the set of crates depended on by our fictional third_party.toml
        # crate.
        self.for_first_party = False
        # The set of features requested by other crates, and the architectures
        # that they requested them on.
        self.features = FeatureSet()
        # Dependencies of this crate, with different sets for the normal
        # (library or executable) output, the build script, or tests.
        self.deps = {o: DepSet() for o in cargo.CrateBuildOutput}
        # On what architectures this crate is depended on by another crate
        # (including by our fictional top-level third_party.toml). If this is
        # empty, the crate is not used at all (at least under a particular usage
        # aka DataForUsage).
        self.used_on_archs = compiler.ArchSet.EMPTY()
        # Whether the crate is considered "architecture-specific". When true,
        # the crate may have different dependencies or feature-requirements on
        # different architectures, and we must check each architecture for them.
        # As a result, any dependencies of this crate would also be considered
        # as "architecture-specific" and would get this flag set eventually as
        # well.
        self.arch_specific = False
        # The set of files generated from the crate's build.rs build script.
        self.build_script_outputs: set[str] = set()
        # The map of full version numbers that Cargo has requested for the crate
        # + epoch to the path where Cargo said it has found them. The path will
        # be missing if there's no local crate that matches.
        self.cargo_full_versions: map[str, str] = {}


class DepSet:
    """The set of dependencies, and what architectures they are needed on."""

    class Data:
        def __init__(self):
            # The architectures where the dependency is needed.
            self.archset = compiler.ArchSet.EMPTY()

    def __init__(self):
        self._map: dict[cargo.CrateKey, DepSet.Data] = {}

    @contextmanager
    def dep_data(self, dep: cargo.CrateKey) -> Iterator[DepSet.Data]:
        """A ContextManager to access the dependency data of a crate."""
        assert dep
        if not dep in self._map:
            self._map[dep] = DepSet.Data()
        try:
            yield self._map[dep]
        finally:
            pass

    def all_deps(self) -> set[cargo.CrateKey]:
        """The set of all crates which are dependencies, on any architecture."""
        return {k for k in self._map.keys()}

    def all_deps_data(self) -> list[tuple[cargo.CrateKey, DepSet.Data]]:
        """The set of all crates and their dependency data."""
        return [p for p in self._map.items()]


class FeatureSet:
    """The set of features requested of a crate.

    This contains the set of requested features, and the archictures they are
    requested for.
    """

    def __init__(self):
        self._map: dict[str, compiler.ArchSet] = {}

    def has_feature(self, feature: str) -> bool:
        """Whether the named feature is requested for any architecture."""
        return feature in self._map

    @contextmanager
    def archset_for_feature(self, feature: str) -> Iterator[compiler.ArchSet]:
        """A ContextManager to access the ArchSet for anamed feature.

        This is the set of architectures where the feature is requested."""
        assert feature
        if not feature in self._map:
            self._map[feature] = compiler.ArchSet.EMPTY()
        try:
            yield self._map[feature]
        finally:
            pass

    def all_archsets(self) -> list[tuple[str, compiler.ArchSet]]:
        """A set of all features and their feature data."""
        return [p for p in self._map.items()]


def run(args: argparse.Namespace):
    """Entry point for the 'gen' action."""
    # This step constructs a BuildData which has all the information we need to
    # construct build rules in GN. This is slow as it has to query `cargo tree`
    # a lot.
    build_data_set = _construct_build_data_from_3p_crates(args)

    NAME_PRINT_LEN = 30
    num_done = 0
    num_total = len(build_data_set.for_normal.all_crates())
    last_printed = 0
    for crate_key in build_data_set.for_normal.all_crates():
        last_printed = common.print_same_line(
            "Generating BUILD.gn for crates {}/{} {} v{}".format(
                1 + num_done, num_total, crate_key.name, crate_key.epoch),
            last_printed)
        # This uses the entire BuildData structure to construct a BuildRule for
        # each crate, one at at time. The BuildRule is a typed representation of
        # what will end up in the crate's BUILD.gn file.
        build_rule = _gen_build_rule(args, build_data_set, crate_key)
        assert build_rule.normal_usage.used_on_archs or \
            build_rule.buildrs_usage.used_on_archs or \
            build_rule.test_usage.used_on_archs

        copyright_year = _get_copyright_year(
            common.os_crate_version_dir(crate_key.name,
                                        crate_key.epoch,
                                        rel_path=["BUILD.gn"]))
        with open(
                common.os_crate_version_dir(crate_key.name,
                                            crate_key.epoch,
                                            rel_path=["BUILD.gn"]),
                "w") as build_file:
            build_file.write(
                _run_gn_format(build_rule.generate_gn(args, copyright_year)))
        num_done += 1
    common.print_same_line("Generating BUILD.gn for crates {}/{} ".format(
        num_done, num_total),
                           last_printed,
                           done=True)


def _get_copyright_year(path: str) -> str:
    try:
        with open(path, "r") as file:
            top_line = file.readline()
            m = consts.GN_HEADER_YEAR_REGEX.search(top_line)
            if m:
                return m.group("year")
    except FileNotFoundError:
        pass
    return str(datetime.now().year)


def _construct_build_data_from_3p_crates(args: argparse.Namespace) -> BuildData:
    """Read the `third_party.toml` data and populate a BuildData from it.

    Returns a dictionary containing information about all third-party crate
    dependencies, including transitive dependencies, build dependencies and
    dev (test-only) dependencies. They are all generated from the set of
    first-party dependencies (and their features) declared in
    `${THIRD_PARTY}/third_party.toml`.
    """
    toml_3p = common.load_toml(
        common.os_third_party_dir(rel_path=["third_party.toml"]))
    assert "dependencies" in toml_3p, \
        "third_party.toml must specify 'dependencies'"

    # Validate the dependencies listed in third_party.toml.
    TOML_DEPS_KEYS = ["dependencies", "build-dependencies", "dev-dependencies"]
    for toml_key in TOML_DEPS_KEYS:
        if not toml_key in toml_3p:
            continue
        for (dep, details) in toml_3p[toml_key].items():
            version = details["version"] if isinstance(details,
                                                       dict) else details
            # Verify that the version is specified as an epoch.
            epoch = common.version_epoch_dots(version)
            assert version == epoch, \
                "third_party.toml must specify version as an epoch for " \
                " '{}', " "found '{}', did you mean '{}'?".format(
                    dep, version, epoch)

            # Verify that a crate is only listed once.
            for other_toml_key in [k for k in TOML_DEPS_KEYS if k != toml_key]:
                if other_toml_key not in toml_3p:
                    continue
                assert not dep in toml_3p[other_toml_key], \
                    "A crate may only appear in one section of " \
                    "third_party.toml, but '{}' appears more than " \
                    "once.".format(dep)

    # For every crate in third_party, we will generate a patch to redirect
    # crates.io to that directory, so that if we have local changes to the
    # Cargo.toml files, running `cargo tree` will see them. To do this we
    # save a list of all 3p crates.
    list_of_3p_cargo_toml = cargo.gen_list_of_3p_cargo_toml()

    # We can't use these to see the deps tree, as it will put everything as a
    # dep of the root target, but it's helpful for failing faster on missing
    # crates.
    transitive_normal_build_data_set = BuildData()

    # The set of info needed to make a GN target for the crate, in each
    # CrateUsage.
    build_data_set = BuildData()

    last_printed = 0

    # We will take the third-party dependencies, specified in a toml format,
    # and build a Cargo.toml file for running `cargo tree` against.
    with tempfile.TemporaryDirectory() as workdir:
        # First we make a copy of the third-party deps input as-is, but with
        # additional required fields added. We can use that with `cargo tree` to
        # get a full set of all dependencies, including transitive ones.
        cargo_toml_path = cargo.write_cargo_toml_in_tempdir(
            workdir,
            list_of_3p_cargo_toml,
            orig_toml_parsed=cargo.add_required_cargo_fields(toml_3p),
            verbose=args.verbose)

        # This collects the direct dependencies from first-party code. For
        # normal dependencies, cargo can output them all here, which we collect
        # up front to try print out all the missing ones at once. This would
        # confuse our actual dependency calculations though, since we would
        # have all the deps on a single build target (the third-party root). So
        # we will have to throw them away.
        last_printed = common.print_same_line(
            "Collecting all transitive normal dependencies.", last_printed)
        _collect_deps_for_crate(args, cargo_toml_path,
                                transitive_normal_build_data_set,
                                cargo.CrateUsage.FOR_NORMAL, True)
        # We don't look for blocked stuff here since we won't have the data to
        # say what's depending on them yet anyways.
        _look_for_missing_or_blocked_deps(transitive_normal_build_data_set,
                                          find_blocked=False)

        last_printed = common.print_same_line(
            "Collecting top-level normal dependencies.", last_printed)
        _collect_deps_for_crate(args,
                                cargo_toml_path,
                                build_data_set,
                                cargo.CrateUsage.FOR_NORMAL,
                                True,
                                depth=1)
        last_printed = common.print_same_line(
            "Collecting top-level build-dependencies.", last_printed)
        _collect_deps_for_crate(args,
                                cargo_toml_path,
                                build_data_set,
                                cargo.CrateUsage.FOR_BUILDRS,
                                True,
                                depth=1)

        last_printed = common.print_same_line(
            "Collecting top-level dev-dependencies.", last_printed)
        _collect_deps_for_crate(args,
                                cargo_toml_path,
                                build_data_set,
                                cargo.CrateUsage.FOR_TESTS,
                                True,
                                depth=1)

    # Now we have enough to build our first-party code, from prebuilts of the
    # dependencies. But we don't have enough to build each dependency. We have
    # to query `cargo tree` for each one to learn what's needed to build it.
    #
    # So here we walk through each crate and query its Cargo.toml for its build
    # and dev dependencies.
    #
    # This step is slow...
    crate_keys_left = build_data_set.for_normal.all_crates()
    num_done = 0
    # TODO: Cache this work somewhere in `consts.THIRD_PARTY`.
    while crate_keys_left:
        crate_key = crate_keys_left.pop()
        crate_name = crate_key.name
        crate_epoch = crate_key.epoch

        orig_cargo_toml_path = common.os_crate_cargo_dir(
            crate_name, crate_epoch, rel_path=["Cargo.toml"])

        # Skip missing dependencies. We will print them all at the end.
        if not os.path.exists(orig_cargo_toml_path):
            num_done += 1
            continue

        with tempfile.TemporaryDirectory() as workdir:
            tmp_cargo_toml_path = cargo.write_cargo_toml_in_tempdir(
                workdir,
                list_of_3p_cargo_toml,
                orig_toml_path=orig_cargo_toml_path,
                verbose=args.verbose)

            last_printed = common.print_same_line(
                "Collecting normal dependencies: {}/{} {} v{}".format(
                    1 + num_done, 1 + num_done + len(crate_keys_left),
                    crate_name, crate_epoch), last_printed)
            new_keys = _collect_deps_for_crate(args,
                                               tmp_cargo_toml_path,
                                               build_data_set,
                                               cargo.CrateUsage.FOR_NORMAL,
                                               False,
                                               crate_key=crate_key,
                                               depth=1)
            crate_keys_left.update(new_keys)

            last_printed = common.print_same_line(
                "Collecting build dependencies: {}/{} {} v{}".format(
                    1 + num_done, 1 + num_done + len(crate_keys_left),
                    crate_name, crate_epoch), last_printed)
            new_keys = _collect_deps_for_crate(args,
                                               tmp_cargo_toml_path,
                                               build_data_set,
                                               cargo.CrateUsage.FOR_BUILDRS,
                                               False,
                                               crate_key=crate_key,
                                               depth=1)
            crate_keys_left.update(new_keys)
            find_test_deps = args.with_tests
            with build_data_set.for_tests.per_crate(crate_key) as per_crate:
                if per_crate.for_first_party:
                    find_test_deps = True
            if find_test_deps:
                last_printed = common.print_same_line(
                    "Collecting test dependencies: {}/{} {} v{}".format(
                        1 + num_done, 1 + num_done + len(crate_keys_left),
                        crate_name, crate_epoch), last_printed)
                new_keys = _collect_deps_for_crate(args,
                                                   tmp_cargo_toml_path,
                                                   build_data_set,
                                                   cargo.CrateUsage.FOR_TESTS,
                                                   False,
                                                   crate_key=crate_key,
                                                   depth=1)
                crate_keys_left.update(new_keys)

        num_done += 1
    common.print_same_line("Collecting dependencies: {}/{} ".format(
        num_done, num_done),
                           last_printed,
                           done=True)

    if args.verbose:
        pprint(build_data_set)
        print()

    # If anything was missing, we will abort here and print them out all at
    # once.
    _look_for_missing_or_blocked_deps(build_data_set)

    return build_data_set


def _run_gn_format(build_gn: str) -> str:
    """Formats the input GN file contents, and returns the formatted output."""
    import subprocess
    try:
        return subprocess.check_output(["gn", "format", "--stdin"],
                                       stderr=subprocess.PIPE,
                                       text=True,
                                       input=build_gn)
    except subprocess.CalledProcessError as e:
        print()
        print("stdout")
        print(e.stdout)
        print()
        print("stderr")
        print(e.stderr)
        raise e


def _look_for_missing_or_blocked_deps(build_data_set: BuildData,
                                      find_blocked: bool = True):
    """Find crate dependencies for which we have no local copy downloaded.

    If `find_blocked` is true, this also looks for dependencies listed in
    Cargo.toml files to crates that are considered blocked, and thus should not
    be used.

    If a problem is found, this function will terminate the program with
    `exit(1)`.
    """
    missing: list[tuple[str, str]] = []
    missing_version: list[tuple[str, str, str]] = []
    blocked: list[tuple[str, str]] = []
    # Every crate appears in all 3 CrateUsage entries, so we can just look at
    # one.
    for crate_key in build_data_set.for_normal.all_crates():
        name = crate_key.name
        epoch = crate_key.epoch
        if name in consts.BLOCKED_CRATES:
            if find_blocked:
                reason = consts.BLOCKED_CRATES[name]
                blocked += [(name, reason)]
            continue

        with build_data_set.for_normal.per_crate(crate_key) as data:
            for (ver, dir) in data.cargo_full_versions.items():
                if dir:
                    continue  # If cargo gave a path then it was found locally.
                epoch_path = common.os_crate_cargo_dir(name,
                                                       epoch,
                                                       rel_path=["Cargo.toml"])
                if not os.path.exists(epoch_path):
                    missing += [(name, ver, epoch)]
                else:
                    missing_version += [(name, ver, epoch)]

    if missing or missing_version or blocked:
        print(file=sys.stderr)
        print(file=sys.stderr)

    if missing_version:
        print("Missing correct version for {} dependencies:".format(
            len(missing_version)),
              file=sys.stderr)
        for (name, ver, epoch) in missing_version:
            print("Cargo wants version \"{}\" for crate \"{}\". But a different"
                  " version is downloaded for epoch v{}.".format(
                      ver, name, epoch),
                  file=sys.stderr)
        print(
            "To resolve these errors, remove the epoch and download the exact"
            " version by specifying it in third_party.toml. If multiple exact"
            " versions are required, we will need some sort of extension to"
            " third_party.toml to support this.",
            file=sys.stderr)
        print(file=sys.stderr)
    if missing:
        print("Missing {} dependencies. To download:".format(len(missing)),
              file=sys.stderr)
        for (name, ver, epoch) in missing:
            print("{} download {} {} --security-critical=yes_or_no".format(
                sys.argv[0], name, ver),
                  file=sys.stderr)
        print(file=sys.stderr)
    if blocked:
        for (name, reason) in blocked:
            found_from = []
            for usage in cargo.CrateUsage:
                with build_data_set.for_usage(usage) as for_usage:
                    for (from_key,
                         per_crate_data) in for_usage.all_crates_data():
                        for (usage, depset) in per_crate_data.deps.items():
                            for dep_key in depset.all_deps():
                                if (dep_key.name in consts.BLOCKED_CRATES
                                        and not from_key in found_from):
                                    found_from += [from_key]
            print("Found dependency on blocked crate \"{}\" from:".format(name),
                  file=sys.stderr)
            for crate_key in found_from:
                print("    {} v{}".format(
                    crate_key.name,
                    common.version_epoch_normalized(crate_key.epoch)),
                      file=sys.stderr)
            print("  Reason: {}".format(reason), file=sys.stderr)
        print("Remove blocked crates from any Cargo.toml files "
              "in {} before proceeding.".format(
                  os.path.join(*consts.THIRD_PARTY)),
              file=sys.stderr)
        print(
            "Note: You will also need to remove usage of the blocked "
            "crate from the dependent crate(s) as well.",
            file=sys.stderr)
        print(file=sys.stderr)

    if missing or missing_version or blocked:
        exit(1)


class ArchSpecific:
    def __init__(self, archset: compiler.ArchSet):
        self.archset = archset
        # Whether the crate has arch-specific dependencies. That is to say,
        # outgoing edges are arch-dependent.
        self.arch_specific_deps: list[cargo.CrateKey] = []
        # Whether the crate is an arch-specific dep from some other crate. That
        # is to say, incoming edges are arch-dependent.
        self.is_used_as_arch_specific_dep = False

    def archs_to_test(self) -> set[str]:
        """The ArchSet to test against as a list of target strings.

        These are strings we can pass to Cargo or rustc, and can build other
        ArchSets from.
        """
        return self.archset.as_strings()

    def is_single_arch(self) -> bool:
        """If we can consider only one architecture for accurate results."""
        return (not self.arch_specific_deps
                and not self.is_used_as_arch_specific_dep)

    def apply_results_to(self, target_arch: str) -> compiler.ArchSet:
        """Return an ArchSet to use for any new outgoing dependency edges."""
        if self.is_single_arch():
            # If we're applying results to other architectures, we expect that
            # we are only testing one.
            assert (len(self.archs_to_test()) == 1)
            # We're only checking one arch but if a dep is found, it should
            # be enabled for all archs. We're only checking one arch because
            # we have no arch-specific rules.
            return compiler.ArchSet.ALL()
        assert (target_arch in self.archs_to_test())
        # We need to check each arch, so only add deps to the arch they
        # are found on.
        return compiler.ArchSet(initial={target_arch})


def _get_archs_of_interest(cargo_toml: dict, crate_usage_data: PerCrateData,
                           user_target_arch: str) -> ArchSpecific:
    """Determine which archs to check dependencies for a crate.

    Args:
        cargo_toml: The Cargo.toml file of the crate as a dict.
        crate_usage_data: Data about how the crate is used by other crates. It
            can be None, in which case, it's assumed to not be depended on by
            any other crates.
        target: The user-specific target (one of the keys from
            compiler.RUSTC_ARCH_TO_BUILD_CONDITION) to generate build files
            for. If specified, only that target will be considered. Otherwise,
            all possible targets would be considered.
    """
    if user_target_arch:
        return ArchSpecific(compiler.ArchSet(initial={user_target_arch}))

    # Whether the crate has arch-specific dependencies. That is to say,
    # outgoing edges are arch-dependent.
    has_arch_specific_deps = False
    if "target" in cargo_toml:
        for target_data in cargo_toml["target"].values():
            if "dependencies" in target_data:
                has_arch_specific_deps = True
                break

    # Whether the crate is an arch-specific dep from some other crate. That
    # is to say, incoming edges are arch-dependent.
    is_used_as_arch_specific_dep = crate_usage_data.arch_specific \
        if crate_usage_data else False

    # When there's no arch-specific incoming or outgoing edges, we can just
    # choose any target architecture and collect its dependencies there,
    # applying them too all architectures.
    if not is_used_as_arch_specific_dep and not has_arch_specific_deps:
        # We can just test one arch even though it'll apply to all.
        return ArchSpecific(compiler.ArchSet.ONE())

    # This crate has architecture-specific dependencies, or is part of
    # an architecture-specific dependency chain.

    if crate_usage_data:
        a = ArchSpecific(crate_usage_data.used_on_archs)
    else:
        a = ArchSpecific(compiler.ArchSet.ALL())
    if has_arch_specific_deps:
        # A list of (crate name, dependency data) for each dependency that is
        # found under a "target" rule in the Cargo.toml.
        targetted_deps = []
        for (target_name, target_data) in cargo_toml["target"].items():
            targetted_deps += list(
                target_data.get("dependencies", dict()).items())
        # Convert the list of (crate name, dependency data) into a more useful
        # list of `cargo.CrateKey`s.

        def version_from_maybe_dict(maybe_dict: dict | str) -> str:
            if type(maybe_dict) is dict:
                return maybe_dict["version"]
            else:
                return maybe_dict

        a.arch_specific_deps = [
            cargo.CrateKey(crate_name, version_from_maybe_dict(dep_data))
            for (crate_name, dep_data) in targetted_deps
        ]
    a.is_used_as_arch_specific_dep = is_used_as_arch_specific_dep
    return a


def _crate_features_on_target_arch(crate_usage_data: PerCrateData,
                                   target_arch: str) -> list[str]:
    """Returns the subset of features requested of the crate for the target.

    A crate has a set of features that are enabled by the union of all other
    crates that depend upon it. The incoming dependency edges specify which
    features are requested.

    But some of those edges may be arch-specific. We return the set of features
    that are requested for the specific architecture named in `target_arch`.
    """
    all_features = crate_usage_data.features.all_archsets()
    return [f for (f, archset) in all_features if archset.has_arch(target_arch)]


class CargoTreeDependency:
    def __init__(self,
                 key: cargo.CrateKey,
                 full_version: str = None,
                 crate_path: str = None,
                 features: list[str] = [],
                 is_for_first_party_code: bool = False,
                 build_script_outputs: set[str] = set()):
        self.key = key
        self.full_version = full_version
        self.crate_path = crate_path
        self.features = features
        self.is_for_first_party_code = is_for_first_party_code
        self.build_script_outputs = build_script_outputs

    def __eq__(self, other: CargoTreeDependency) -> bool:
        return (self.key == other.key
                and self.full_version == other.full_version
                and self.crate_path == other.crate_path
                and self.features == other.features and
                self.is_for_first_party_code == other.is_for_first_party_code
                and self.build_script_outputs == other.build_script_outputs)

    def __repr__(self) -> str:
        return "CargoTreeDependency(key={}, full_version={}, crate_path={}, " \
            "features={}, " \
            "is_for_first_party_code={}, " \
            "build_script_outputs={})".format(
                self.key, self.full_version, self.crate_path, self.features,
                self.is_for_first_party_code,
                self.build_script_outputs)


def _parse_cargo_tree_dependency_line(args: argparse.Namespace,
                                      cargo_toml: dict,
                                      is_third_party_toml: bool,
                                      line: str) -> CargoTreeDependency:
    m = re.search(consts.CARGO_DEPS_REGEX, line)
    if not m:
        return None
    if args.verbose:
        print(m.group(0))

    dep_name = m.group("name")
    dep_version = m.group("version")
    dep_path = m.group("path") if "path" in m.groupdict() else None
    dep_key = cargo.CrateKey(dep_name, dep_version)

    dep_features = m.group("features").split(",") if m.group("features") else []
    dep_isprocmacro = bool(m.group("isprocmacro"))

    parse_ext_key = None
    if is_third_party_toml:
        parse_ext_key = "dependencies" if dep_name in cargo_toml.get(
            "dependencies",
            {}) else "dev-dependencies" if dep_name in cargo_toml.get(
                "dev-dependencies", {}) else None

    if not parse_ext_key:
        # Extensions from third_party.toml that aren't in normal Cargo.toml,
        # these are the defaults for stuff outside of third_party.toml.
        for_first_party_code = False
        build_script_outputs = set()
    else:
        for_first_party_code = True
        build_script_outputs = set()
        # Usually the dependency value is just a version number, but if it
        # is a dict, then it can declare values for extensions.
        extensions = cargo_toml[parse_ext_key][dep_name]
        if type(extensions) is dict:
            build_script_outputs = set(
                extensions.get("build-script-outputs", build_script_outputs))
            for_first_party_code = extensions.get("allow-first-party-usage",
                                                  for_first_party_code)
    return CargoTreeDependency(dep_key,
                               full_version=dep_version,
                               crate_path=dep_path,
                               features=dep_features,
                               is_for_first_party_code=for_first_party_code,
                               build_script_outputs=build_script_outputs)


def _add_edges_for_dep_on_target_arch(
        build_data_set: BuildData, parent_crate_key: cargo.CrateKey,
        parent_arch_specific: ArchSpecific, parent_usage: cargo.CrateUsage,
        parent_output: cargo.CrateBuildOutput, dep: CargoTreeDependency,
        target_arch: str, new_keys: set[cargo.CrateKey]) -> set[cargo.CrateKey]:
    """Adds edges between a parent crate and a dependency crate.

    Args:
        build_data_set: The data structure holding all the dependency edges.
        parent_crate_key: The parent crate. Can be None if the parent is our
            third_party.toml which is not actually a crate at all.
        parent_arch_specific: Data about the parent crate's relationship to
            target architectures, which can apply transitively to dependencies
            as well.
        parent_usage: The parent crate may be building a library for use by
            other crates. Those (ancestor) crates build different outputs such
            as normal binaries, build scripts, and tests. The `parent_usage` is
            which mode of the parent crate we are adding edges to dependencies
            from. The usage controls what features are enabled in the parent
            crate, which can influence its dependencies or configuration.
        parent_output: The parent crate itself also has these various build
            outputs and we're currently adding edges for one of those outputs.
            The `parent_output` specifies which build artifact from the parent
            we're adding dependency edges for.
        dep: Information about the dependency crate.
        target_arch: The target architecture we're currently evaluating. We may
            apply the edges more generally to multiple architectures
        new_keys: An out-param of crates that will need to be (re)visited to
            examine for dependencies.

    """
    archset_for_new_edges = parent_arch_specific.apply_results_to(target_arch)

    # First ensure this dependency crate exists in the `build_data_set` under
    # all CrateUsages.
    for u in cargo.CrateUsage:
        with build_data_set.per_crate_for_usage(u, dep.key) as crate_data:
            crate_data.for_first_party |= dep.is_for_first_party_code
            crate_data.build_script_outputs |= dep.build_script_outputs
            crate_data.cargo_full_versions.update(
                {dep.full_version: dep.crate_path})

    # The parent crate may use a dependency in various ways: to produce normal
    # output, a build script, or tests. We examine just one in this function.
    # Based on that usage from the parent, from the perspective of the
    # dependency, this is how it is being used.
    dep_usage = parent_output.as_dep_usage()

    # Determine if the dependency crate should be considered arch-specific.
    if parent_arch_specific.is_used_as_arch_specific_dep:
        # Firstly, arch-specific-ness is transitive. So if this crate is
        # arch-specific (is used as an arch-specific dependency), then its
        # dependencies are also. But if not, then we look for arch-specific
        # dependency edges come out of this crate to the dependency.
        dep_is_arch_specific = True
    else:
        # For this dependency of the crate, we need to see if a "target" rule
        # affects it, in which case it is considered arch-specific and we will
        # need to compute its deps/features for all configs.
        dep_is_arch_specific = \
            dep.key in parent_arch_specific.arch_specific_deps

    # Update the PreCrateData of the dependency crate. Any changes in here mean
    # we will need to (re)visit the dependency crate to determine its transitive
    # dependencies. This data is about how to build the dependency crate based
    # on its incoming edges from the parent.
    changed = False
    with build_data_set.per_crate_for_usage(dep_usage,
                                            dep.key) as dep_crate_data:
        # This marks the dependency crate as being used on the given target
        # architectures.
        changed |= dep_crate_data.used_on_archs.add_archset(
            archset_for_new_edges)

        if dep_is_arch_specific and not dep_crate_data.arch_specific:
            dep_crate_data.arch_specific = True
            changed = True

        for f in dep.features:
            with dep_crate_data.features.archset_for_feature(f) as archset:
                changed |= archset.add_archset(archset_for_new_edges)

    if changed:
        new_keys.add(dep.key)

    # Add outgoing edges from the parent crate to the dependency crate. If the
    # parent is the virtual third_party.toml crate, then the crate key will not
    # exist, and we don't need to set up any edges.
    if not parent_crate_key:
        return

    with build_data_set.for_usage(parent_usage) as for_usage:
        assert parent_crate_key in for_usage.all_crates()
        with for_usage.per_crate(parent_crate_key) as parent_data:
            with parent_data.deps[parent_output].dep_data(dep.key) as dep_data:
                # Adds the edge from parent to dependency for all given target
                # architectures.
                dep_data.archset.add_archset(archset_for_new_edges)
    return


def _collect_deps_for_crate(args: argparse.Namespace,
                            cargo_toml_path: str,
                            build_data_set: BuildData,
                            usage: cargo.CrateUsage,
                            is_third_party_toml: bool,
                            crate_key: cargo.CrateKey = None,
                            depth: int = None) -> set[cargo.CrateKey]:
    """Runs `cargo tree` and collects all dependency data for a specific crate.

    Args:
        args: The command line args that were given to the crates.py tool.
        cargo_toml_path: The Cargo.toml file for the crate to collect from.
        build_data_set: The BuildData structure to collect data into.
        usage: Dependencies are different depending how the crate is being used,
          this specifies to look for dependencies for a particular usage.
        is_third_party_toml: True if the crate being collected from is the
          synthetic top level crate in `third_party.toml`.
        crate_key: The CrateKey of the crate at cargo_toml_path. It will be None
          if `is_third_party_toml` is true, since there's no real crate there.
        depth: How deep to look for dependencies with `cargo tree`. Defaults to
          None, which will look through the whole tree.

    Returns:
        A set of crates for which we will need to run this function on. Either
        they have never been seen before, or the data we collected for the
        crate in the past has been diritied.
    """
    assert is_third_party_toml != bool(crate_key)

    # New dependencies, or ones with new features, that need to be visited
    # by this function.
    new_keys: set[cargo.CrateKey] = set()

    cargo_toml = common.load_toml(cargo_toml_path)

    arch_specific = _get_archs_of_interest(
        cargo_toml,
        build_data_set.get_per_crate_data_copy(usage, crate_key)
        if crate_key else None, args.target)

    # The crate can have a few different types of output: normal output like a
    # library, its build script, and its tests. We walk over and construct
    # the dependency graph for each.
    first = True
    for output_type in cargo.CrateBuildOutput:
        if output_type == cargo.CrateBuildOutput.TESTS:
            if not args.with_tests and not is_third_party_toml:
                continue
        for target_arch in arch_specific.archs_to_test():
            if is_third_party_toml:
                # For the root third_party.toml crate, nothing can depend on it,
                # so there's no features (and no crate_key).
                crate_derived_features = []
            else:
                crate_derived_features = _crate_features_on_target_arch(
                    build_data_set.get_per_crate_data_copy(usage, crate_key),
                    target_arch)

            if args.verbose:
                if first:
                    first = False
                    print()
                print("output: {} target: {}".format(output_type, target_arch))
            cargo_tree_output = cargo.run_cargo_tree(cargo_toml_path,
                                                     output_type, target_arch,
                                                     depth,
                                                     crate_derived_features)
            for line in cargo_tree_output:
                tree_dependency = _parse_cargo_tree_dependency_line(
                    args, cargo_toml, is_third_party_toml, line)
                if not tree_dependency:
                    # Some lines of `cargo tree` output may not point at a
                    # dependency.
                    continue
                _add_edges_for_dep_on_target_arch(build_data_set, crate_key,
                                                  arch_specific, usage,
                                                  output_type, tree_dependency,
                                                  target_arch, new_keys)
    return new_keys


def _gen_build_rule(args: argparse.Namespace, build_data_set: BuildData,
                    crate_key: cargo.CrateKey) -> BuildRule:
    """Generate a BuildRule for a single crate from the BuildData structure.

    Args:
        args: The command-line arguments.
        build_data_set: The fully-generated dependency tree data structure.
        crate_key: The crate to generate a BuildRule for, which will be used to
          generate its BUILD file.

    Returns:
        A BuildRule for the crate, which can be used to generate its BUILD file.
    """
    crate_name = crate_key.name
    crate_epoch = crate_key.epoch

    # We need the contents of the Cargo.toml to get a few pieces of data that
    # aren't part of the BuildData structure.
    cargo_toml = common.load_toml(
        common.os_crate_cargo_dir(crate_name,
                                  crate_epoch,
                                  rel_path=["Cargo.toml"]))

    build_rule = BuildRule(crate_name, crate_epoch)

    cargo_toml_has_lib_key = "lib" in cargo_toml
    cargo_toml_has_bin_key = "bin" in cargo_toml

    # Cargo defaults to 2015 for backward compatibility if it's not specified.
    build_rule.edition = cargo_toml["package"].get("edition", "2015")

    # Gather some cargo metadata which we will want to pass to rustc using
    # environment variables in case the crates use 'crate_authors!' or similar.
    build_rule.cargo_pkg_authors = cargo_toml["package"].get("authors")
    build_rule.cargo_pkg_version = cargo_toml["package"].get("version")
    build_rule.cargo_pkg_name = cargo_toml["package"].get("name")
    build_rule.cargo_pkg_description = cargo_toml["package"].get("description")

    # Prefix from the BUILD.gn file to the crate's source files.
    path_prefix = ("/".join(consts.CRATE_INNER_DIR) +
                   "/") if consts.CRATE_INNER_DIR else ""

    # We compute the lib type, but don't store it in BuildRule until we know
    # there is a lib target in the crate.
    lib_type = "rlib"
    if cargo_toml_has_lib_key:
        lib_type = "proc-macro" if cargo_toml["lib"].get("proc-macro",
                                                         False) else "rlib"

    default_lib_root_os_path = common.os_crate_cargo_dir(
        crate_name, crate_epoch, rel_path=["src", "lib.rs"])
    if cargo_toml_has_lib_key and "path" in cargo_toml["lib"]:
        build_rule.lib_root = path_prefix + cargo_toml["lib"]["path"]
    elif os.path.exists(default_lib_root_os_path):
        build_rule.lib_root = path_prefix + "src/lib.rs"

    if build_rule.lib_root:
        build_rule.lib_type = lib_type
        # TODO(danakj): Find all the sources.
        build_rule.lib_sources = [build_rule.lib_root]

    if cargo_toml_has_bin_key:
        for toml_bin in cargo_toml["bin"]:
            name = toml_bin["name"]

            default_bin_root_os_path = common.os_crate_cargo_dir(
                crate_name, crate_epoch, rel_path=["src", "main.rs"])
            named_bin_root_os_path = common.os_crate_cargo_dir(
                crate_name, crate_epoch, rel_path=["src", "bin", name + ".rs"])

            if "path" in toml_bin:
                path = path_prefix + toml_bin["path"]
            elif os.path.exists(named_bin_root_os_path):
                path = path_prefix + "src/bin/{}.rs".format(name)
            else:
                assert os.path.exists(default_bin_root_os_path)
                path = path_prefix + "src/main.rs"

            build_rule.bins += [{
                "name": name,
                "root": path,
                # TODO(danakj): Find all the sources.
                "sources": [path],
            }]

    build_script_root = None
    if "build" in cargo_toml["package"]:
        if cargo_toml["package"]["build"]:  # May be `false`
            build_script_root = path_prefix + cargo_toml["package"]["build"]
    elif os.path.exists(
            common.os_crate_cargo_dir(crate_name,
                                      crate_epoch,
                                      rel_path=["build.rs"])):
        build_script_root = path_prefix + "build.rs"

    if build_script_root:
        build_rule.build_root = build_script_root
        # TODO(danakj): Find all the sources.
        build_rule.build_sources = [build_rule.build_root]
        # The third_party.toml specifies this, as it has to come from a human.
        with build_data_set.for_normal.per_crate(crate_key) as crate_data:
            with build_data_set.for_buildrs.per_crate(
                    crate_key) as for_build_data:
                with build_data_set.for_tests.per_crate(
                        crate_key) as for_tests_data:
                    # We assume that the build.rs has the same outputs no matter
                    # what features are going to be enabled, but if that's not
                    # true, then we need a way to specify in the
                    # third_party.toml which outputs to expect depending on how
                    # the crate's being used. And then these asserts could fail.
                    # Then we would need to store these on the BuildRuleUsage
                    # instead of on the BuildRule.
                    assert crate_data.build_script_outputs == \
                        for_build_data.build_script_outputs
                    assert crate_data.build_script_outputs == \
                        for_tests_data.build_script_outputs
            build_rule.build_script_outputs = \
                list(crate_data.build_script_outputs)

    # Each usage can define its own set of features/deps and will turn into a
    # separate GN target.
    for usage in cargo.CrateUsage:
        with build_data_set.per_crate_for_usage(usage, crate_key) as crate_data:
            if not crate_data.used_on_archs:
                continue

            normal_deps: DepSet = crate_data.deps[cargo.CrateBuildOutput.NORMAL]
            build_deps: DepSet = crate_data.deps[cargo.CrateBuildOutput.BUILDRS]
            if usage == cargo.CrateUsage.FOR_NORMAL:
                dev_deps: DepSet = crate_data.deps[cargo.CrateBuildOutput.TESTS]
            else:
                # We only care to run tests for the featureset used at runtime.
                dev_deps = DepSet()

            build_rule_usage = build_rule.get_usage(usage)
            build_rule_usage.for_first_party = crate_data.for_first_party
            build_rule_usage.used_on_archs = compiler.BuildConditionSet(
                crate_data.used_on_archs)
            build_rule_usage.features = [
                (name, compiler.BuildConditionSet(archset))
                for (name, archset) in crate_data.features.all_archsets()
            ]

            build_rule_usage.deps = [{
                "deppath":
                common.gn_crate_path(k.name, k.epoch) + ":" +
                cargo.CrateBuildOutput.NORMAL.gn_target_name_for_dep(),
                "compile_modes":
                compiler.BuildConditionSet(depdata.archset),
            } for (k, depdata) in normal_deps.all_deps_data()]

            build_rule_usage.build_deps = [{
                "deppath":
                common.gn_crate_path(k.name, k.epoch) + ":" +
                cargo.CrateBuildOutput.BUILDRS.gn_target_name_for_dep(),
                "compile_modes":
                compiler.BuildConditionSet(depdata.archset),
            } for (k, depdata) in build_deps.all_deps_data()]

            build_rule_usage.dev_deps = [{
                "deppath":
                common.gn_crate_path(k.name, k.epoch) + ":" +
                cargo.CrateBuildOutput.TESTS.gn_target_name_for_dep(),
                "compile_modes":
                compiler.BuildConditionSet(depdata.archset),
            } for (k, depdata) in dev_deps.all_deps_data()]

    build_rule.sort_internals()
    if args.verbose:
        print()
        print("build_rule for {}", crate_key)
        pprint(vars(build_rule))
        print("for {normal}: ")
        pprint(vars(build_rule.normal_usage))
        print("for buildrs: ")
        pprint(vars(build_rule.buildrs_usage))
        print("for test: ")
        pprint(vars(build_rule.test_usage))
    return build_rule