Skip to content

Commit 90c9ff2

Browse files
authored
Delete Container Registry images left after Functions deployment (#3439)
* Delete Container Registry images left after Functions deployment * Simplify caching * Improve error handling and report next steps to users * lint fixes * Fix typo
1 parent ca904cb commit 90c9ff2

File tree

5 files changed

+515
-4
lines changed

5 files changed

+515
-4
lines changed

src/api.js

+1
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ var api = {
9999
"FIREBASE_CLOUDLOGGING_URL",
100100
"https://logging.googleapis.com"
101101
),
102+
containerRegistryDomain: utils.envOverride("CONTAINER_REGISTRY_DOMAIN", "gcr.io"),
102103
appDistributionOrigin: utils.envOverride(
103104
"FIREBASE_APP_DISTRIBUTION_URL",
104105
"https://firebaseappdistribution.googleapis.com"
+209
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
// This code is very aggressive about running requests in parallel and does not use
2+
// a task queue, because the quota limits for GCR.io are absurdly high. At the time
3+
// of writing, we can make 50K requests per 10m.
4+
// https://cloud.google.com/container-registry/quotas
5+
6+
import * as clc from "cli-color";
7+
8+
import { containerRegistryDomain } from "../../api";
9+
import { logger } from "../../logger";
10+
import * as docker from "../../gcp/docker";
11+
import * as backend from "./backend";
12+
import * as utils from "../../utils";
13+
14+
// A flattening of container_registry_hosts and
// region_multiregion_map from regionconfig.borg
/**
 * Maps a Cloud Functions region to the multi-region Container Registry
 * subdomain ("us", "eu" or "asia") used to build the registry origin for
 * that region. Regions missing from this table have no entry.
 */
const SUBDOMAIN_MAPPING: Record<string, string> = {
  // Regions served by the "us" subdomain.
  "us-west2": "us",
  "us-west3": "us",
  "us-west4": "us",
  "us-central1": "us",
  "us-central2": "us",
  "us-east1": "us",
  "us-east4": "us",
  "northamerica-northeast1": "us",
  "southamerica-east1": "us",

  // Regions served by the "eu" subdomain.
  "europe-west1": "eu",
  "europe-west2": "eu",
  "europe-west3": "eu",
  "europe-west5": "eu",
  "europe-west6": "eu",
  "europe-central2": "eu",

  // Regions served by the "asia" subdomain.
  "asia-east1": "asia",
  "asia-east2": "asia",
  "asia-northeast1": "asia",
  "asia-northeast2": "asia",
  "asia-northeast3": "asia",
  "asia-south1": "asia",
  "asia-southeast2": "asia",
  "australia-southeast1": "asia",
};
41+
42+
/**
 * Best-effort cleanup of the Container Registry images left behind by the
 * builds of the given functions.
 *
 * Every function is cleaned up in parallel. A failure for one function does
 * not abort the others: the error is logged at debug level and the Cloud
 * Console URL where the leftover images can be deleted by hand is recorded.
 * When at least one cleanup failed, a single labeled warning listing the
 * distinct URLs is printed.
 *
 * @param functions the functions whose build images should be removed.
 */
export async function cleanupBuildImages(functions: backend.FunctionSpec[]): Promise<void> {
  utils.logBullet(clc.bold.cyan("functions: ") + "cleaning up build files...");
  const gcrCleaner = new ContainerRegistryCleaner();
  // A Set de-duplicates the URL when several functions of one project/subdomain fail.
  const failedDomains: Set<string> = new Set();
  await Promise.all(
    functions.map(async (func) => {
      try {
        await gcrCleaner.cleanupFunction(func);
      } catch (err) {
        logger.debug("Failed to clean up build images for function", func.id, err);
        // Use the same "us" fallback as ContainerRegistryCleaner so an unmapped
        // region never produces a ".../undefined/gcf" link.
        const subdomain = SUBDOMAIN_MAPPING[func.region] || "us";
        const path = `${func.project}/${subdomain}/gcf`;
        failedDomains.add(`https://console.cloud.google.com/gcr/images/${path}`);
      }
    })
  );

  if (failedDomains.size > 0) {
    const domains = [...failedDomains];
    let message =
      "Unhandled error cleaning up build images. This could result in a small monthly bill if not corrected. ";
    message +=
      "You can attempt to delete these images by redeploying or you can delete them manually at";
    if (domains.length === 1) {
      // A single location fits on the same line as the message.
      message += " " + domains[0];
    } else {
      // Several locations are listed one per line, tab-indented.
      message += domains.map((domain) => "\n\t" + domain).join("");
    }
    utils.logLabeledWarning("functions", message);
  }

  // TODO: clean up Artifact Registry images as well.
}
73+
74+
/**
 * Deletes the Container Registry images that belong to individual functions.
 * One DockerHelper is created lazily per registry subdomain and reused for
 * every function that maps to that subdomain.
 */
export class ContainerRegistryCleaner {
  /** DockerHelper instances keyed by registry subdomain ("us", "eu", "asia"). */
  readonly helpers: Record<string, DockerHelper> = {};

  /**
   * Returns the DockerHelper for the registry serving `location`, creating
   * it on first use. Locations without a known subdomain fall back to "us".
   */
  private helper(location: string): DockerHelper {
    const subdomain = SUBDOMAIN_MAPPING[location] || "us";
    let existing = this.helpers[subdomain];
    if (!existing) {
      existing = new DockerHelper(`https://${subdomain}.${containerRegistryDomain}`);
      this.helpers[subdomain] = existing;
    }
    return existing;
  }

  // GCFv1 lays its images out as:
  // gcf/
  //  +- <region>/
  //      +- <uuid>
  //          +- <hash> (tags: <FuncName>_version-<#>)
  //          +- cache/ (Only present in first deploy of region)
  //          |   +- <hash> (tags: latest)
  //          +- worker/ (Only present in first deploy of region)
  //              +- <hash> (tags: latest)
  //
  // Every <uuid> directory of the region is listed in parallel, and the
  // first one carrying a "<FuncName>_version-<#>" tag for this function is
  // removed recursively. Listings go through the DockerHelper, which keeps
  // the results it has already fetched.
  /**
   * Finds and deletes the image directory of `func`. Only a debug message is
   * logged when no directory carries a tag for the function.
   */
  async cleanupFunction(func: backend.FunctionSpec): Promise<void> {
    const registry = this.helper(func.region);
    const regionDir = `${func.project}/gcf/${func.region}`;
    const { children: uuids } = await registry.ls(regionDir);

    // Filled in as each listing completes.
    const tagsByPath: Record<string, string[]> = {};
    await Promise.all(
      uuids.map(async (uuid) => {
        const path = `${regionDir}/${uuid}`;
        const listing = await registry.ls(path);
        tagsByPath[path] = listing.tags;
      })
    );

    const versionTag = /^(.*)_version-\d+$/;
    const match = Object.entries(tagsByPath).find(([, tags]) =>
      tags.some((tag) => versionTag.exec(tag)?.[1] === func.id)
    );

    if (match) {
      const [imagePath] = match;
      await registry.rm(imagePath);
      return;
    }
    logger.debug("Could not find image for function", backend.functionName(func));
  }
}
130+
131+
/** Listing of a single registry path, as produced by DockerHelper.ls. */
export interface Stat {
  /** Names of the child "directories" directly below the path. */
  children: string[];
  /** Digests of the images stored at the path. */
  digests: docker.Digest[];
  /** Tags attached to images at the path. */
  tags: docker.Tag[];
}
136+
137+
/**
 * Filesystem-like view (ls / rm) over a single Docker registry origin, with
 * a per-path cache of listings.
 */
export class DockerHelper {
  readonly client: docker.Client;
  /** Completed listings keyed by path. `rm` prunes entries as it deletes. */
  readonly cache: Record<string, Stat> = {};
  /**
   * Listings currently being fetched, keyed by path. Callers that ask for the
   * same path concurrently share one request instead of each issuing their own.
   */
  private readonly pending = new Map<string, Promise<Stat>>();

  constructor(origin: string) {
    this.client = new docker.Client(origin);
  }

  /**
   * Lists the tags, digests and children at `path`.
   *
   * The result is cached; later (and concurrent) calls for the same path
   * reuse it. A failed request is not cached, so it can be retried.
   *
   * @throws whatever the underlying client throws when listing fails.
   */
  async ls(path: string): Promise<Stat> {
    const cached = this.cache[path];
    if (cached) {
      return cached;
    }
    let inFlight = this.pending.get(path);
    if (!inFlight) {
      inFlight = this.fetchStat(path);
      this.pending.set(path, inFlight);
    }
    return inFlight;
  }

  /** Fetches and caches the listing for `path`, clearing the pending entry when done. */
  private async fetchStat(path: string): Promise<Stat> {
    try {
      const raw = await this.client.listTags(path);
      // Default missing fields to empty so rm() can always iterate them.
      const stat: Stat = {
        tags: raw.tags || [],
        digests: Object.keys(raw.manifest || {}),
        children: raw.child || [],
      };
      this.cache[path] = stat;
      return stat;
    } finally {
      this.pending.delete(path);
    }
  }

  /** Removes `item` from `items` if present; a missing item is a no-op. */
  private static forget<T>(items: T[], item: T): void {
    const index = items.indexOf(item);
    if (index !== -1) {
      items.splice(index, 1);
    }
  }

  /**
   * Recursively deletes everything at `path`: child paths, then tags, then
   * images.
   *
   * While we can't guarantee all deletions will succeed, this removes as much
   * as possible before throwing. Each successful deletion is also removed
   * from the cached listing.
   *
   * @throws the most recent error encountered, after all deletions have been
   *     attempted.
   */
  async rm(path: string): Promise<void> {
    const failures: unknown[] = [];
    const stat = await this.ls(path);

    // Unlike a filesystem, we can delete a "directory" while its children are
    // still being deleted. Start the recursion now and only wait for it at the
    // end. Iterate over copies because the cached arrays are pruned as we go.
    const recursive = [...stat.children].map(async (child) => {
      try {
        await this.rm(`${path}/${child}`);
        DockerHelper.forget(stat.children, child);
      } catch (err) {
        failures.push(err);
      }
    });

    // An image cannot be deleted until its tags have been removed, so tags
    // and images are deleted in two sequential phases.
    const deleteTags = [...stat.tags].map(async (tag) => {
      try {
        await this.client.deleteTag(path, tag);
        DockerHelper.forget(stat.tags, tag);
      } catch (err) {
        logger.debug("Got error trying to remove docker tag:", err);
        failures.push(err);
      }
    });
    await Promise.all(deleteTags);

    const deleteImages = [...stat.digests].map(async (digest) => {
      try {
        await this.client.deleteImage(path, digest);
        DockerHelper.forget(stat.digests, digest);
      } catch (err) {
        logger.debug("Got error trying to remove docker image:", err);
        failures.push(err);
      }
    });
    await Promise.all(deleteImages);

    await Promise.all(recursive);

    if (failures.length > 0) {
      throw failures[failures.length - 1];
    }
  }
}

src/deploy/functions/release.ts

+6-4
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@ import { getAppEngineLocation } from "../../functionsConfig";
77
import { promptForFunctionDeletion } from "./prompts";
88
import { DeploymentTimer } from "./deploymentTimer";
99
import { ErrorHandler } from "./errorHandler";
10-
import * as utils from "../../utils";
10+
import { Options } from "../../options";
11+
import * as args from "./args";
12+
import * as backend from "./backend";
13+
import * as containerCleaner from "./containerCleaner";
1114
import * as helper from "./functionsDeployHelper";
1215
import * as tasks from "./tasks";
13-
import * as backend from "./backend";
14-
import * as args from "./args";
15-
import { Options } from "../../options";
16+
import * as utils from "../../utils";
1617

1718
export async function release(context: args.Context, options: Options, payload: args.Payload) {
1819
if (!options.config.has("functions")) {
@@ -133,6 +134,7 @@ export async function release(context: args.Context, options: Options, payload:
133134
);
134135
}
135136
helper.logAndTrackDeployStats(cloudFunctionsQueue, errorHandler);
137+
await containerCleaner.cleanupBuildImages(payload.functions!.backend.cloudFunctions);
136138
await helper.printTriggerUrls(context);
137139
errorHandler.printWarnings();
138140
errorHandler.printErrors();

src/gcp/docker.ts

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// Note: unlike Google APIs, the documentation for the GCR API is
2+
// actually the Docker REST API. This can be found at
3+
// https://docs.docker.com/registry/spec/api/
4+
// This API is _very_ complex in its entirety and is very subtle (e.g. tags and digests
5+
// are both strings and can both be put in the same route to get completely different
6+
// response document types).
7+
// This file will only implement a minimal subset as needed.
8+
import { FirebaseError } from "../error";
9+
import * as api from "../apiv2";
10+
11+
/**
 * A Digest is a string in the format <algorithm>:<hex>. For example:
 * sha256:146d8c9dff0344fb01417ef28673ed196e38215f3c94837ae733d3b064ba439e
 */
export type Digest = string;

/** The name of a tag attached to an image. */
export type Tag = string;
15+
16+
/** Response body of the "<path>/tags/list" endpoint. */
export interface Tags {
  name: string;
  tags: string[];

  // The two fields below are not documented in the Docker API but are
  // present in the GCR API.

  /** Image details keyed by image digest. */
  manifest: Record<Digest, ImageInfo>;
  /** Names of the child paths. */
  child: string[];
}
25+
26+
/** Per-image entry of the `manifest` map in a tags listing. */
export interface ImageInfo {
  /** Creation time, in milliseconds, encoded as a string. */
  timeCreatedMs: string;
  /** Upload time, in milliseconds, encoded as a string. */
  timeUploadedMs: string;
  tag: string[];
  mediaType: string;
  imageSizeBytes: string;
  layerId: string;
}
35+
36+
/** Response body shape used by the registry to report errors. */
interface ErrorsResponse {
  errors?: Array<{
    code: string;
    message: string;
    details: unknown;
  }>;
}
43+
44+
/**
 * Type guard: true when `response` is an object that has its own `errors`
 * property. Any non-object value (including null and undefined, e.g. an
 * empty response body) yields false instead of throwing.
 */
function isErrors(response: unknown): response is ErrorsResponse {
  return (
    typeof response === "object" &&
    response !== null &&
    Object.prototype.hasOwnProperty.call(response, "errors")
  );
}
47+
48+
const API_VERSION = "v2";

/**
 * Minimal client for the subset of the Docker registry REST API needed to
 * list and delete images.
 */
export class Client {
  readonly client: api.Client;

  /**
   * @param origin URL prefix of the registry, e.g. "https://us.gcr.io".
   */
  constructor(origin: string) {
    this.client = new api.Client({
      apiVersion: API_VERSION,
      auth: true,
      urlPrefix: origin,
    });
  }

  /**
   * Lists the tags, images and children at `path`.
   *
   * @throws FirebaseError when the response body carries an `errors` field.
   */
  async listTags(path: string): Promise<Tags> {
    const response = await this.client.get<Tags | ErrorsResponse>(`${path}/tags/list`);
    if (isErrors(response.body)) {
      throw new FirebaseError(`Failed to list GCR tags at ${path}`, {
        children: response.body.errors,
      });
    }
    return response.body;
  }

  /**
   * Removes the tag `tag` from the image at `path`.
   *
   * @throws FirebaseError when the response reports one or more errors.
   */
  async deleteTag(path: string, tag: Tag): Promise<void> {
    await this.deleteManifest(path, tag, "tag");
  }

  /**
   * Deletes the image identified by `digest` at `path`.
   *
   * @throws FirebaseError when the response reports one or more errors.
   */
  async deleteImage(path: string, digest: Digest): Promise<void> {
    await this.deleteManifest(path, digest, "image");
  }

  /**
   * Issues the DELETE for a manifest reference. Tags and digests share the
   * same route; `kind` only selects the wording of the error message.
   */
  private async deleteManifest(
    path: string,
    reference: Tag | Digest,
    kind: "tag" | "image"
  ): Promise<void> {
    const response = await this.client.delete<ErrorsResponse>(`${path}/manifests/${reference}`);
    // Success is either an empty body, a body without `errors`, or an empty
    // `errors` array. Only a non-empty array is a failure.
    if (response.body?.errors?.length) {
      throw new FirebaseError(`Failed to delete ${kind} ${reference} at path ${path}`, {
        children: response.body.errors,
      });
    }
  }
}

0 commit comments

Comments
 (0)