summaryrefslogtreecommitdiffstats
path: root/chromium/content/browser/site_info.h
blob: 94673682a77dd270c416060f3c5c564f6807f268 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SITE_INFO_H_
#define CONTENT_BROWSER_SITE_INFO_H_

#include "content/browser/url_info.h"
#include "content/browser/web_exposed_isolation_info.h"
#include "content/common/content_export.h"
#include "content/public/browser/storage_partition_config.h"
#include "url/gurl.h"
#include "url/origin.h"

namespace content {

class BrowserContext;
class IsolationContext;
class StoragePartitionConfig;
struct UrlInfo;

// SiteInfo represents the principal of a SiteInstance. All documents and
// workers within a SiteInstance are considered part of this principal and will
// share a renderer process. Any two documents within the same browsing context
// group (i.e., BrowsingInstance) that are allowed to script each other *must*
// have the same SiteInfo principal, so that they end up in the same renderer
// process.
//
// As a result, SiteInfo is primarily defined in terms of "site URL," which is
// often the scheme plus the eTLD+1 of a URL. This allows same-site URLs to
// always share a process even when document.domain is modified. However, some
// site URLs can be finer grained (e.g., origins) or coarser grained (e.g.,
// file://). See |site_url()| for more considerations.
//
// In the future, we may add more information to SiteInfo for cases where the
// site URL is not sufficient to identify which process a document belongs in.
// For example, origin isolation (https://crbug.com/1067389) will introduce a
// 'keying' bit ('site' or 'origin') to avoid an ambiguity between sites and
// origins, and it will be possible for two SiteInstances with different keying
// values to have the same site URL. It is important that any extra members of
// SiteInfo do not cause two documents that can script each other to end up in
// different SiteInfos and thus different processes.
class CONTENT_EXPORT SiteInfo {
 public:
  // Helper to create a SiteInfo that will be used for an error page.  This is
  // used only when error page isolation is enabled.  Note that when site
  // isolation for guests is enabled, an error page SiteInfo may also be
  // associated with a guest.
  static SiteInfo CreateForErrorPage(
      const StoragePartitionConfig storage_partition_config,
      bool is_guest);

  // Helper to create a SiteInfo for default SiteInstances.  Default
  // SiteInstances are used for non-isolated sites on platforms without strict
  // site isolation, such as on Android.
  static SiteInfo CreateForDefaultSiteInstance(
      BrowserContext* browser_context,
      const StoragePartitionConfig storage_partition_config,
      const WebExposedIsolationInfo& web_exposed_isolation_info);

  // Helper to create a SiteInfo for a <webview> guest.  This helper can be
  // used for a new guest associated with a specific StoragePartitionConfig
  // (prior to navigations).
  static SiteInfo CreateForGuest(
      BrowserContext* browser_context,
      const StoragePartitionConfig& partition_config);

  // This function returns a SiteInfo with the appropriate site_url and
  // process_lock_url computed. This function can only be called on the UI
  // thread because it must be able to compute an effective URL.
  static SiteInfo Create(const IsolationContext& isolation_context,
                         const UrlInfo& url_info);

  // Similar to the function above, but this method can only be called on the
  // IO thread. All fields except for the site_url should be the same as
  // the other method. The site_url field will match the process_lock_url
  // in the object returned by this function. This is because we cannot compute
  // the effective URL from the IO thread.
  //
  // `url_info` MUST contain a StoragePartitionConfig because we can't ask the
  // embedder which StoragePartitionConfig to use from the IO thread.
  //
  // NOTE: Do not use this method unless there is a very clear and good reason
  // to do so. It primarily exists to facilitate the creation of ProcessLocks
  // from any thread. ProcessLocks do not rely on the site_url field so the
  // difference between this method and Create() does not cause problems for
  // that usecase.
  static SiteInfo CreateOnIOThread(const IsolationContext& isolation_context,
                                   const UrlInfo& url_info);

  // Method to make creating SiteInfo objects for tests easier. It is a thin
  // wrapper around Create() that uses UrlInfo::CreateForTesting(),
  // and WebExposedIsolationInfo::CreateNonIsolated() to generate the
  // information that is not provided.
  static SiteInfo CreateForTesting(const IsolationContext& isolation_context,
                                   const GURL& url);

  // Returns the site of a given |origin|.  Unlike Create(), this does
  // not utilize effective URLs, isolated origins, or other special logic.  It
  // only translates an origin into a site (i.e., scheme and eTLD+1) and is
  // used internally by GetSiteForURLInternal().  For making process model
  // decisions, Create() should be used instead.
  static GURL GetSiteForOrigin(const url::Origin& origin);

  // Returns a StoragePartitionConfig for the specified URL.
  // If |is_site_url| is set to true, then |url| MUST be a site URL that
  // was generated by a SiteInfo. Otherwise the URL is interpreted as a
  // user-provided URL or origin.
  //
  // Note: New callers of this method should be discouraged. New code should
  // have access to a SiteInfo object and call GetStoragePartitionConfig() on
  // that. For cases where code just needs the StoragePartition for a user
  // provided URL or origin, it should use
  // BrowserContext::GetStoragePartitionForUrl() instead of directly calling
  // this method.
  static StoragePartitionConfig GetStoragePartitionConfigForUrl(
      BrowserContext* browser_context,
      const GURL& url,
      bool is_site_url);

  // Initializes |storage_partition_config_| with a value appropriate for
  // |browser_context|.
  explicit SiteInfo(BrowserContext* browser_context);
  // The SiteInfo constructor should take in all values needed for comparing two
  // SiteInfos, to help ensure all creation sites are updated accordingly when
  // new values are added. The private function MakeTie() should be updated
  // accordingly.
  SiteInfo(const GURL& site_url,
           const GURL& process_lock_url,
           bool requires_origin_keyed_process,
           bool is_sandboxed,
           const StoragePartitionConfig storage_partition_config,
           const WebExposedIsolationInfo& web_exposed_isolation_info,
           bool is_guest,
           bool does_site_request_dedicated_process_for_coop,
           bool is_jit_disabled,
           bool is_pdf);
  SiteInfo() = delete;
  SiteInfo(const SiteInfo& rhs);
  ~SiteInfo();

  // This function returns a new SiteInfo which is equivalent to the original,
  // except that (1) is_origin_keyed is false, and (2) the remaining SiteInfo
  // state is used to compute a new SiteInfo from a UrlInfo reconstructed from
  // the original SiteInfo, minus any OAC opt-in request.
  SiteInfo GetNonOriginKeyedEquivalentForMetrics(
      const IsolationContext& isolation_context) const;

  // Returns a copy of `this` but with `is_sandboxed_` set to true.
  SiteInfo SandboxedClone() const;

  // Returns the site URL associated with all of the documents and workers in
  // this principal, as described above.
  //
  // NOTE: In most cases, code should be performing checks against the origin
  // returned by |RenderFrameHost::GetLastCommittedOrigin()|. In contrast, the
  // GURL returned by |site_url()| should not be considered authoritative
  // because:
  // - A SiteInstance can host pages from multiple sites if "site per process"
  //   is not enabled and the SiteInstance isn't hosting pages that require
  //   process isolation (e.g. WebUI or extensions).
  // - Even with site per process, the site URL is not an origin: while often
  //   derived from the origin, it only contains the scheme and the eTLD + 1,
  //   i.e. an origin with the host "deeply.nested.subdomain.example.com"
  //   corresponds to a site URL with the host "example.com".
  // - When origin isolation is in use, there may be multiple SiteInstance with
  //   the same site_url() but that differ in other properties.
  const GURL& site_url() const { return site_url_; }

  // Returns the URL which should be used in a SetProcessLock call for this
  // SiteInfo's process.  This is the same as |site_url_| except for cases
  // involving effective URLs, such as hosted apps.  In those cases, this URL is
  // a site URL that is computed without the use of effective URLs.
  //
  // NOTE: This URL is currently set even in cases where this SiteInstance's
  //       process is *not* going to be locked to it.  Callers should be careful
  //       to consider this case when comparing lock URLs;
  //       ShouldLockProcessToSite() may be used to determine whether the
  //       process lock will actually be used.
  //
  // TODO(alexmos): See if we can clean this up and not set |process_lock_url_|
  //                if the SiteInstance's process isn't going to be locked.
  const GURL& process_lock_url() const { return process_lock_url_; }

  // Returns whether this SiteInfo requires an origin-keyed process, such as for
  // an OriginAgentCluster response header. This resolves an ambiguity of
  // whether a process with a lock_url() like "https://foo.example" is allowed
  // to include "https://sub.foo.example" or not. In opt-in isolation, it is
  // possible for example.com to be isolated, and sub.example.com not be
  // isolated. In contrast, if command-line isolation is used to isolate
  // example.com, then sub.example.com is also (automatically) isolated.
  // Also note that opt-in isolated origins will include ports (if non-default)
  // in their site urls.
  bool requires_origin_keyed_process() const {
    return requires_origin_keyed_process_;
  }

  // The following accessor is for the `is_sandboxed` flag, which is true when
  // this SiteInfo is for an origin-restricted-sandboxed iframe.
  bool is_sandboxed() const { return is_sandboxed_; }

  // Returns the web-exposed isolation status of pages hosted by the
  // SiteInstance. The level of isolation which a page opts-into has
  // implications for the set of other pages which can live in this
  // SiteInstance, process allocation decisions, and API exposure in the page's
  // JavaScript context.
  const WebExposedIsolationInfo& web_exposed_isolation_info() const {
    return web_exposed_isolation_info_;
  }

  bool is_guest() const { return is_guest_; }
  bool is_error_page() const;
  bool is_jit_disabled() const { return is_jit_disabled_; }
  bool is_pdf() const { return is_pdf_; }

  // See comments on `does_site_request_dedicated_process_for_coop_` for more
  // details.
  bool does_site_request_dedicated_process_for_coop() const {
    return does_site_request_dedicated_process_for_coop_;
  }

  // Returns true if the site_url() is empty.
  bool is_empty() const { return site_url().possibly_invalid_spec().empty(); }

  SiteInfo& operator=(const SiteInfo& rhs);

  // Determine whether one SiteInfo represents the same security principal as
  // another SiteInfo.  Note that this does not necessarily translate to an
  // equality comparison of all the fields in SiteInfo (see comments in the
  // implementation).
  bool IsSamePrincipalWith(const SiteInfo& other) const;

  // Returns true if all fields in `other` match the corresponding fields in
  // this object.
  bool IsExactMatch(const SiteInfo& other) const;

  // Determines how a ProcessLock based on this SiteInfo compares to a
  // ProcessLock based on the `other` SiteInfo. Note that this doesn't just
  // compare all SiteInfo fields, e.g. it doesn't use site_url_ since that
  // may include effective URLs.
  // Returns -1 if `this` < `other`, 1 if `this` > `other`, 0 otherwise.
  int ProcessLockCompareTo(const SiteInfo& other) const;

  // Note: equality operators are defined in terms of IsSamePrincipalWith().
  bool operator==(const SiteInfo& other) const;
  bool operator!=(const SiteInfo& other) const;

  // Defined to allow this object to act as a key for std::map and std::set.
  // Note that the key is determined based on what distinguishes one security
  // principal from another (see IsSamePrincipalWith) and does not necessarily
  // include all the fields in SiteInfo.
  bool operator<(const SiteInfo& other) const;

  // Returns a string representation of this SiteInfo principal.
  std::string GetDebugString() const;

  // Returns true if pages loaded with this SiteInfo ought to be handled only
  // by a renderer process isolated from other sites. If --site-per-process is
  // used, like it is on desktop platforms, then this is true for all sites. In
  // other site isolation modes, only a subset of sites will require dedicated
  // processes.
  bool RequiresDedicatedProcess(
      const IsolationContext& isolation_context) const;

  // Returns true if a process for this SiteInfo should be locked to a
  // ProcessLock whose is_locked_to_site() method returns true. Returning true
  // here also implies that this SiteInfo requires a dedicated process. However,
  // the converse does not hold: this might still return false for certain
  // special cases where a site specific process lock can't be applied even when
  // this SiteInfo requires a dedicated process (e.g., with
  // --site-per-process). Examples of those cases include <webview> guests,
  // single-process mode, or extensions where a process is currently allowed to
  // be reused for different extensions.  Most of these special cases should
  // eventually be removed, and this function should become equivalent to
  // RequiresDedicatedProcess().
  bool ShouldLockProcessToSite(const IsolationContext& isolation_context) const;

  // Returns whether the process-per-site model is in use (globally or just for
  // the current site), in which case we should ensure there is only one
  // RenderProcessHost per site for the entire browser context.
  bool ShouldUseProcessPerSite(BrowserContext* browser_context) const;

  // Get the StoragePartitionConfig, which describes the StoragePartition this
  // SiteInfo is associated with.  For example, this will correspond to a
  // non-default StoragePartition for <webview> guests.
  const StoragePartitionConfig& storage_partition_config() const {
    return storage_partition_config_;
  }

  // Write a representation of this object into a trace.
  void WriteIntoTrace(perfetto::TracedValue context) const;

 private:
  // Helper that returns a tuple of all the fields that are relevant for
  // comparing one SiteInfo to another, to tell whether they represent the same
  // underlying security principal.   This determines the SiteInfo's key for
  // containers; two SiteInfos that return the same value here will map to the
  // same entry in std::map, etc.
  static auto MakeSecurityPrincipalKey(const SiteInfo& site_info);

  // Helper method containing common logic used by the public
  // Create() and CreateOnIOThread() methods. Most of the parameters simply
  // match the values passed into the caller. `compute_site_url` controls
  // whether the site_url field is computed from an effective URL or simply
  // copied from the `process_lock_url_`. `compute_site_url` is set to false in
  // contexts where it may not be possible to get the effective URL (e.g. on the
  // IO thread).
  static SiteInfo CreateInternal(const IsolationContext& isolation_context,
                                 const UrlInfo& url_info,
                                 bool compute_site_url);

  // Returns the URL to which a process should be locked for the given UrlInfo.
  // This is computed similarly to the site URL but without resolving effective
  // URLs.
  static GURL DetermineProcessLockURL(const IsolationContext& isolation_context,
                                      const UrlInfo& url_info);

  // Returns the site for the given UrlInfo, which includes only the scheme and
  // registered domain.  Returns an empty GURL if the UrlInfo has no host.
  // |should_use_effective_urls| specifies whether to resolve |url| to an
  // effective URL (via ContentBrowserClient::GetEffectiveURL()) before
  // determining the site.
  static GURL GetSiteForURLInternal(const IsolationContext& isolation_context,
                                    const UrlInfo& url,
                                    bool should_use_effective_urls);

  // Helper function for ProcessLockCompareTo(). Returns a std::tie of the
  // SiteInfo elements required for doing a ProcessLock comparison.
  auto MakeProcessLockComparisonKey() const;

  GURL site_url_;

  // The URL to use when locking a process to this SiteInstance's site via
  // SetProcessLock(). This is the same as |site_url_| except for cases
  // involving effective URLs, such as hosted apps.  In those cases, this URL is
  // a site URL that is computed without the use of effective URLs.
  GURL process_lock_url_;

  // Indicates whether this SiteInfo is specific to a single origin and requires
  // an origin-keyed process, rather than including all subdomains of that
  // origin. Only used for OriginAgentCluster header opt-ins. In contrast, the
  // site-level URLs that are typically used in SiteInfo include subdomains, as
  // do command-line isolated origins.
  bool requires_origin_keyed_process_ = false;

  // When true, indicates this SiteInfo is for a origin-restricted-sandboxed
  // iframe.
  bool is_sandboxed_ = false;

  // The StoragePartitionConfig to use when loading content belonging to this
  // SiteInfo.
  StoragePartitionConfig storage_partition_config_;

  // Indicates the web-exposed isolation status of pages hosted by the
  // SiteInstance. The level of isolation which a page opts-into has
  // implications for the set of other pages which can live in this
  // SiteInstance, process allocation decisions, and API exposure in the page's
  // JavaScript context.
  WebExposedIsolationInfo web_exposed_isolation_info_ =
      WebExposedIsolationInfo::CreateNonIsolated();

  // Indicates this SiteInfo is for a <webview> guest.
  bool is_guest_ = false;

  // Indicates that there is a request to require a dedicated process for this
  // SiteInfo due to a hint from the Cross-Origin-Opener-Policy header.
  bool does_site_request_dedicated_process_for_coop_ = false;

  // Indicates that JIT is disabled for this SiteInfo.
  bool is_jit_disabled_ = false;

  // Indicates that this SiteInfo is for PDF content.
  bool is_pdf_ = false;
};

// Stream-insertion operator so that a SiteInfo can be written to a
// std::ostream.  Only the declaration lives here; the text written to `out` is
// determined by the out-of-line definition.
// NOTE(review): presumably the same representation as
// SiteInfo::GetDebugString() - confirm against the implementation.
CONTENT_EXPORT std::ostream& operator<<(std::ostream& out,
                                        const SiteInfo& site_info);

}  // namespace content

#endif  // CONTENT_BROWSER_SITE_INFO_H_