// Copyright 2021 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef CONTENT_BROWSER_SITE_INFO_H_ #define CONTENT_BROWSER_SITE_INFO_H_ #include "content/browser/url_info.h" #include "content/browser/web_exposed_isolation_info.h" #include "content/common/content_export.h" #include "content/public/browser/storage_partition_config.h" #include "url/gurl.h" #include "url/origin.h" namespace content { class BrowserContext; class IsolationContext; class StoragePartitionConfig; struct UrlInfo; // SiteInfo represents the principal of a SiteInstance. All documents and // workers within a SiteInstance are considered part of this principal and will // share a renderer process. Any two documents within the same browsing context // group (i.e., BrowsingInstance) that are allowed to script each other *must* // have the same SiteInfo principal, so that they end up in the same renderer // process. // // As a result, SiteInfo is primarily defined in terms of "site URL," which is // often the scheme plus the eTLD+1 of a URL. This allows same-site URLs to // always share a process even when document.domain is modified. However, some // site URLs can be finer grained (e.g., origins) or coarser grained (e.g., // file://). See |site_url()| for more considerations. // // In the future, we may add more information to SiteInfo for cases where the // site URL is not sufficient to identify which process a document belongs in. // For example, origin isolation (https://crbug.com/1067389) will introduce a // 'keying' bit ('site' or 'origin') to avoid an ambiguity between sites and // origins, and it will be possible for two SiteInstances with different keying // values to have the same site URL. It is important that any extra members of // SiteInfo do not cause two documents that can script each other to end up in // different SiteInfos and thus different processes. class CONTENT_EXPORT SiteInfo { public: // Helper to create a SiteInfo that will be used for an error page. This is // used only when error page isolation is enabled. Note that when site // isolation for guests is enabled, an error page SiteInfo may also be // associated with a guest. static SiteInfo CreateForErrorPage( const StoragePartitionConfig storage_partition_config, bool is_guest); // Helper to create a SiteInfo for default SiteInstances. Default // SiteInstances are used for non-isolated sites on platforms without strict // site isolation, such as on Android. static SiteInfo CreateForDefaultSiteInstance( BrowserContext* browser_context, const StoragePartitionConfig storage_partition_config, const WebExposedIsolationInfo& web_exposed_isolation_info); // Helper to create a SiteInfo for a guest. This helper can be // used for a new guest associated with a specific StoragePartitionConfig // (prior to navigations). static SiteInfo CreateForGuest( BrowserContext* browser_context, const StoragePartitionConfig& partition_config); // This function returns a SiteInfo with the appropriate site_url and // process_lock_url computed. This function can only be called on the UI // thread because it must be able to compute an effective URL. static SiteInfo Create(const IsolationContext& isolation_context, const UrlInfo& url_info); // Similar to the function above, but this method can only be called on the // IO thread. All fields except for the site_url should be the same as // the other method. The site_url field will match the process_lock_url // in the object returned by this function. This is because we cannot compute // the effective URL from the IO thread. // // `url_info` MUST contain a StoragePartitionConfig because we can't ask the // embedder which StoragePartitionConfig to use from the IO thread. // // NOTE: Do not use this method unless there is a very clear and good reason // to do so. It primarily exists to facilitate the creation of ProcessLocks // from any thread. ProcessLocks do not rely on the site_url field so the // difference between this method and Create() does not cause problems for // that usecase. static SiteInfo CreateOnIOThread(const IsolationContext& isolation_context, const UrlInfo& url_info); // Method to make creating SiteInfo objects for tests easier. It is a thin // wrapper around Create() that uses UrlInfo::CreateForTesting(), // and WebExposedIsolationInfo::CreateNonIsolated() to generate the // information that is not provided. static SiteInfo CreateForTesting(const IsolationContext& isolation_context, const GURL& url); // Returns the site of a given |origin|. Unlike Create(), this does // not utilize effective URLs, isolated origins, or other special logic. It // only translates an origin into a site (i.e., scheme and eTLD+1) and is // used internally by GetSiteForURLInternal(). For making process model // decisions, Create() should be used instead. static GURL GetSiteForOrigin(const url::Origin& origin); // Returns a StoragePartitionConfig for the specified URL. // If |is_site_url| is set to true, then |url| MUST be a site URL that // was generated by a SiteInfo. Otherwise the URL is interpreted as a // user-provided URL or origin. // // Note: New callers of this method should be discouraged. New code should // have access to a SiteInfo object and call GetStoragePartitionConfig() on // that. For cases where code just needs the StoragePartition for a user // provided URL or origin, it should use // BrowserContext::GetStoragePartitionForUrl() instead of directly calling // this method. static StoragePartitionConfig GetStoragePartitionConfigForUrl( BrowserContext* browser_context, const GURL& url, bool is_site_url); // Initializes |storage_partition_config_| with a value appropriate for // |browser_context|. explicit SiteInfo(BrowserContext* browser_context); // The SiteInfo constructor should take in all values needed for comparing two // SiteInfos, to help ensure all creation sites are updated accordingly when // new values are added. The private function MakeTie() should be updated // accordingly. SiteInfo(const GURL& site_url, const GURL& process_lock_url, bool requires_origin_keyed_process, bool is_sandboxed, const StoragePartitionConfig storage_partition_config, const WebExposedIsolationInfo& web_exposed_isolation_info, bool is_guest, bool does_site_request_dedicated_process_for_coop, bool is_jit_disabled, bool is_pdf); SiteInfo() = delete; SiteInfo(const SiteInfo& rhs); ~SiteInfo(); // This function returns a new SiteInfo which is equivalent to the original, // except that (1) is_origin_keyed is false, and (2) the remaining SiteInfo // state is used to compute a new SiteInfo from a UrlInfo reconstructed from // the original SiteInfo, minus any OAC opt-in request. SiteInfo GetNonOriginKeyedEquivalentForMetrics( const IsolationContext& isolation_context) const; // Returns a copy of `this` but with `is_sandboxed_` set to true. SiteInfo SandboxedClone() const; // Returns the site URL associated with all of the documents and workers in // this principal, as described above. // // NOTE: In most cases, code should be performing checks against the origin // returned by |RenderFrameHost::GetLastCommittedOrigin()|. In contrast, the // GURL returned by |site_url()| should not be considered authoritative // because: // - A SiteInstance can host pages from multiple sites if "site per process" // is not enabled and the SiteInstance isn't hosting pages that require // process isolation (e.g. WebUI or extensions). // - Even with site per process, the site URL is not an origin: while often // derived from the origin, it only contains the scheme and the eTLD + 1, // i.e. an origin with the host "deeply.nested.subdomain.example.com" // corresponds to a site URL with the host "example.com". // - When origin isolation is in use, there may be multiple SiteInstance with // the same site_url() but that differ in other properties. const GURL& site_url() const { return site_url_; } // Returns the URL which should be used in a SetProcessLock call for this // SiteInfo's process. This is the same as |site_url_| except for cases // involving effective URLs, such as hosted apps. In those cases, this URL is // a site URL that is computed without the use of effective URLs. // // NOTE: This URL is currently set even in cases where this SiteInstance's // process is *not* going to be locked to it. Callers should be careful // to consider this case when comparing lock URLs; // ShouldLockProcessToSite() may be used to determine whether the // process lock will actually be used. // // TODO(alexmos): See if we can clean this up and not set |process_lock_url_| // if the SiteInstance's process isn't going to be locked. const GURL& process_lock_url() const { return process_lock_url_; } // Returns whether this SiteInfo requires an origin-keyed process, such as for // an OriginAgentCluster response header. This resolves an ambiguity of // whether a process with a lock_url() like "https://foo.example" is allowed // to include "https://sub.foo.example" or not. In opt-in isolation, it is // possible for example.com to be isolated, and sub.example.com not be // isolated. In contrast, if command-line isolation is used to isolate // example.com, then sub.example.com is also (automatically) isolated. // Also note that opt-in isolated origins will include ports (if non-default) // in their site urls. bool requires_origin_keyed_process() const { return requires_origin_keyed_process_; } // The following accessor is for the `is_sandboxed` flag, which is true when // this SiteInfo is for an origin-restricted-sandboxed iframe. bool is_sandboxed() const { return is_sandboxed_; } // Returns the web-exposed isolation status of pages hosted by the // SiteInstance. The level of isolation which a page opts-into has // implications for the set of other pages which can live in this // SiteInstance, process allocation decisions, and API exposure in the page's // JavaScript context. const WebExposedIsolationInfo& web_exposed_isolation_info() const { return web_exposed_isolation_info_; } bool is_guest() const { return is_guest_; } bool is_error_page() const; bool is_jit_disabled() const { return is_jit_disabled_; } bool is_pdf() const { return is_pdf_; } // See comments on `does_site_request_dedicated_process_for_coop_` for more // details. bool does_site_request_dedicated_process_for_coop() const { return does_site_request_dedicated_process_for_coop_; } // Returns true if the site_url() is empty. bool is_empty() const { return site_url().possibly_invalid_spec().empty(); } SiteInfo& operator=(const SiteInfo& rhs); // Determine whether one SiteInfo represents the same security principal as // another SiteInfo. Note that this does not necessarily translate to an // equality comparison of all the fields in SiteInfo (see comments in the // implementation). bool IsSamePrincipalWith(const SiteInfo& other) const; // Returns true if all fields in `other` match the corresponding fields in // this object. bool IsExactMatch(const SiteInfo& other) const; // Determines how a ProcessLock based on this SiteInfo compares to a // ProcessLock based on the `other` SiteInfo. Note that this doesn't just // compare all SiteInfo fields, e.g. it doesn't use site_url_ since that // may include effective URLs. // Returns -1 if `this` < `other`, 1 if `this` > `other`, 0 otherwise. int ProcessLockCompareTo(const SiteInfo& other) const; // Note: equality operators are defined in terms of IsSamePrincipalWith(). bool operator==(const SiteInfo& other) const; bool operator!=(const SiteInfo& other) const; // Defined to allow this object to act as a key for std::map and std::set. // Note that the key is determined based on what distinguishes one security // principal from another (see IsSamePrincipalWith) and does not necessarily // include all the fields in SiteInfo. bool operator<(const SiteInfo& other) const; // Returns a string representation of this SiteInfo principal. std::string GetDebugString() const; // Returns true if pages loaded with this SiteInfo ought to be handled only // by a renderer process isolated from other sites. If --site-per-process is // used, like it is on desktop platforms, then this is true for all sites. In // other site isolation modes, only a subset of sites will require dedicated // processes. bool RequiresDedicatedProcess( const IsolationContext& isolation_context) const; // Returns true if a process for this SiteInfo should be locked to a // ProcessLock whose is_locked_to_site() method returns true. Returning true // here also implies that this SiteInfo requires a dedicated process. However, // the converse does not hold: this might still return false for certain // special cases where a site specific process lock can't be applied even when // this SiteInfo requires a dedicated process (e.g., with // --site-per-process). Examples of those cases include guests, // single-process mode, or extensions where a process is currently allowed to // be reused for different extensions. Most of these special cases should // eventually be removed, and this function should become equivalent to // RequiresDedicatedProcess(). bool ShouldLockProcessToSite(const IsolationContext& isolation_context) const; // Returns whether the process-per-site model is in use (globally or just for // the current site), in which case we should ensure there is only one // RenderProcessHost per site for the entire browser context. bool ShouldUseProcessPerSite(BrowserContext* browser_context) const; // Get the StoragePartitionConfig, which describes the StoragePartition this // SiteInfo is associated with. For example, this will correspond to a // non-default StoragePartition for guests. const StoragePartitionConfig& storage_partition_config() const { return storage_partition_config_; } // Write a representation of this object into a trace. void WriteIntoTrace(perfetto::TracedValue context) const; private: // Helper that returns a tuple of all the fields that are relevant for // comparing one SiteInfo to another, to tell whether they represent the same // underlying security principal. This determines the SiteInfo's key for // containers; two SiteInfos that return the same value here will map to the // same entry in std::map, etc. static auto MakeSecurityPrincipalKey(const SiteInfo& site_info); // Helper method containing common logic used by the public // Create() and CreateOnIOThread() methods. Most of the parameters simply // match the values passed into the caller. `compute_site_url` controls // whether the site_url field is computed from an effective URL or simply // copied from the `process_lock_url_`. `compute_site_url` is set to false in // contexts where it may not be possible to get the effective URL (e.g. on the // IO thread). static SiteInfo CreateInternal(const IsolationContext& isolation_context, const UrlInfo& url_info, bool compute_site_url); // Returns the URL to which a process should be locked for the given UrlInfo. // This is computed similarly to the site URL but without resolving effective // URLs. static GURL DetermineProcessLockURL(const IsolationContext& isolation_context, const UrlInfo& url_info); // Returns the site for the given UrlInfo, which includes only the scheme and // registered domain. Returns an empty GURL if the UrlInfo has no host. // |should_use_effective_urls| specifies whether to resolve |url| to an // effective URL (via ContentBrowserClient::GetEffectiveURL()) before // determining the site. static GURL GetSiteForURLInternal(const IsolationContext& isolation_context, const UrlInfo& url, bool should_use_effective_urls); // Helper function for ProcessLockCompareTo(). Returns a std::tie of the // SiteInfo elements required for doing a ProcessLock comparison. auto MakeProcessLockComparisonKey() const; GURL site_url_; // The URL to use when locking a process to this SiteInstance's site via // SetProcessLock(). This is the same as |site_url_| except for cases // involving effective URLs, such as hosted apps. In those cases, this URL is // a site URL that is computed without the use of effective URLs. GURL process_lock_url_; // Indicates whether this SiteInfo is specific to a single origin and requires // an origin-keyed process, rather than including all subdomains of that // origin. Only used for OriginAgentCluster header opt-ins. In contrast, the // site-level URLs that are typically used in SiteInfo include subdomains, as // do command-line isolated origins. bool requires_origin_keyed_process_ = false; // When true, indicates this SiteInfo is for a origin-restricted-sandboxed // iframe. bool is_sandboxed_ = false; // The StoragePartitionConfig to use when loading content belonging to this // SiteInfo. StoragePartitionConfig storage_partition_config_; // Indicates the web-exposed isolation status of pages hosted by the // SiteInstance. The level of isolation which a page opts-into has // implications for the set of other pages which can live in this // SiteInstance, process allocation decisions, and API exposure in the page's // JavaScript context. WebExposedIsolationInfo web_exposed_isolation_info_ = WebExposedIsolationInfo::CreateNonIsolated(); // Indicates this SiteInfo is for a guest. bool is_guest_ = false; // Indicates that there is a request to require a dedicated process for this // SiteInfo due to a hint from the Cross-Origin-Opener-Policy header. bool does_site_request_dedicated_process_for_coop_ = false; // Indicates that JIT is disabled for this SiteInfo. bool is_jit_disabled_ = false; // Indicates that this SiteInfo is for PDF content. bool is_pdf_ = false; }; CONTENT_EXPORT std::ostream& operator<<(std::ostream& out, const SiteInfo& site_info); } // namespace content #endif // CONTENT_BROWSER_SITE_INFO_H_