// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SITE_INFO_H_
#define CONTENT_BROWSER_SITE_INFO_H_

#include "content/browser/url_info.h"
#include "content/browser/web_exposed_isolation_info.h"
#include "content/common/content_export.h"
#include "content/public/browser/storage_partition_config.h"
#include "url/gurl.h"
#include "url/origin.h"

namespace content {

class BrowserContext;
class IsolationContext;
class StoragePartitionConfig;
struct UrlInfo;

// SiteInfo represents the principal of a SiteInstance. All documents and
// workers within a SiteInstance are considered part of this principal and will
// share a renderer process. Any two documents within the same browsing context
// group (i.e., BrowsingInstance) that are allowed to script each other *must*
// have the same SiteInfo principal, so that they end up in the same renderer
// process.
//
// As a result, SiteInfo is primarily defined in terms of "site URL," which is
// often the scheme plus the eTLD+1 of a URL. This allows same-site URLs to
// always share a process even when document.domain is modified. However, some
// site URLs can be finer grained (e.g., origins) or coarser grained (e.g.,
// file://). See |site_url()| for more considerations.
//
// SiteInfo also carries additional information for cases where the site URL
// is not sufficient to identify which process a document belongs in. For
// example, origin isolation (https://crbug.com/1067389) uses a 'keying' bit
// ('site' or 'origin', see |requires_origin_keyed_process()|) to avoid an
// ambiguity between sites and origins, so it is possible for two SiteInstances
// with different keying values to have the same site URL. It is important that
// any extra members of SiteInfo do not cause two documents that can script
// each other to end up in different SiteInfos and thus different processes.
class CONTENT_EXPORT SiteInfo {
 public:
  // Helper to create a SiteInfo that will be used for an error page.  This is
  // used only when error page isolation is enabled.  Note that when site
  // isolation for guests is enabled, an error page SiteInfo may also be
  // associated with a guest.
  //
  // NOTE(review): `storage_partition_config` is declared as a const value
  // here (as in CreateForDefaultSiteInstance() and the full constructor),
  // whereas CreateForGuest() takes its config by const reference; consider
  // unifying these together with the out-of-line definitions.
  static SiteInfo CreateForErrorPage(
      const StoragePartitionConfig storage_partition_config,
      bool is_guest);

  // Helper to create a SiteInfo for default SiteInstances.  Default
  // SiteInstances are used for non-isolated sites on platforms without strict
  // site isolation, such as on Android.
  static SiteInfo CreateForDefaultSiteInstance(
      BrowserContext* browser_context,
      const StoragePartitionConfig storage_partition_config,
      const WebExposedIsolationInfo& web_exposed_isolation_info);

  // Helper to create a SiteInfo for a <webview> guest.  This helper can be
  // used for a new guest associated with a specific StoragePartitionConfig
  // (prior to navigations).
  static SiteInfo CreateForGuest(
      BrowserContext* browser_context,
      const StoragePartitionConfig& partition_config);

  // This function returns a SiteInfo with the appropriate site_url and
  // process_lock_url computed. This function can only be called on the UI
  // thread because it must be able to compute an effective URL.
  static SiteInfo Create(const IsolationContext& isolation_context,
                         const UrlInfo& url_info);

  // Similar to the function above, but this method can only be called on the
  // IO thread. All fields except for the site_url should be the same as
  // the other method. The site_url field will match the process_lock_url
  // in the object returned by this function. This is because we cannot compute
  // the effective URL from the IO thread.
  //
  // `url_info` MUST contain a StoragePartitionConfig because we can't ask the
  // embedder which StoragePartitionConfig to use from the IO thread.
  //
  // NOTE: Do not use this method unless there is a very clear and good reason
  // to do so. It primarily exists to facilitate the creation of ProcessLocks
  // from any thread. ProcessLocks do not rely on the site_url field so the
  // difference between this method and Create() does not cause problems for
  // that usecase.
  static SiteInfo CreateOnIOThread(const IsolationContext& isolation_context,
                                   const UrlInfo& url_info);

  // Method to make creating SiteInfo objects for tests easier. It is a thin
  // wrapper around Create() that uses UrlInfo::CreateForTesting() and
  // WebExposedIsolationInfo::CreateNonIsolated() to generate the information
  // that is not provided.
  static SiteInfo CreateForTesting(const IsolationContext& isolation_context,
                                   const GURL& url);

  // Returns the site of a given |origin|.  Unlike Create(), this does
  // not utilize effective URLs, isolated origins, or other special logic.  It
  // only translates an origin into a site (i.e., scheme and eTLD+1) and is
  // used internally by GetSiteForURLInternal().  For making process model
  // decisions, Create() should be used instead.
  static GURL GetSiteForOrigin(const url::Origin& origin);

  // Returns a StoragePartitionConfig for the specified URL.
  // If |is_site_url| is set to true, then |url| MUST be a site URL that
  // was generated by a SiteInfo. Otherwise the URL is interpreted as a
  // user-provided URL or origin.
  //
  // Note: New callers of this method should be discouraged. New code should
  // have access to a SiteInfo object and call storage_partition_config() on
  // that. For cases where code just needs the StoragePartition for a user
  // provided URL or origin, it should use
  // BrowserContext::GetStoragePartitionForUrl() instead of directly calling
  // this method.
  static StoragePartitionConfig GetStoragePartitionConfigForUrl(
      BrowserContext* browser_context,
      const GURL& url,
      bool is_site_url);

  // Initializes |storage_partition_config_| with a value appropriate for
  // |browser_context|.
  explicit SiteInfo(BrowserContext* browser_context);
  // The SiteInfo constructor should take in all values needed for comparing two
  // SiteInfos, to help ensure all creation sites are updated accordingly when
  // new values are added. The private comparison-key helpers (e.g.
  // MakeSecurityPrincipalKey()) should be updated accordingly.
  SiteInfo(const GURL& site_url,
           const GURL& process_lock_url,
           bool requires_origin_keyed_process,
           bool is_sandboxed,
           const StoragePartitionConfig storage_partition_config,
           const WebExposedIsolationInfo& web_exposed_isolation_info,
           bool is_guest,
           bool does_site_request_dedicated_process_for_coop,
           bool is_jit_disabled,
           bool is_pdf);
  // There is no default constructor: a SiteInfo must be built through one of
  // the constructors or Create*() helpers above.
  SiteInfo() = delete;
  // SiteInfo is copyable; see also operator=() below.
  SiteInfo(const SiteInfo& rhs);
  ~SiteInfo();

  // This function returns a new SiteInfo which is equivalent to the original,
  // except that (1) requires_origin_keyed_process_ is false, and (2) the
  // remaining SiteInfo state is used to compute a new SiteInfo from a UrlInfo
  // reconstructed from the original SiteInfo, minus any OAC opt-in request.
  SiteInfo GetNonOriginKeyedEquivalentForMetrics(
      const IsolationContext& isolation_context) const;

  // Returns a copy of `this` but with `is_sandboxed_` set to true.
  SiteInfo SandboxedClone() const;

  // Returns the site URL associated with all of the documents and workers in
  // this principal, as described above.
  //
  // NOTE: In most cases, code should be performing checks against the origin
  // returned by |RenderFrameHost::GetLastCommittedOrigin()|. In contrast, the
  // GURL returned by |site_url()| should not be considered authoritative
  // because:
  // - A SiteInstance can host pages from multiple sites if "site per process"
  //   is not enabled and the SiteInstance isn't hosting pages that require
  //   process isolation (e.g. WebUI or extensions).
  // - Even with site per process, the site URL is not an origin: while often
  //   derived from the origin, it only contains the scheme and the eTLD + 1,
  //   i.e. an origin with the host "deeply.nested.subdomain.example.com"
  //   corresponds to a site URL with the host "example.com".
  // - When origin isolation is in use, there may be multiple SiteInstances
  //   with the same site_url() but that differ in other properties.
  const GURL& site_url() const { return site_url_; }

  // Returns the URL which should be used in a SetProcessLock call for this
  // SiteInfo's process.  This is the same as |site_url_| except for cases
  // involving effective URLs, such as hosted apps.  In those cases, this URL is
  // a site URL that is computed without the use of effective URLs.
  //
  // NOTE: This URL is currently set even in cases where this SiteInstance's
  //       process is *not* going to be locked to it.  Callers should be careful
  //       to consider this case when comparing lock URLs;
  //       ShouldLockProcessToSite() may be used to determine whether the
  //       process lock will actually be used.
  //
  // TODO(alexmos): See if we can clean this up and not set |process_lock_url_|
  //                if the SiteInstance's process isn't going to be locked.
  const GURL& process_lock_url() const { return process_lock_url_; }

  // Returns whether this SiteInfo requires an origin-keyed process, such as for
  // an OriginAgentCluster response header. This resolves an ambiguity of
  // whether a process with a lock_url() like "https://foo.example" is allowed
  // to include "https://sub.foo.example" or not. In opt-in isolation, it is
  // possible for example.com to be isolated, and sub.example.com not be
  // isolated. In contrast, if command-line isolation is used to isolate
  // example.com, then sub.example.com is also (automatically) isolated.
  // Also note that opt-in isolated origins will include ports (if non-default)
  // in their site urls.
  bool requires_origin_keyed_process() const {
    return requires_origin_keyed_process_;
  }

  // The following accessor is for the `is_sandboxed` flag, which is true when
  // this SiteInfo is for an origin-restricted-sandboxed iframe.
  bool is_sandboxed() const { return is_sandboxed_; }

  // Returns the web-exposed isolation status of pages hosted by the
  // SiteInstance. The level of isolation which a page opts-into has
  // implications for the set of other pages which can live in this
  // SiteInstance, process allocation decisions, and API exposure in the page's
  // JavaScript context.
  const WebExposedIsolationInfo& web_exposed_isolation_info() const {
    return web_exposed_isolation_info_;
  }

  // Returns true if this SiteInfo is for a <webview> guest.
  bool is_guest() const { return is_guest_; }
  // Returns whether this SiteInfo is for an error page.  Defined out of line.
  // NOTE(review): presumably true for SiteInfos produced by
  // CreateForErrorPage() -- confirm against the implementation.
  bool is_error_page() const;
  // Returns true if JIT is disabled for this SiteInfo.
  bool is_jit_disabled() const { return is_jit_disabled_; }
  // Returns true if this SiteInfo is for PDF content.
  bool is_pdf() const { return is_pdf_; }

  // See comments on `does_site_request_dedicated_process_for_coop_` for more
  // details.
  bool does_site_request_dedicated_process_for_coop() const {
    return does_site_request_dedicated_process_for_coop_;
  }

  // Returns true if the site_url() is empty.  The check is made on the
  // possibly-invalid spec, so it does not depend on the URL being valid.
  bool is_empty() const { return site_url().possibly_invalid_spec().empty(); }

  // Copy assignment; SiteInfo is copyable.
  SiteInfo& operator=(const SiteInfo& rhs);

  // Determine whether one SiteInfo represents the same security principal as
  // another SiteInfo.  Note that this does not necessarily translate to an
  // equality comparison of all the fields in SiteInfo (see comments in the
  // implementation).
  bool IsSamePrincipalWith(const SiteInfo& other) const;

  // Returns true if all fields in `other` match the corresponding fields in
  // this object.
  bool IsExactMatch(const SiteInfo& other) const;

  // Determines how a ProcessLock based on this SiteInfo compares to a
  // ProcessLock based on the `other` SiteInfo. Note that this doesn't just
  // compare all SiteInfo fields, e.g. it doesn't use site_url_ since that
  // may include effective URLs.
  // Returns -1 if `this` < `other`, 1 if `this` > `other`, 0 otherwise.
  int ProcessLockCompareTo(const SiteInfo& other) const;

  // Note: equality operators are defined in terms of IsSamePrincipalWith().
  bool operator==(const SiteInfo& other) const;
  bool operator!=(const SiteInfo& other) const;

  // Defined to allow this object to act as a key for std::map and std::set.
  // Note that the key is determined based on what distinguishes one security
  // principal from another (see IsSamePrincipalWith) and does not necessarily
  // include all the fields in SiteInfo.
  bool operator<(const SiteInfo& other) const;

  // Returns a string representation of this SiteInfo principal.
  std::string GetDebugString() const;

  // Returns true if pages loaded with this SiteInfo ought to be handled only
  // by a renderer process isolated from other sites. If --site-per-process is
  // used, like it is on desktop platforms, then this is true for all sites. In
  // other site isolation modes, only a subset of sites will require dedicated
  // processes.
  bool RequiresDedicatedProcess(
      const IsolationContext& isolation_context) const;

  // Returns true if a process for this SiteInfo should be locked to a
  // ProcessLock whose is_locked_to_site() method returns true. Returning true
  // here also implies that this SiteInfo requires a dedicated process. However,
  // the converse does not hold: this might still return false for certain
  // special cases where a site specific process lock can't be applied even when
  // this SiteInfo requires a dedicated process (e.g., with
  // --site-per-process). Examples of those cases include <webview> guests,
  // single-process mode, or extensions where a process is currently allowed to
  // be reused for different extensions.  Most of these special cases should
  // eventually be removed, and this function should become equivalent to
  // RequiresDedicatedProcess().
  bool ShouldLockProcessToSite(const IsolationContext& isolation_context) const;

  // Returns whether the process-per-site model is in use (globally or just for
  // the current site), in which case we should ensure there is only one
  // RenderProcessHost per site for the entire browser context.
  bool ShouldUseProcessPerSite(BrowserContext* browser_context) const;

  // Get the StoragePartitionConfig, which describes the StoragePartition this
  // SiteInfo is associated with.  For example, this will correspond to a
  // non-default StoragePartition for <webview> guests.
  const StoragePartitionConfig& storage_partition_config() const {
    return storage_partition_config_;
  }

  // Write a representation of this object into a trace.
  void WriteIntoTrace(perfetto::TracedValue context) const;

 private:
  // Helper that returns a tuple of all the fields that are relevant for
  // comparing one SiteInfo to another, to tell whether they represent the same
  // underlying security principal.  This determines the SiteInfo's key for
  // containers; two SiteInfos that return the same value here will map to the
  // same entry in std::map, etc.
  //
  // The return type is deduced (`auto`), so the definition must be visible
  // wherever this helper is called.
  static auto MakeSecurityPrincipalKey(const SiteInfo& site_info);

  // Helper method containing common logic used by the public
  // Create() and CreateOnIOThread() methods. Most of the parameters simply
  // match the values passed into the caller. `compute_site_url` controls
  // whether the site_url field is computed from an effective URL or simply
  // copied from the `process_lock_url_`. `compute_site_url` is set to false in
  // contexts where it may not be possible to get the effective URL (e.g. on the
  // IO thread).
  static SiteInfo CreateInternal(const IsolationContext& isolation_context,
                                 const UrlInfo& url_info,
                                 bool compute_site_url);

  // Returns the URL to which a process should be locked for the given UrlInfo.
  // This is computed similarly to the site URL but without resolving effective
  // URLs.
  static GURL DetermineProcessLockURL(const IsolationContext& isolation_context,
                                      const UrlInfo& url_info);

  // Returns the site for the given UrlInfo, which includes only the scheme and
  // registered domain.  Returns an empty GURL if the UrlInfo has no host.
  // |should_use_effective_urls| specifies whether to resolve |url| to an
  // effective URL (via ContentBrowserClient::GetEffectiveURL()) before
  // determining the site.
  static GURL GetSiteForURLInternal(const IsolationContext& isolation_context,
                                    const UrlInfo& url,
                                    bool should_use_effective_urls);

  // Helper function for ProcessLockCompareTo(). Returns a std::tie of the
  // SiteInfo elements required for doing a ProcessLock comparison.  As with
  // MakeSecurityPrincipalKey(), the return type is deduced.
  auto MakeProcessLockComparisonKey() const;

  // The site URL for the documents and workers in this principal; see
  // site_url() for caveats on how it should (and should not) be used.
  GURL site_url_;

  // The URL to use when locking a process to this SiteInstance's site via
  // SetProcessLock(). This is the same as |site_url_| except for cases
  // involving effective URLs, such as hosted apps.  In those cases, this URL is
  // a site URL that is computed without the use of effective URLs.
  GURL process_lock_url_;

  // Indicates whether this SiteInfo is specific to a single origin and requires
  // an origin-keyed process, rather than including all subdomains of that
  // origin. Only used for OriginAgentCluster header opt-ins. In contrast, the
  // site-level URLs that are typically used in SiteInfo include subdomains, as
  // do command-line isolated origins.
  bool requires_origin_keyed_process_ = false;

  // When true, indicates this SiteInfo is for an origin-restricted-sandboxed
  // iframe.
  bool is_sandboxed_ = false;

  // The StoragePartitionConfig to use when loading content belonging to this
  // SiteInfo.
  StoragePartitionConfig storage_partition_config_;

  // Indicates the web-exposed isolation status of pages hosted by the
  // SiteInstance. The level of isolation which a page opts-into has
  // implications for the set of other pages which can live in this
  // SiteInstance, process allocation decisions, and API exposure in the page's
  // JavaScript context.  Defaults to the non-isolated state.
  WebExposedIsolationInfo web_exposed_isolation_info_ =
      WebExposedIsolationInfo::CreateNonIsolated();

  // Indicates this SiteInfo is for a <webview> guest.
  bool is_guest_ = false;

  // Indicates that there is a request to require a dedicated process for this
  // SiteInfo due to a hint from the Cross-Origin-Opener-Policy header.
  bool does_site_request_dedicated_process_for_coop_ = false;

  // Indicates that JIT is disabled for this SiteInfo.
  bool is_jit_disabled_ = false;

  // Indicates that this SiteInfo is for PDF content.
  bool is_pdf_ = false;
};

// Stream insertion operator so that a SiteInfo can be written to a
// std::ostream (e.g. in logging statements).  Returns a std::ostream
// reference, which allows insertions to be chained.
// NOTE(review): the output format is defined out of line; presumably it
// matches SiteInfo::GetDebugString() -- confirm against the implementation.
CONTENT_EXPORT std::ostream& operator<<(std::ostream& out,
                                        const SiteInfo& site_info);

}  // namespace content

#endif  // CONTENT_BROWSER_SITE_INFO_H_