Avi Drissman | 4e1b7bc3 | 2022-09-15 14:03:50 | [diff] [blame] | 1 | // Copyright 2022 The Chromium Authors |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Sreeja Kamishetty | 8ef652b | 2022-06-22 22:43:17 | [diff] [blame] | 5 | #ifndef CONTENT_BROWSER_PRELOADING_PRELOADING_PREDICTION_H_ |
| 6 | #define CONTENT_BROWSER_PRELOADING_PRELOADING_PREDICTION_H_ |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 7 | |
Arthur Sonzogni | c686e8f | 2024-01-11 08:36:37 | [diff] [blame] | 8 | #include <optional> |
Kevin McNee | cc19871 | 2024-03-20 16:00:31 | [diff] [blame] | 9 | #include <string_view> |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 10 | |
William Liu | 60e005f | 2023-01-18 16:17:22 | [diff] [blame] | 11 | #include "base/timer/elapsed_timer.h" |
Kevin McNee | 842eb0a | 2024-04-11 20:14:16 | [diff] [blame] | 12 | #include "content/browser/preloading/preloading_confidence.h" |
Arthur Sonzogni | c686e8f | 2024-01-11 08:36:37 | [diff] [blame] | 13 | #include "content/public/browser/preloading_data.h" |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 14 | #include "services/metrics/public/cpp/ukm_source_id.h" |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 15 | #include "url/gurl.h" |
| 16 | |
| 17 | namespace content { |
| 18 | |
| 19 | // PreloadingPrediction keeps track of every preloading prediction associated |
| 20 | // with various predictors as defined in content/public/preloading.h |
| 21 | // (please see for more details); whether the prediction is accurate or not; |
| 22 | // whether the prediction is confident enough or not. |
| 23 | class PreloadingPrediction { |
| 24 | public: |
| 25 | ~PreloadingPrediction(); |
| 26 | |
| 27 | // Disallow copy and assign. |
| 28 | PreloadingPrediction(const PreloadingPrediction& other) = delete; |
| 29 | PreloadingPrediction& operator=(const PreloadingPrediction& other) = delete; |
Kevin McNee | bf9af61 | 2024-05-02 23:22:39 | [diff] [blame] | 30 | PreloadingPrediction(PreloadingPrediction&&); |
| 31 | PreloadingPrediction& operator=(PreloadingPrediction&&); |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 32 | |
| 33 | // Records both UKMs Preloading_Prediction and |
| 34 | // Preloading_Prediction_PreviousPrimaryPage. Metrics for both these are same. |
| 35 | // Only difference is that the Preloading_Prediction_PreviousPrimaryPage UKM |
| 36 | // is associated with the WebContents primary page that triggered the |
| 37 | // preloading prediction. This is done to easily analyze the impact of the |
| 38 | // preloading prediction on the primary visible page. |
Kevin McNee | 8efb8f7 | 2024-05-27 19:58:24 | [diff] [blame] | 39 | void RecordPreloadingPredictionUKMs( |
| 40 | ukm::SourceId navigated_page_source_id, |
| 41 | std::optional<double> sampling_likelihood); |
Sreeja Kamishetty | ac12140e | 2022-07-14 22:16:51 | [diff] [blame] | 42 | |
| 43 | // Sets `is_accurate_prediction_` to true if `navigated_url` matches the URL |
William Liu | 60e005f | 2023-01-18 16:17:22 | [diff] [blame] | 44 | // predicate. It also records `time_to_next_navigation_`. |
Sreeja Kamishetty | ac12140e | 2022-07-14 22:16:51 | [diff] [blame] | 45 | void SetIsAccuratePrediction(const GURL& navigated_url); |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 46 | |
Iman Saboori | 6a245ec | 2023-02-22 20:03:20 | [diff] [blame] | 47 | bool IsAccuratePrediction() const { return is_accurate_prediction_; } |
| 48 | |
Takashi Toyoshima | f3cd15d7 | 2023-06-16 09:27:33 | [diff] [blame] | 49 | PreloadingPrediction( |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 50 | PreloadingPredictor predictor, |
Kevin McNee | 842eb0a | 2024-04-11 20:14:16 | [diff] [blame] | 51 | PreloadingConfidence confidence, |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 52 | ukm::SourceId triggered_primary_page_source_id, |
| 53 | base::RepeatingCallback<bool(const GURL&)> url_match_predicate); |
| 54 | |
William Liu | 238a9e5 | 2023-01-23 20:32:40 | [diff] [blame] | 55 | // Called by the `PreloadingDataImpl` that owns this prediction, to check the |
| 56 | // validity of `predictor_type_`. |
| 57 | PreloadingPredictor predictor_type() const { return predictor_type_; } |
| 58 | |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 59 | private: |
| 60 | // Preloading predictor of this preloading prediction. |
Kevin McNee | bf9af61 | 2024-05-02 23:22:39 | [diff] [blame] | 61 | PreloadingPredictor predictor_type_; |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 62 | |
| 63 | // Holds the triggered primary page of preloading operation ukm::SourceId. |
Kevin McNee | bf9af61 | 2024-05-02 23:22:39 | [diff] [blame] | 64 | ukm::SourceId triggered_primary_page_source_id_; |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 65 | |
| 66 | // Triggers can specify their own predicate to judge whether two URLs are |
| 67 | // considered as pointing to the same destination as this varies for different |
| 68 | // predictors. |
Kevin McNee | bf9af61 | 2024-05-02 23:22:39 | [diff] [blame] | 69 | PreloadingURLMatchCallback url_match_predicate_; |
| 70 | |
| 71 | // Confidence percentage of predictor's preloading prediction. This value |
| 72 | // should be between 0 - 100. |
| 73 | PreloadingConfidence confidence_; |
Sreeja Kamishetty | ac12140e | 2022-07-14 22:16:51 | [diff] [blame] | 74 | |
| 75 | // Set to true when preloading prediction was correct i.e., when the |
| 76 | // navigation happens to the same predicted URL. |
| 77 | bool is_accurate_prediction_ = false; |
William Liu | 60e005f | 2023-01-18 16:17:22 | [diff] [blame] | 78 | |
| 79 | // Records when the preloading prediction was first recorded. |
Kevin McNee | bf9af61 | 2024-05-02 23:22:39 | [diff] [blame] | 80 | base::ElapsedTimer elapsed_timer_; |
William Liu | 60e005f | 2023-01-18 16:17:22 | [diff] [blame] | 81 | |
| 82 | // The time between the creation of the prediction and the start of the next |
| 83 | // navigation, whether accurate or not. The latency is reported as standard |
| 84 | // buckets, of 1.15 spacing. |
Arthur Sonzogni | c686e8f | 2024-01-11 08:36:37 | [diff] [blame] | 85 | std::optional<base::TimeDelta> time_to_next_navigation_; |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 86 | }; |
| 87 | |
// The output of many predictors is a logit/probability score. To use this score
// for binary classification, we compare it to a threshold. If the score is
// above the threshold, we classify the instance as positive; otherwise, we
// classify it as negative. Threshold choice affects classifier precision and
// recall. There is a trade-off between precision and recall. If we set the
// threshold too low, we will have high recall but low precision. If we set the
// threshold too high, we will have high precision but low recall. To choose the
// best threshold, we can use ROC curves, precision-recall curves, or
// logit-precision and logit-recall curves. `ExperimentalPreloadingPrediction`
// helps us collect the UMA data required to achieve this.
| 98 | class ExperimentalPreloadingPrediction { |
| 99 | public: |
| 100 | ExperimentalPreloadingPrediction() = delete; |
| 101 | ExperimentalPreloadingPrediction( |
Kevin McNee | cc19871 | 2024-03-20 16:00:31 | [diff] [blame] | 102 | std::string_view name, |
Iman Saboori | 31f0949 | 2023-06-26 20:15:20 | [diff] [blame] | 103 | PreloadingURLMatchCallback url_match_predicate, |
| 104 | float score, |
| 105 | float min_score, |
| 106 | float max_score, |
| 107 | size_t buckets); |
| 108 | ~ExperimentalPreloadingPrediction(); |
| 109 | |
Kevin McNee | bf9af61 | 2024-05-02 23:22:39 | [diff] [blame] | 110 | ExperimentalPreloadingPrediction( |
| 111 | const ExperimentalPreloadingPrediction& other) = delete; |
| 112 | ExperimentalPreloadingPrediction& operator=( |
| 113 | const ExperimentalPreloadingPrediction& other) = delete; |
| 114 | ExperimentalPreloadingPrediction(ExperimentalPreloadingPrediction&&); |
| 115 | ExperimentalPreloadingPrediction& operator=( |
| 116 | ExperimentalPreloadingPrediction&&); |
| 117 | |
Kevin McNee | cc19871 | 2024-03-20 16:00:31 | [diff] [blame] | 118 | std::string_view PredictorName() const { return name_; } |
Iman Saboori | 31f0949 | 2023-06-26 20:15:20 | [diff] [blame] | 119 | bool IsAccuratePrediction() const { return is_accurate_prediction_; } |
Iman Saboori | 31f0949 | 2023-06-26 20:15:20 | [diff] [blame] | 120 | |
| 121 | void SetIsAccuratePrediction(const GURL& navigated_url); |
| 122 | void RecordToUMA() const; |
| 123 | |
| 124 | private: |
| 125 | // Experimental predictor's name |
Kevin McNee | cc19871 | 2024-03-20 16:00:31 | [diff] [blame] | 126 | std::string_view name_; |
Iman Saboori | 31f0949 | 2023-06-26 20:15:20 | [diff] [blame] | 127 | // Set to true when preloading prediction was correct i.e., when the |
| 128 | // navigation happens to the same predicted URL. |
| 129 | bool is_accurate_prediction_ = false; |
Iman Saboori | 31f0949 | 2023-06-26 20:15:20 | [diff] [blame] | 130 | // The number of buckets that will be used for UMA aggregation. It must be |
| 131 | // less than 101. |
Kevin McNee | bf9af61 | 2024-05-02 23:22:39 | [diff] [blame] | 132 | uint8_t buckets_; |
| 133 | // The logit or probability score output of the predictor model. |
| 134 | // Normalized based on the min and max score values. |
| 135 | float normalized_score_; |
Iman Saboori | 31f0949 | 2023-06-26 20:15:20 | [diff] [blame] | 136 | // The callback to verify that the navigated URL is a match. |
| 137 | PreloadingURLMatchCallback url_match_predicate_; |
| 138 | }; |
| 139 | |
Kevin McNee | 49b7801 | 2024-07-20 03:18:53 | [diff] [blame] | 140 | // Stores data relating to a prediction made by the preloading ML model. Once |
| 141 | // the outcome of whether the prediction is accurate is known, the provided |
| 142 | // callback is invoked. |
| 143 | class ModelPredictionTrainingData { |
| 144 | public: |
| 145 | using OutcomeCallback = |
| 146 | base::OnceCallback<void(std::optional<double> sampling_likelihood, |
| 147 | bool is_accurate_prediction)>; |
| 148 | |
| 149 | ModelPredictionTrainingData(OutcomeCallback on_record_outcome, |
| 150 | PreloadingURLMatchCallback url_match_predicate); |
| 151 | |
| 152 | ~ModelPredictionTrainingData(); |
| 153 | ModelPredictionTrainingData(const ModelPredictionTrainingData&) = delete; |
| 154 | ModelPredictionTrainingData& operator=(const ModelPredictionTrainingData&) = |
| 155 | delete; |
| 156 | ModelPredictionTrainingData(ModelPredictionTrainingData&&); |
| 157 | ModelPredictionTrainingData& operator=(ModelPredictionTrainingData&&); |
| 158 | |
| 159 | void SetIsAccuratePrediction(const GURL& navigated_url); |
| 160 | void Record(std::optional<double> sampling_likelihood); |
| 161 | |
| 162 | private: |
| 163 | OutcomeCallback on_record_outcome_; |
| 164 | PreloadingURLMatchCallback url_match_predicate_; |
| 165 | bool is_accurate_prediction_ = false; |
| 166 | }; |
| 167 | |
Sreeja Kamishetty | 530e8c2 | 2022-06-11 02:20:16 | [diff] [blame] | 168 | } // namespace content |
| 169 | |
Sreeja Kamishetty | 8ef652b | 2022-06-22 22:43:17 | [diff] [blame] | 170 | #endif // CONTENT_BROWSER_PRELOADING_PRELOADING_PREDICTION_H_ |