Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 1 | // Copyright 2024 The Chromium Authors |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "content/browser/indexed_db/file_path_util.h" |
| 6 | |
| 7 | #include <inttypes.h> |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 8 | #include <stddef.h> |
| 9 | #include <stdint.h> |
Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 10 | |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 11 | #include <string> |
| 12 | #include <string_view> |
| 13 | |
| 14 | #include "base/containers/span.h" |
| 15 | #include "base/files/file_enumerator.h" |
| 16 | #include "base/files/file_path.h" |
Evan Stade | 4121af3 | 2024-05-21 17:12:43 | [diff] [blame] | 17 | #include "base/files/file_util.h" |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 18 | #include "base/functional/function_ref.h" |
| 19 | #include "base/logging.h" |
Evan Stade | 4121af3 | 2024-05-21 17:12:43 | [diff] [blame] | 20 | #include "base/metrics/histogram_functions.h" |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 21 | #include "base/strings/string_util.h" |
Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 22 | #include "base/strings/stringprintf.h" |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 23 | #include "build/buildflag.h" |
| 24 | #include "components/base32/base32.h" |
Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 25 | #include "components/services/storage/public/cpp/buckets/bucket_locator.h" |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 26 | #include "crypto/hash.h" |
Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 27 | #include "storage/common/database/database_identifier.h" |
| 28 | #include "third_party/blink/public/common/storage_key/storage_key.h" |
| 29 | |
Evan Stade | cbb1e00 | 2024-09-13 20:06:57 | [diff] [blame] | 30 | namespace content::indexed_db { |
Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 31 | |
| 32 | namespace { |
| 33 | constexpr base::FilePath::CharType kBlobExtension[] = |
| 34 | FILE_PATH_LITERAL(".blob"); |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 35 | |
| 36 | // The file name used for databases that have an empty name. |
Evan Stade | f016662 | 2025-07-04 05:42:20 | [diff] [blame] | 37 | constexpr char kSqliteEmptyDatabaseNameFileName[] = "0"; |
Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 38 | } // namespace |
| 39 | |
Evan Stade | 6a19920 | 2024-02-12 22:42:57 | [diff] [blame] | 40 | bool ShouldUseLegacyFilePath(const storage::BucketLocator& bucket_locator) { |
| 41 | return bucket_locator.storage_key.IsFirstPartyContext() && |
| 42 | bucket_locator.is_default; |
| 43 | } |
| 44 | |
| 45 | base::FilePath GetBlobStoreFileName( |
| 46 | const storage::BucketLocator& bucket_locator) { |
| 47 | if (ShouldUseLegacyFilePath(bucket_locator)) { |
| 48 | // First-party blob files, for legacy reasons, are stored at: |
| 49 | // {{first_party_data_path}}/{{serialized_origin}}.indexeddb.blob |
| 50 | return base::FilePath() |
| 51 | .AppendASCII(storage::GetIdentifierFromOrigin( |
| 52 | bucket_locator.storage_key.origin())) |
| 53 | .AddExtension(kIndexedDBExtension) |
| 54 | .AddExtension(kBlobExtension); |
| 55 | } |
| 56 | |
| 57 | // Third-party blob files are stored at: |
| 58 | // {{third_party_data_path}}/{{bucket_id}}/IndexedDB/indexeddb.blob |
| 59 | return base::FilePath(kIndexedDBFile).AddExtension(kBlobExtension); |
| 60 | } |
| 61 | |
| 62 | base::FilePath GetLevelDBFileName( |
| 63 | const storage::BucketLocator& bucket_locator) { |
| 64 | if (ShouldUseLegacyFilePath(bucket_locator)) { |
| 65 | // First-party leveldb files, for legacy reasons, are stored at: |
| 66 | // {{first_party_data_path}}/{{serialized_origin}}.indexeddb.leveldb |
| 67 | // TODO(crbug.com/40855748): Migrate all first party buckets to the new |
| 68 | // path. |
| 69 | return base::FilePath() |
| 70 | .AppendASCII(storage::GetIdentifierFromOrigin( |
| 71 | bucket_locator.storage_key.origin())) |
| 72 | .AddExtension(kIndexedDBExtension) |
| 73 | .AddExtension(kLevelDBExtension); |
| 74 | } |
| 75 | |
| 76 | // Third-party leveldb files are stored at: |
| 77 | // {{third_party_data_path}}/{{bucket_id}}/IndexedDB/indexeddb.leveldb |
| 78 | return base::FilePath(kIndexedDBFile).AddExtension(kLevelDBExtension); |
| 79 | } |
| 80 | |
| 81 | base::FilePath GetBlobDirectoryName(const base::FilePath& path_base, |
| 82 | int64_t database_id) { |
| 83 | return path_base.AppendASCII(base::StringPrintf("%" PRIx64, database_id)); |
| 84 | } |
| 85 | |
| 86 | base::FilePath GetBlobDirectoryNameForKey(const base::FilePath& path_base, |
| 87 | int64_t database_id, |
| 88 | int64_t blob_number) { |
| 89 | base::FilePath path = GetBlobDirectoryName(path_base, database_id); |
| 90 | path = path.AppendASCII(base::StringPrintf( |
| 91 | "%02x", static_cast<int>(blob_number & 0x000000000000ff00) >> 8)); |
| 92 | return path; |
| 93 | } |
| 94 | |
| 95 | base::FilePath GetBlobFileNameForKey(const base::FilePath& path_base, |
| 96 | int64_t database_id, |
| 97 | int64_t blob_number) { |
| 98 | base::FilePath path = |
| 99 | GetBlobDirectoryNameForKey(path_base, database_id, blob_number); |
| 100 | path = path.AppendASCII(base::StringPrintf("%" PRIx64, blob_number)); |
| 101 | return path; |
| 102 | } |
| 103 | |
Evan Stade | 60a48c30 | 2025-07-11 08:10:05 | [diff] [blame] | 104 | bool IsPathTooLong(const base::FilePath& path) { |
| 105 | int limit = base::GetMaximumPathComponentLength(path.DirName()); |
Evan Stade | 4121af3 | 2024-05-21 17:12:43 | [diff] [blame] | 106 | if (limit < 0) { |
Raphael Kubo da Costa | 6240492 | 2025-07-08 04:04:36 | [diff] [blame] | 107 | DPLOG(WARNING) << "GetMaximumPathComponentLength returned -1 for " |
Evan Stade | 60a48c30 | 2025-07-11 08:10:05 | [diff] [blame] | 108 | << path.DirName(); |
Evan Stade | 4121af3 | 2024-05-21 17:12:43 | [diff] [blame] | 109 | // In limited testing, ChromeOS returns 143, other OSes 255. |
| 110 | #if BUILDFLAG(IS_CHROMEOS) |
| 111 | limit = 143; |
| 112 | #else |
| 113 | limit = 255; |
| 114 | #endif |
| 115 | } |
Evan Stade | 60a48c30 | 2025-07-11 08:10:05 | [diff] [blame] | 116 | return path.BaseName().value().length() > static_cast<uint32_t>(limit); |
| 117 | } |
| 118 | |
| 119 | base::FilePath GetSqliteDbDirectory( |
| 120 | const storage::BucketLocator& bucket_locator) { |
| 121 | if (ShouldUseLegacyFilePath(bucket_locator)) { |
| 122 | // All sites share a single data path for their default bucket. Append a |
| 123 | // directory for this specific site. |
| 124 | return base::FilePath().AppendASCII( |
| 125 | storage::GetIdentifierFromOrigin(bucket_locator.storage_key.origin())); |
Evan Stade | 4121af3 | 2024-05-21 17:12:43 | [diff] [blame] | 126 | } |
Evan Stade | 60a48c30 | 2025-07-11 08:10:05 | [diff] [blame] | 127 | |
| 128 | // The base data path is already specific to the site and bucket. The SQLite |
| 129 | // DB will be stored within it. |
| 130 | return base::FilePath(); |
Evan Stade | 4121af3 | 2024-05-21 17:12:43 | [diff] [blame] | 131 | } |
| 132 | |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 133 | base::FilePath DatabaseNameToFileName(std::u16string_view db_name) { |
| 134 | // The goal is to create a deterministic mapping from DB name to file name. |
| 135 | // There are essentially no constraints on `db_name`, in terms of length or |
| 136 | // contents. File names have to conform to a certain character set and length, |
| 137 | // (which depends on the file system). Thus, the space of all file names is |
| 138 | // smaller than the space of all database names, and we can't simply use the |
| 139 | // db name as the file name. |
| 140 | // |
| 141 | // To address this, we first hash the db name using SHA256, which ensures a |
| 142 | // negligible probability of collisions. Then we encode using Base32, because |
| 143 | // it uses only a character set that is safe for all file systems, including |
| 144 | // case-insensitive ones. |
| 145 | return db_name.empty() |
Evan Stade | f016662 | 2025-07-04 05:42:20 | [diff] [blame] | 146 | ? base::FilePath::FromASCII(kSqliteEmptyDatabaseNameFileName) |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 147 | : base::FilePath::FromASCII(base32::Base32Encode( |
| 148 | crypto::hash::Sha256(base::as_byte_span(db_name)), |
| 149 | base32::Base32EncodePolicy::OMIT_PADDING)); |
| 150 | } |
| 151 | |
| 152 | void EnumerateDatabasesInDirectory( |
| 153 | const base::FilePath& directory, |
| 154 | base::FunctionRef<void(const base::FilePath& path)> ref) { |
| 155 | base::FileEnumerator enumerator(directory, /*recursive=*/false, |
| 156 | base::FileEnumerator::FILES); |
| 157 | enumerator.ForEach([&](const base::FilePath& path) { |
| 158 | if (path.BaseName() == |
Evan Stade | f016662 | 2025-07-04 05:42:20 | [diff] [blame] | 159 | base::FilePath::FromASCII(kSqliteEmptyDatabaseNameFileName)) { |
Evan Stade | e8e4548 | 2025-07-02 16:48:52 | [diff] [blame] | 160 | ref(path); |
| 161 | return; |
| 162 | } |
| 163 | |
| 164 | std::string ascii_name = path.BaseName().MaybeAsASCII(); |
| 165 | if (ascii_name.empty()) { |
| 166 | return; |
| 167 | } |
| 168 | |
| 169 | if (base32::Base32Decode(ascii_name).size() != |
| 170 | crypto::hash::DigestSizeForHashKind(crypto::hash::HashKind::kSha256)) { |
| 171 | return; |
| 172 | } |
| 173 | |
| 174 | ref(path); |
| 175 | }); |
| 176 | } |
| 177 | |
Evan Stade | cbb1e00 | 2024-09-13 20:06:57 | [diff] [blame] | 178 | } // namespace content::indexed_db |