blob: e199f21a4cd750e2380a7a0836fc269d21e8f835 [file] [log] [blame]
Evan Stade6a199202024-02-12 22:42:571// Copyright 2024 The Chromium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/browser/indexed_db/file_path_util.h"
6
7#include <inttypes.h>
Evan Stadee8e45482025-07-02 16:48:528#include <stddef.h>
9#include <stdint.h>
Evan Stade6a199202024-02-12 22:42:5710
Evan Stadee8e45482025-07-02 16:48:5211#include <string>
12#include <string_view>
13
14#include "base/containers/span.h"
15#include "base/files/file_enumerator.h"
16#include "base/files/file_path.h"
Evan Stade4121af32024-05-21 17:12:4317#include "base/files/file_util.h"
Evan Stadee8e45482025-07-02 16:48:5218#include "base/functional/function_ref.h"
19#include "base/logging.h"
Evan Stade4121af32024-05-21 17:12:4320#include "base/metrics/histogram_functions.h"
Evan Stadee8e45482025-07-02 16:48:5221#include "base/strings/string_util.h"
Evan Stade6a199202024-02-12 22:42:5722#include "base/strings/stringprintf.h"
Evan Stadee8e45482025-07-02 16:48:5223#include "build/buildflag.h"
24#include "components/base32/base32.h"
Evan Stade6a199202024-02-12 22:42:5725#include "components/services/storage/public/cpp/buckets/bucket_locator.h"
Evan Stadee8e45482025-07-02 16:48:5226#include "crypto/hash.h"
Evan Stade6a199202024-02-12 22:42:5727#include "storage/common/database/database_identifier.h"
28#include "third_party/blink/public/common/storage_key/storage_key.h"
29
Evan Stadecbb1e002024-09-13 20:06:5730namespace content::indexed_db {
Evan Stade6a199202024-02-12 22:42:5731
32namespace {
33constexpr base::FilePath::CharType kBlobExtension[] =
34 FILE_PATH_LITERAL(".blob");
Evan Stadee8e45482025-07-02 16:48:5235
36// The file name used for databases that have an empty name.
Evan Stadef0166622025-07-04 05:42:2037constexpr char kSqliteEmptyDatabaseNameFileName[] = "0";
Evan Stade6a199202024-02-12 22:42:5738} // namespace
39
Evan Stade6a199202024-02-12 22:42:5740bool ShouldUseLegacyFilePath(const storage::BucketLocator& bucket_locator) {
41 return bucket_locator.storage_key.IsFirstPartyContext() &&
42 bucket_locator.is_default;
43}
44
45base::FilePath GetBlobStoreFileName(
46 const storage::BucketLocator& bucket_locator) {
47 if (ShouldUseLegacyFilePath(bucket_locator)) {
48 // First-party blob files, for legacy reasons, are stored at:
49 // {{first_party_data_path}}/{{serialized_origin}}.indexeddb.blob
50 return base::FilePath()
51 .AppendASCII(storage::GetIdentifierFromOrigin(
52 bucket_locator.storage_key.origin()))
53 .AddExtension(kIndexedDBExtension)
54 .AddExtension(kBlobExtension);
55 }
56
57 // Third-party blob files are stored at:
58 // {{third_party_data_path}}/{{bucket_id}}/IndexedDB/indexeddb.blob
59 return base::FilePath(kIndexedDBFile).AddExtension(kBlobExtension);
60}
61
62base::FilePath GetLevelDBFileName(
63 const storage::BucketLocator& bucket_locator) {
64 if (ShouldUseLegacyFilePath(bucket_locator)) {
65 // First-party leveldb files, for legacy reasons, are stored at:
66 // {{first_party_data_path}}/{{serialized_origin}}.indexeddb.leveldb
67 // TODO(crbug.com/40855748): Migrate all first party buckets to the new
68 // path.
69 return base::FilePath()
70 .AppendASCII(storage::GetIdentifierFromOrigin(
71 bucket_locator.storage_key.origin()))
72 .AddExtension(kIndexedDBExtension)
73 .AddExtension(kLevelDBExtension);
74 }
75
76 // Third-party leveldb files are stored at:
77 // {{third_party_data_path}}/{{bucket_id}}/IndexedDB/indexeddb.leveldb
78 return base::FilePath(kIndexedDBFile).AddExtension(kLevelDBExtension);
79}
80
81base::FilePath GetBlobDirectoryName(const base::FilePath& path_base,
82 int64_t database_id) {
83 return path_base.AppendASCII(base::StringPrintf("%" PRIx64, database_id));
84}
85
86base::FilePath GetBlobDirectoryNameForKey(const base::FilePath& path_base,
87 int64_t database_id,
88 int64_t blob_number) {
89 base::FilePath path = GetBlobDirectoryName(path_base, database_id);
90 path = path.AppendASCII(base::StringPrintf(
91 "%02x", static_cast<int>(blob_number & 0x000000000000ff00) >> 8));
92 return path;
93}
94
95base::FilePath GetBlobFileNameForKey(const base::FilePath& path_base,
96 int64_t database_id,
97 int64_t blob_number) {
98 base::FilePath path =
99 GetBlobDirectoryNameForKey(path_base, database_id, blob_number);
100 path = path.AppendASCII(base::StringPrintf("%" PRIx64, blob_number));
101 return path;
102}
103
Evan Stade60a48c302025-07-11 08:10:05104bool IsPathTooLong(const base::FilePath& path) {
105 int limit = base::GetMaximumPathComponentLength(path.DirName());
Evan Stade4121af32024-05-21 17:12:43106 if (limit < 0) {
Raphael Kubo da Costa62404922025-07-08 04:04:36107 DPLOG(WARNING) << "GetMaximumPathComponentLength returned -1 for "
Evan Stade60a48c302025-07-11 08:10:05108 << path.DirName();
Evan Stade4121af32024-05-21 17:12:43109// In limited testing, ChromeOS returns 143, other OSes 255.
110#if BUILDFLAG(IS_CHROMEOS)
111 limit = 143;
112#else
113 limit = 255;
114#endif
115 }
Evan Stade60a48c302025-07-11 08:10:05116 return path.BaseName().value().length() > static_cast<uint32_t>(limit);
117}
118
119base::FilePath GetSqliteDbDirectory(
120 const storage::BucketLocator& bucket_locator) {
121 if (ShouldUseLegacyFilePath(bucket_locator)) {
122 // All sites share a single data path for their default bucket. Append a
123 // directory for this specific site.
124 return base::FilePath().AppendASCII(
125 storage::GetIdentifierFromOrigin(bucket_locator.storage_key.origin()));
Evan Stade4121af32024-05-21 17:12:43126 }
Evan Stade60a48c302025-07-11 08:10:05127
128 // The base data path is already specific to the site and bucket. The SQLite
129 // DB will be stored within it.
130 return base::FilePath();
Evan Stade4121af32024-05-21 17:12:43131}
132
Evan Stadee8e45482025-07-02 16:48:52133base::FilePath DatabaseNameToFileName(std::u16string_view db_name) {
134 // The goal is to create a deterministic mapping from DB name to file name.
135 // There are essentially no constraints on `db_name`, in terms of length or
136 // contents. File names have to conform to a certain character set and length,
137 // (which depends on the file system). Thus, the space of all file names is
138 // smaller than the space of all database names, and we can't simply use the
139 // db name as the file name.
140 //
141 // To address this, we first hash the db name using SHA256, which ensures a
142 // negligible probability of collisions. Then we encode using Base32, because
143 // it uses only a character set that is safe for all file systems, including
144 // case-insensitive ones.
145 return db_name.empty()
Evan Stadef0166622025-07-04 05:42:20146 ? base::FilePath::FromASCII(kSqliteEmptyDatabaseNameFileName)
Evan Stadee8e45482025-07-02 16:48:52147 : base::FilePath::FromASCII(base32::Base32Encode(
148 crypto::hash::Sha256(base::as_byte_span(db_name)),
149 base32::Base32EncodePolicy::OMIT_PADDING));
150}
151
152void EnumerateDatabasesInDirectory(
153 const base::FilePath& directory,
154 base::FunctionRef<void(const base::FilePath& path)> ref) {
155 base::FileEnumerator enumerator(directory, /*recursive=*/false,
156 base::FileEnumerator::FILES);
157 enumerator.ForEach([&](const base::FilePath& path) {
158 if (path.BaseName() ==
Evan Stadef0166622025-07-04 05:42:20159 base::FilePath::FromASCII(kSqliteEmptyDatabaseNameFileName)) {
Evan Stadee8e45482025-07-02 16:48:52160 ref(path);
161 return;
162 }
163
164 std::string ascii_name = path.BaseName().MaybeAsASCII();
165 if (ascii_name.empty()) {
166 return;
167 }
168
169 if (base32::Base32Decode(ascii_name).size() !=
170 crypto::hash::DigestSizeForHashKind(crypto::hash::HashKind::kSha256)) {
171 return;
172 }
173
174 ref(path);
175 });
176}
177
Evan Stadecbb1e002024-09-13 20:06:57178} // namespace content::indexed_db