Skip to content

Commit b5ce1c1

Browse files
fix: Output documents in a streaming fashion to avoid 2GB limit (#190)
1 parent 5e3ced0 commit b5ce1c1

File tree

1 file changed

+36
-7
lines changed

1 file changed

+36
-7
lines changed

scip_indexer/SCIPIndexer.cc

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,34 @@ class CFGTraversal final {
11281128

11291129
namespace sorbet::pipeline::semantic_extension {
11301130

1131+
struct IndexWriter {
1132+
scip::Index index;
1133+
ostream &outputStream;
1134+
1135+
~IndexWriter() {
1136+
this->write();
1137+
}
1138+
1139+
void writeDocument(scip::Document &&doc) {
1140+
*this->index.add_documents() = std::move(doc);
1141+
this->write();
1142+
}
1143+
1144+
void writeExternalSymbol(scip::SymbolInformation &&symbolInfo) {
1145+
*this->index.add_external_symbols() = std::move(symbolInfo);
1146+
if (this->index.external_symbols_size() % 1024 == 0) {
1147+
this->write();
1148+
}
1149+
}
1150+
1151+
private:
1152+
void write() {
1153+
this->index.SerializeToOstream(&this->outputStream);
1154+
this->index.clear_documents();
1155+
this->index.clear_external_symbols();
1156+
}
1157+
};
1158+
11311159
using LocalSymbolTable = UnorderedMap<core::LocalVariable, core::Loc>;
11321160

11331161
class SCIPSemanticExtension : public SemanticExtension {
@@ -1269,19 +1297,20 @@ class SCIPSemanticExtension : public SemanticExtension {
12691297
return s1.symbol() < s2.symbol();
12701298
});
12711299

1300+
// TODO: Is it OK to do I/O here? Or should it be elsewhere?
1301+
ofstream out(indexFilePath);
1302+
12721303
scip::Index index;
12731304
*index.mutable_metadata() = metadata;
1305+
index.SerializeToOstream(&out);
1306+
1307+
IndexWriter writer{scip::Index{}, out};
12741308
for (auto &document : allDocuments) {
1275-
*index.add_documents() = move(document);
1309+
writer.writeDocument(move(document));
12761310
}
12771311
for (auto &symbol : allExternalSymbols) {
1278-
*index.add_external_symbols() = move(symbol);
1312+
writer.writeExternalSymbol(move(symbol));
12791313
}
1280-
1281-
ofstream out(indexFilePath);
1282-
// TODO: Is it OK to do I/O here? Or should it be elsewhere?
1283-
index.SerializeToOstream(&out);
1284-
out.close();
12851314
};
12861315

12871316
virtual void typecheck(const core::GlobalState &gs, core::FileRef file, cfg::CFG &cfg,

0 commit comments

Comments
 (0)