
Commit fdb0ab6

slaren authored and arthw committed
llama : skip token bounds check when evaluating embeddings (ggml-org#9437)
1 parent 33dd5fa commit fdb0ab6

File tree

1 file changed: +18 -14 lines changed


src/llama.cpp

Lines changed: 18 additions & 14 deletions
@@ -16080,19 +16080,21 @@ static int llama_decode_internal(
         return -1;
     }

-    for (uint32_t i = 0; i < n_tokens_all; ++i) {
-        if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= lctx.model.vocab.n_vocab) {
-            LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
-            return -1;
-        }
-    }
-
     const auto & model   = lctx.model;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;

     GGML_ASSERT((!batch_all.token && batch_all.embd) || (batch_all.token && !batch_all.embd)); // NOLINT

+    if (batch_all.token) {
+        for (uint32_t i = 0; i < n_tokens_all; ++i) {
+            if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= model.vocab.n_vocab) {
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
+                return -1;
+            }
+        }
+    }
+
     GGML_ASSERT(n_tokens_all <= cparams.n_batch);

     GGML_ASSERT((cparams.causal_attn || cparams.n_ubatch >= n_tokens_all) && "non-causal attention requires n_ubatch >= n_tokens");
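The guard matters because `llama_decode_internal` also accepts raw embeddings: in that case `batch_all.token` is NULL (as the `GGML_ASSERT` above documents), and the old unconditional loop dereferenced the null pointer before the assert was even reached. The sketch below distills the before/after behaviour into a standalone program; `toy_batch` and `validate_tokens` are hypothetical names for illustration, not the llama.cpp API.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-ins for illustration only -- not the llama.cpp types.
    struct toy_batch {
        uint32_t        n_tokens = 0;
        const int32_t * token    = nullptr; // token ids, or NULL for embedding input
        const float   * embd     = nullptr; // raw embeddings, or NULL for token input
    };

    // Mirrors the fixed logic: validate token ids only when they are present,
    // so an embedding-only batch (token == NULL) skips the check entirely.
    static int validate_tokens(const toy_batch & batch, uint32_t n_vocab) {
        if (batch.token) {
            for (uint32_t i = 0; i < batch.n_tokens; ++i) {
                if (batch.token[i] < 0 || (uint32_t) batch.token[i] >= n_vocab) {
                    fprintf(stderr, "invalid token[%u] = %d\n", i, batch.token[i]);
                    return -1;
                }
            }
        }
        return 0;
    }

    int main() {
        const uint32_t n_vocab = 32000;

        const int32_t bad_ids[] = { 1, 2, 99999 };
        toy_batch tok_batch;
        tok_batch.n_tokens = 3;
        tok_batch.token    = bad_ids;
        printf("token batch -> %d\n", validate_tokens(tok_batch, n_vocab)); // -1: 99999 out of range

        const float embd[] = { 0.1f, 0.2f };
        toy_batch embd_batch;
        embd_batch.n_tokens = 1;
        embd_batch.embd     = embd;
        // Before the fix the equivalent loop ran unconditionally and
        // dereferenced token == NULL here; now the check is skipped.
        printf("embd batch  -> %d\n", validate_tokens(embd_batch, n_vocab)); // 0
        return 0;
    }

As a side effect of moving the loop below the declarations, the check can now read `model.vocab.n_vocab` through the local `model` reference instead of `lctx.model`, which the old placement could not do.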
@@ -16379,19 +16381,21 @@ static int llama_encode_internal(
         return -1;
     }

-    for (uint32_t i = 0; i < n_tokens; ++i) {
-        if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= lctx.model.vocab.n_vocab) {
-            LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
-            return -1;
-        }
-    }
-
     const auto & model   = lctx.model;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;

     GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); // NOLINT

+    if (batch.token) {
+        for (uint32_t i = 0; i < n_tokens; ++i) {
+            if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= model.vocab.n_vocab) {
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
+                return -1;
+            }
+        }
+    }
+
     // micro-batching is not possible for non-causal encoding, so we process the batch in a single shot
     GGML_ASSERT(cparams.n_ubatch >= n_tokens && "encoder requires n_ubatch >= n_tokens");

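For context on the caller side, this is the path that used to crash. Below is a hedged usage sketch, assuming the public llama.cpp API around this commit; "model.gguf" is a placeholder path and the embedding values are dummies. `llama_batch_init` with `embd > 0` allocates `batch.embd` and leaves `batch.token` NULL, so decode/encode now skip the bounds check instead of reading through the null pointer.

    // A caller-side sketch, assuming the public llama.cpp API around this commit.
    #include "llama.h"

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("model.gguf", mparams); // placeholder path
        if (model == nullptr) {
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        llama_context * ctx = llama_new_context_with_model(model, cparams);

        // An embedding batch: embd > 0 makes llama_batch_init allocate
        // batch.embd and leave batch.token NULL -- the case the old
        // unconditional bounds check dereferenced.
        const int n_embd = llama_n_embd(model);
        llama_batch batch = llama_batch_init(/*n_tokens=*/1, /*embd=*/n_embd, /*n_seq_max=*/1);
        batch.n_tokens = 1;

        std::vector<float> dummy(n_embd, 0.0f); // dummy embedding vector
        std::copy(dummy.begin(), dummy.end(), batch.embd);
        batch.pos[0]       = 0;
        batch.n_seq_id[0]  = 1;
        batch.seq_id[0][0] = 0;
        batch.logits[0]    = 1;

        const int ret = llama_decode(ctx, batch); // no longer reads batch.token
        printf("llama_decode -> %d\n", ret);

        llama_batch_free(batch);
        llama_free(ctx);
        llama_free_model(model);
        return 0;
    }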