Skip to content

Commit 580fe20

Browse files
committed
metal : simplify soft_max encoding
ggml-ci
1 parent 390a445 commit 580fe20

File tree

2 files changed

+2
-7
lines changed

2 files changed

+2
-7
lines changed

ggml-metal.m

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,12 +1040,7 @@ void ggml_metal_graph_compute(
1040 1040
const float scale = ((float *) dst->op_params)[0];
1041 1041

1042 1042
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
1043-
if (id_src1) {
1044-
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
1045-
} else {
1046-
[encoder setBuffer:nil offset:0 atIndex:1];
1047-
}
1048-
1043+
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
1049 1044
[encoder setBuffer:id_dst offset:offs_dst atIndex:2];
1050 1045
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
1051 1046
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4];

llama.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3705,8 +3705,8 @@ static struct ggml_tensor * llm_build_kqv(
3705 3705
struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
3706 3706
cb(kq, "kq", il);
3707 3707

3708-
// TODO: !!!!!!!!!
3709 3708
if (max_alibi_bias > 0.0f) {
3709+
// temporary branch until we figure out how to handle ggml_alibi through ggml_add
3710 3710
kq = ggml_scale(ctx, kq, kq_scale);
3711 3711
cb(kq, "kq_scaled", il);
3712 3712

0 commit comments

Comments
 (0)