metal : simplify soft_max encoding

ggerganov · ggerganov · commit 580fe2064cc4 · 2023-11-29T17:30:34.000+02:00
ggml-ci
diff --git a/ggml-metal.m b/ggml-metal.m
@@ -1040,12 +1040,7 @@ void ggml_metal_graph_compute(
                             const float scale = ((float *) dst->op_params)[0];
 
                             [encoder setBuffer:id_src0 offset:offs_src0   atIndex:0];
-                            if (id_src1) {
-                                [encoder setBuffer:id_src1 offset:offs_src1   atIndex:1];
-                            } else {
-                                [encoder setBuffer:nil     offset:0           atIndex:1];
-                            }
-
+                            [encoder setBuffer:id_src1 offset:offs_src1   atIndex:1];
                             [encoder setBuffer:id_dst  offset:offs_dst    atIndex:2];
                             [encoder setBytes:&ne00  length:sizeof(ne00)  atIndex:3];
                             [encoder setBytes:&ne01  length:sizeof(ne01)  atIndex:4];
diff --git a/llama.cpp b/llama.cpp
@@ -3705,8 +3705,8 @@ static struct ggml_tensor * llm_build_kqv(
     struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
     cb(kq, "kq", il);
 
-    // TODO: !!!!!!!!!
     if (max_alibi_bias > 0.0f) {
+        // temporary branch until we figure out how to handle ggml_alibi through ggml_add
         kq = ggml_scale(ctx, kq, kq_scale);
         cb(kq, "kq_scaled", il);