From a449fbb0008b65721c8b03f8cb10340c48d74aab Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Thu, 8 May 2025 11:51:47 +0200 Subject: [PATCH 1/2] bump version --- config/version.txt | 2 +- .../java/co/elastic/clients/transport/VersionInfo.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/version.txt b/config/version.txt index 5e460d892..b3b01cfd8 100644 --- a/config/version.txt +++ b/config/version.txt @@ -1 +1 @@ -8.18.1 +8.18.2 diff --git a/java-client/src/main-flavored/java/co/elastic/clients/transport/VersionInfo.java b/java-client/src/main-flavored/java/co/elastic/clients/transport/VersionInfo.java index 59e711e13..f59899c62 100644 --- a/java-client/src/main-flavored/java/co/elastic/clients/transport/VersionInfo.java +++ b/java-client/src/main-flavored/java/co/elastic/clients/transport/VersionInfo.java @@ -21,5 +21,5 @@ // Package private class VersionInfo { - static final String VERSION = "8.18.1"; + static final String VERSION = "8.18.2"; } From 4c5d50260672505998848760fa1ec7fe4d11b75e Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Tue, 27 May 2025 12:06:11 +0200 Subject: [PATCH 2/2] [codegen] update to latest spec --- .../analysis/IcuNormalizationCharFilter.java | 30 + .../elasticsearch/cat/CatNodeColumn.java | 533 ++++++++++++++++++ .../elasticsearch/cat/NodesRequest.java | 37 +- .../core/search/ShardProfile.java | 12 +- .../elasticsearch/doc-files/api-spec.html | 247 ++++---- .../indices/DeleteTemplateRequest.java | 9 +- .../ElasticsearchIndicesAsyncClient.java | 21 +- .../indices/ElasticsearchIndicesClient.java | 21 +- .../indices/GetTemplateRequest.java | 8 +- .../indices/PutTemplateRequest.java | 6 +- .../ChatCompletionUnifiedRequest.java | 22 + .../ElasticsearchInferenceAsyncClient.java | 334 ++--------- .../ElasticsearchInferenceClient.java | 334 ++--------- .../InferenceEndpointInfoJinaAi.java | 141 +++++ .../inference/PutAlibabacloudRequest.java | 10 - .../inference/PutAmazonbedrockRequest.java | 12 +- .../inference/PutAnthropicRequest.java | 10 - .../inference/PutAzureaistudioRequest.java | 10 - .../inference/PutAzureopenaiRequest.java | 10 - .../inference/PutCohereRequest.java | 10 - .../inference/PutGoogleaistudioRequest.java | 10 - .../inference/PutGooglevertexaiRequest.java | 10 - .../inference/PutHuggingFaceRequest.java | 12 +- .../inference/PutJinaaiRequest.java | 10 - .../inference/PutJinaaiResponse.java | 6 +- .../inference/PutMistralRequest.java | 10 - .../inference/PutOpenaiRequest.java | 10 - .../elasticsearch/inference/PutRequest.java | 11 +- .../inference/PutWatsonxRequest.java | 10 - .../inference/RequestChatCompletion.java | 28 +- .../inference/TaskTypeJinaAi.java | 66 +++ .../snapshot/SnapshotShardFailure.java | 13 +- 32 files changed, 1123 insertions(+), 890 deletions(-) create mode 100644 java-client/src/main/java/co/elastic/clients/elasticsearch/cat/CatNodeColumn.java create mode 100644 java-client/src/main/java/co/elastic/clients/elasticsearch/inference/InferenceEndpointInfoJinaAi.java create mode 100644 java-client/src/main/java/co/elastic/clients/elasticsearch/inference/TaskTypeJinaAi.java diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/_types/analysis/IcuNormalizationCharFilter.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/_types/analysis/IcuNormalizationCharFilter.java index 6db1a9fb1..57ad404b3 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/_types/analysis/IcuNormalizationCharFilter.java +++ 
b/java-client/src/main/java/co/elastic/clients/elasticsearch/_types/analysis/IcuNormalizationCharFilter.java @@ -26,6 +26,7 @@ import co.elastic.clients.json.ObjectDeserializer; import co.elastic.clients.util.ObjectBuilder; import jakarta.json.stream.JsonGenerator; +import java.lang.String; import java.util.Objects; import java.util.function.Function; import javax.annotation.Nullable; @@ -61,6 +62,9 @@ public class IcuNormalizationCharFilter extends CharFilterBase implements CharFi @Nullable private final IcuNormalizationType name; + @Nullable + private final String unicodeSetFilter; + // --------------------------------------------------------------------------------------------- private IcuNormalizationCharFilter(Builder builder) { @@ -68,6 +72,7 @@ private IcuNormalizationCharFilter(Builder builder) { this.mode = builder.mode; this.name = builder.name; + this.unicodeSetFilter = builder.unicodeSetFilter; } @@ -99,6 +104,14 @@ public final IcuNormalizationType name() { return this.name; } + /** + * API name: {@code unicode_set_filter} + */ + @Nullable + public final String unicodeSetFilter() { + return this.unicodeSetFilter; + } + protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) { generator.write("type", "icu_normalizer"); @@ -111,6 +124,11 @@ protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) { generator.writeKey("name"); this.name.serialize(generator, mapper); } + if (this.unicodeSetFilter != null) { + generator.writeKey("unicode_set_filter"); + generator.write(this.unicodeSetFilter); + + } } @@ -129,6 +147,9 @@ public static class Builder extends CharFilterBase.AbstractBuilder @Nullable private IcuNormalizationType name; + @Nullable + private String unicodeSetFilter; + /** * API name: {@code mode} */ @@ -145,6 +166,14 @@ public final Builder name(@Nullable IcuNormalizationType value) { return this; } + /** + * API name: {@code unicode_set_filter} + */ + public final Builder unicodeSetFilter(@Nullable String value) { + this.unicodeSetFilter = value; + return this; + } + @Override protected Builder self() { return this; @@ -176,6 +205,7 @@ protected static void setupIcuNormalizationCharFilterDeserializer( CharFilterBase.setupCharFilterBaseDeserializer(op); op.add(Builder::mode, IcuNormalizationMode._DESERIALIZER, "mode"); op.add(Builder::name, IcuNormalizationType._DESERIALIZER, "name"); + op.add(Builder::unicodeSetFilter, JsonpDeserializer.stringDeserializer(), "unicode_set_filter"); op.ignore("type"); } diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/cat/CatNodeColumn.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/cat/CatNodeColumn.java new file mode 100644 index 000000000..732a7f016 --- /dev/null +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/cat/CatNodeColumn.java @@ -0,0 +1,533 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package co.elastic.clients.elasticsearch.cat; + +import co.elastic.clients.json.JsonEnum; +import co.elastic.clients.json.JsonpDeserializable; +import co.elastic.clients.json.JsonpDeserializer; + +//---------------------------------------------------------------- +// THIS CODE IS GENERATED. MANUAL EDITS WILL BE LOST. +//---------------------------------------------------------------- +// +// This code is generated from the Elasticsearch API specification +// at https://github.com/elastic/elasticsearch-specification +// +// Manual updates to this file will be lost when the code is +// re-generated. +// +// If you find a property that is missing or wrongly typed, please +// open an issue or a PR on the API specification repository. +// +//---------------------------------------------------------------- + +/** + * + * @see API + * specification + */ +@JsonpDeserializable +public enum CatNodeColumn implements JsonEnum { + /** + * The Elasticsearch build hash. For example: 5c03844. + */ + Build("build", "b"), + + /** + * The size of completion. For example: 0b. + */ + CompletionSize("completion.size", "cs", "completionSize"), + + /** + * The percentage of recent system CPU used. + */ + Cpu("cpu"), + + /** + * The available disk space. For example: 198.4gb. + */ + DiskAvail("disk.avail", "d", "disk", "diskAvail"), + + /** + * The total disk space. For example: 458.3gb. + */ + DiskTotal("disk.total", "dt", "diskTotal"), + + /** + * The used disk space. For example: 259.8gb. + */ + DiskUsed("disk.used", "du", "diskUsed"), + + /** + * The percentage of disk space used. + */ + DiskUsedPercent("disk.used_percent", "dup", "diskUsedPercent"), + + /** + * The number of fielddata cache evictions. + */ + FielddataEvictions("fielddata.evictions", "fe", "fielddataEvictions"), + + /** + * The fielddata cache memory used. For example: 0b. + */ + FielddataMemorySize("fielddata.memory_size", "fm", "fielddataMemory"), + + /** + * The number of file descriptors used. + */ + FileDescCurrent("file_desc.current", "fdc", "fileDescriptorCurrent"), + + /** + * The maximum number of file descriptors. + */ + FileDescMax("file_desc.max", "fdm", "fileDescriptorMax"), + + /** + * The percentage of file descriptors used. + */ + FileDescPercent("file_desc.percent", "fdp", "fileDescriptorPercent"), + + /** + * The number of flushes. + */ + FlushTotal("flush.total", "ft", "flushTotal"), + + /** + * The amount of time spent in flush. + */ + FlushTotalTime("flush.total_time", "ftt", "flushTotalTime"), + + /** + * The number of current get operations. + */ + GetCurrent("get.current", "gc", "getCurrent"), + + /** + * The time spent in successful get operations. For example: 14ms. + */ + GetExistsTime("get.exists_time", "geti", "getExistsTime"), + + /** + * The number of successful get operations. + */ + GetExistsTotal("get.exists_total", "geto", "getExistsTotal"), + + /** + * The time spent in failed get operations. For example: 0s. + */ + GetMissingTime("get.missing_time", "gmti", "getMissingTime"), + + /** + * The number of failed get operations. 
+ */ + GetMissingTotal("get.missing_total", "gmto", "getMissingTotal"), + + /** + * The amount of time spent in get operations. For example: 14ms. + */ + GetTime("get.time", "gti", "getTime"), + + /** + * The number of get operations. + */ + GetTotal("get.total", "gto", "getTotal"), + + /** + * The used heap size. For example: 311.2mb. + */ + HeapCurrent("heap.current", "hc", "heapCurrent"), + + /** + * The total heap size. For example: 4gb. + */ + HeapMax("heap.max", "hm", "heapMax"), + + /** + * The used percentage of total allocated Elasticsearch JVM heap. This value + * reflects only the Elasticsearch process running within the operating system + * and is the most direct indicator of its JVM, heap, or memory resource + * performance. + */ + HeapPercent("heap.percent", "hp", "heapPercent"), + + /** + * The bound HTTP address. + */ + HttpAddress("http_address", "http"), + + /** + * The identifier for the node. + */ + Id("id", "nodeId"), + + /** + * The number of current deletion operations. + */ + IndexingDeleteCurrent("indexing.delete_current", "idc", "indexingDeleteCurrent"), + + /** + * The time spent in deletion operations. For example: 2ms. + */ + IndexingDeleteTime("indexing.delete_time", "idti", "indexingDeleteTime"), + + /** + * The number of deletion operations. + */ + IndexingDeleteTotal("indexing.delete_total", "idto", "indexingDeleteTotal"), + + /** + * The number of current indexing operations. + */ + IndexingIndexCurrent("indexing.index_current", "iic", "indexingIndexCurrent"), + + /** + * The number of failed indexing operations. + */ + IndexingIndexFailed("indexing.index_failed", "iif", "indexingIndexFailed"), + + /** + * The number of indexing operations that failed due to version conflict. + */ + IndexingIndexFailedDueToVersionConflict("indexing.index_failed_due_to_version_conflict", "iifvc", + "indexingIndexFailedDueToVersionConflict"), + + /** + * The time spent in indexing operations. For example: 134ms. + */ + IndexingIndexTime("indexing.index_time", "iiti", "indexingIndexTime"), + + /** + * The number of indexing operations. + */ + IndexingIndexTotal("indexing.index_total", "iito", "indexingIndexTotal"), + + /** + * The IP address. + */ + Ip("ip", "i"), + + /** + * The Java version. For example: 1.8.0. + */ + Jdk("jdk", "j"), + + /** + * The most recent load average. For example: 0.22. + */ + Load_1m("load_1m", "l"), + + /** + * The load average for the last five minutes. For example: 0.78. + */ + Load_5m("load_5m", "l"), + + /** + * The load average for the last fifteen minutes. For example: + * 1.24. + */ + Load_15m("load_15m", "l"), + + /** + * The number of mappings, including runtime and object fields. + */ + MappingsTotalCount("mappings.total_count", "mtc", "mappingsTotalCount"), + + /** + * The estimated heap overhead, in bytes, of mappings on this node, which allows + * for 1KiB of heap for every mapped field. + */ + MappingsTotalEstimatedOverheadInBytes("mappings.total_estimated_overhead_in_bytes", "mteo", + "mappingsTotalEstimatedOverheadInBytes"), + + /** + * Indicates whether the node is the elected master node. Returned values + * include * (elected master) and - (not elected + * master). + */ + Master("master", "m"), + + /** + * The number of current merge operations. + */ + MergesCurrent("merges.current", "mc", "mergesCurrent"), + + /** + * The number of current merging documents. + */ + MergesCurrentDocs("merges.current_docs", "mcd", "mergesCurrentDocs"), + + /** + * The size of current merges. For example: 0b. 
+ */ + MergesCurrentSize("merges.current_size", "mcs", "mergesCurrentSize"), + + /** + * The number of completed merge operations. + */ + MergesTotal("merges.total", "mt", "mergesTotal"), + + /** + * The number of merged documents. + */ + MergesTotalDocs("merges.total_docs", "mtd", "mergesTotalDocs"), + + /** + * The total size of merges. For example: 0b. + */ + MergesTotalSize("merges.total_size", "mts", "mergesTotalSize"), + + /** + * The time spent merging documents. For example: 0s. + */ + MergesTotalTime("merges.total_time", "mtt", "mergesTotalTime"), + + /** + * The node name. + */ + Name("name", "n"), + + /** + * The roles of the node. Returned values include c (cold node), + * d (data node), f (frozen node), h (hot + * node), i (ingest node), l (machine learning node), + * m (master-eligible node), r (remote cluster client + * node), s (content node), t (transform node), + * v (voting-only node), w (warm node), and + * - (coordinating node only). For example, dim + * indicates a master-eligible data and ingest node. + */ + NodeRole("node.role", "r", "role", "nodeRole"), + + /** + * The process identifier. + */ + Pid("pid", "p"), + + /** + * The bound transport port number. + */ + Port("port", "po"), + + /** + * The used query cache memory. For example: 0b. + */ + QueryCacheMemorySize("query_cache.memory_size", "qcm", "queryCacheMemory"), + + /** + * The number of query cache evictions. + */ + QueryCacheEvictions("query_cache.evictions", "qce", "queryCacheEvictions"), + + /** + * The query cache hit count. + */ + QueryCacheHitCount("query_cache.hit_count", "qchc", "queryCacheHitCount"), + + /** + * The query cache miss count. + */ + QueryCacheMissCount("query_cache.miss_count", "qcmc", "queryCacheMissCount"), + + /** + * The used total memory. For example: 513.4mb. + */ + RamCurrent("ram.current", "rc", "ramCurrent"), + + /** + * The total memory. For example: 2.9gb. + */ + RamMax("ram.max", "rm", "ramMax"), + + /** + * The used percentage of the total operating system memory. This reflects all + * processes running on the operating system instead of only Elasticsearch and + * is not guaranteed to correlate to its performance. + */ + RamPercent("ram.percent", "rp", "ramPercent"), + + /** + * The number of refresh operations. + */ + RefreshTotal("refresh.total", "rto", "refreshTotal"), + + /** + * The time spent in refresh operations. For example: 91ms. + */ + RefreshTime("refresh.time", "rti", "refreshTime"), + + /** + * The used request cache memory. For example: 0b. + */ + RequestCacheMemorySize("request_cache.memory_size", "rcm", "requestCacheMemory"), + + /** + * The number of request cache evictions. + */ + RequestCacheEvictions("request_cache.evictions", "rce", "requestCacheEvictions"), + + /** + * The request cache hit count. + */ + RequestCacheHitCount("request_cache.hit_count", "rchc", "requestCacheHitCount"), + + /** + * The request cache miss count. + */ + RequestCacheMissCount("request_cache.miss_count", "rcmc", "requestCacheMissCount"), + + /** + * The number of total script compilations. + */ + ScriptCompilations("script.compilations", "scrcc", "scriptCompilations"), + + /** + * The number of total compiled scripts evicted from cache. + */ + ScriptCacheEvictions("script.cache_evictions", "scrce", "scriptCacheEvictions"), + + /** + * The number of current fetch phase operations. + */ + SearchFetchCurrent("search.fetch_current", "sfc", "searchFetchCurrent"), + + /** + * The time spent in fetch phase. For example: 37ms. 
+ */ + SearchFetchTime("search.fetch_time", "sfti", "searchFetchTime"), + + /** + * The number of fetch operations. + */ + SearchFetchTotal("search.fetch_total", "sfto", "searchFetchTotal"), + + /** + * The number of open search contexts. + */ + SearchOpenContexts("search.open_contexts", "so", "searchOpenContexts"), + + /** + * The number of current query phase operations. + */ + SearchQueryCurrent("search.query_current", "sqc", "searchQueryCurrent"), + + /** + * The time spent in query phase. For example: 43ms. + */ + SearchQueryTime("search.query_time", "sqti", "searchQueryTime"), + + /** + * The number of query operations. + */ + SearchQueryTotal("search.query_total", "sqto", "searchQueryTotal"), + + /** + * The number of open scroll contexts. + */ + SearchScrollCurrent("search.scroll_current", "scc", "searchScrollCurrent"), + + /** + * The amount of time scroll contexts were held open. For example: + * 2m. + */ + SearchScrollTime("search.scroll_time", "scti", "searchScrollTime"), + + /** + * The number of completed scroll contexts. + */ + SearchScrollTotal("search.scroll_total", "scto", "searchScrollTotal"), + + /** + * The number of segments. + */ + SegmentsCount("segments.count", "sc", "segmentsCount"), + + /** + * The memory used by fixed bit sets for nested object field types and type + * filters for types referred in join fields. For example: 1.0kb. + */ + SegmentsFixedBitsetMemory("segments.fixed_bitset_memory", "sfbm", "fixedBitsetMemory"), + + /** + * The memory used by the index writer. For example: 18mb. + */ + SegmentsIndexWriterMemory("segments.index_writer_memory", "siwm", "segmentsIndexWriterMemory"), + + /** + * The memory used by segments. For example: 1.4kb. + */ + SegmentsMemory("segments.memory", "sm", "segmentsMemory"), + + /** + * The memory used by the version map. For example: 1.0kb. + */ + SegmentsVersionMapMemory("segments.version_map_memory", "svmm", "segmentsVersionMapMemory"), + + /** + * The number of shards assigned. + */ + ShardStatsTotalCount("shard_stats.total_count", "sstc", "shards", "shardStatsTotalCount"), + + /** + * The number of current suggest operations. + */ + SuggestCurrent("suggest.current", "suc", "suggestCurrent"), + + /** + * The time spent in suggest operations. + */ + SuggestTime("suggest.time", "suti", "suggestTime"), + + /** + * The number of suggest operations. + */ + SuggestTotal("suggest.total", "suto", "suggestTotal"), + + /** + * The amount of node uptime. For example: 17.3m. + */ + Uptime("uptime", "u"), + + /** + * The Elasticsearch version. For example: 9.0.0. + */ + Version("version", "v"), + + ; + + private final String jsonValue; + private final String[] aliases; + + CatNodeColumn(String jsonValue, String... aliases) { + this.jsonValue = jsonValue; + this.aliases = aliases; + } + + public String jsonValue() { + return this.jsonValue; + } + + public String[] aliases() { + return this.aliases; + } + + public static final JsonEnum.Deserializer _DESERIALIZER = new JsonEnum.Deserializer<>( + CatNodeColumn.values()); +} diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/cat/NodesRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/cat/NodesRequest.java index f00be48f5..6390a4f02 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/cat/NodesRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/cat/NodesRequest.java @@ -132,7 +132,8 @@ public final Boolean fullId() { } /** - * List of columns to appear in the response. Supports simple wildcards. 
+ * A comma-separated list of column names to display. It supports simple + * wildcards. *

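// Editor's sketch, not part of the patch: requesting specific columns with the
// "h" parameter documented above. It assumes an ElasticsearchClient named
// "esClient"; the h(...) and fullId(...) setters are the generated builder
// methods shown in this diff, and the column names mirror CatNodeColumn.
NodesResponse resp = esClient.cat().nodes(n -> n
        .h("name", "cpu", "heap.percent", "node.role") // CatNodeColumn values
        .fullId(true));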
* API name: {@code h} */ @@ -152,7 +153,7 @@ public final Boolean includeUnloadedSegments() { } /** - * Period to wait for a connection to the master node. + * The period to wait for a connection to the master node. *

* API name: {@code master_timeout} */ @@ -162,9 +163,9 @@ public final Time masterTimeout() { } /** - * List of columns that determine how the table should be sorted. Sorting - * defaults to ascending and can be changed by setting :asc or - * :desc as a suffix to the column name. + * A comma-separated list of column names or aliases that determines the sort + * order. Sorting defaults to ascending and can be changed by setting + * :asc or :desc as a suffix to the column name. *

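// Editor's sketch, not part of the patch: the ":asc"/":desc" suffix syntax
// described above, using the generated s(...) setter ("esClient" assumed).
NodesResponse sorted = esClient.cat().nodes(n -> n
        .s("cpu:desc", "name:asc")); // highest CPU first, ties broken by name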
* API name: {@code s} */ @@ -173,7 +174,7 @@ public final List s() { } /** - * Unit used to display time values. + * The unit used to display time values. *

* API name: {@code time} */ @@ -232,7 +233,8 @@ public final Builder fullId(@Nullable Boolean value) { } /** - * List of columns to appear in the response. Supports simple wildcards. + * A comma-separated list of column names to display. It supports simple + * wildcards. *

* API name: {@code h} *

@@ -244,7 +246,8 @@ public final Builder h(List list) { } /** - * List of columns to appear in the response. Supports simple wildcards. + * A comma-separated list of column names to display. It supports simple + * wildcards. *

* API name: {@code h} *

@@ -267,7 +270,7 @@ public final Builder includeUnloadedSegments(@Nullable Boolean value) { } /** - * Period to wait for a connection to the master node. + * The period to wait for a connection to the master node. *

* API name: {@code master_timeout} */ @@ -277,7 +280,7 @@ public final Builder masterTimeout(@Nullable Time value) { } /** - * Period to wait for a connection to the master node. + * The period to wait for a connection to the master node. *

* API name: {@code master_timeout} */ @@ -286,9 +289,9 @@ public final Builder masterTimeout(Function> f } /** - * List of columns that determine how the table should be sorted. Sorting - * defaults to ascending and can be changed by setting :asc or - * :desc as a suffix to the column name. + * A comma-separated list of column names or aliases that determines the sort + * order. Sorting defaults to ascending and can be changed by setting + * :asc or :desc as a suffix to the column name. *

* API name: {@code s} *

@@ -300,9 +303,9 @@ public final Builder s(List list) { } /** - * List of columns that determine how the table should be sorted. Sorting - * defaults to ascending and can be changed by setting :asc or - * :desc as a suffix to the column name. + * A comma-separated list of column names or aliases that determines the sort + * order. Sorting defaults to ascending and can be changed by setting + * :asc or :desc as a suffix to the column name. *

* API name: {@code s} *

@@ -314,7 +317,7 @@ public final Builder s(String value, String... values) { } /** - * Unit used to display time values. + * The unit used to display time values. *

* API name: {@code time} */ diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/core/search/ShardProfile.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/core/search/ShardProfile.java index 1b199e402..bfead3d10 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/core/search/ShardProfile.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/core/search/ShardProfile.java @@ -30,7 +30,7 @@ import co.elastic.clients.util.ObjectBuilder; import co.elastic.clients.util.WithJsonObjectBuilderBase; import jakarta.json.stream.JsonGenerator; -import java.lang.Long; +import java.lang.Integer; import java.lang.String; import java.util.List; import java.util.Objects; @@ -80,7 +80,7 @@ public class ShardProfile implements JsonpSerializable { private final List searches; - private final long shardId; + private final int shardId; // --------------------------------------------------------------------------------------------- @@ -163,7 +163,7 @@ public final List searches() { /** * Required - API name: {@code shard_id} */ - public final long shardId() { + public final int shardId() { return this.shardId; } @@ -255,7 +255,7 @@ public static class Builder extends WithJsonObjectBuilderBase implement private List searches; - private Long shardId; + private Integer shardId; /** * Required - API name: {@code aggregations} @@ -380,7 +380,7 @@ public final Builder searches(Function 1) { hash = hash.substring(1); } - window.location = "https://github.com/elastic/elasticsearch-specification/tree/68c914c921730ba183c12d9f6f2edb994fa817d3/specification/" + (paths[hash] || ""); + window.location = "https://github.com/elastic/elasticsearch-specification/tree/0b326d89a5dc4013a2ef7239e58fe98f3db2639a/specification/" + (paths[hash] || ""); - Please see the Elasticsearch API specification. + Please see the Elasticsearch API specification. diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/DeleteTemplateRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/DeleteTemplateRequest.java index 1e8ec4664..aec07653c 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/DeleteTemplateRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/DeleteTemplateRequest.java @@ -56,12 +56,15 @@ // typedef: indices.delete_template.Request /** - * Delete a legacy index template. + * Delete a legacy index template. IMPORTANT: This documentation is about legacy + * index templates, which are deprecated and will be replaced by the composable + * templates introduced in Elasticsearch 7.8. * * @see API * specification + * @deprecated 7.8.0 */ - +@Deprecated public class DeleteTemplateRequest extends RequestBase { @Nullable private final Time masterTimeout; @@ -122,7 +125,7 @@ public final Time timeout() { /** * Builder for {@link DeleteTemplateRequest}. 
*/ - + @Deprecated public static class Builder extends RequestBase.AbstractBuilder implements ObjectBuilder { diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesAsyncClient.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesAsyncClient.java index 8bd7ae353..061724fc1 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesAsyncClient.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesAsyncClient.java @@ -938,7 +938,9 @@ public final CompletableFuture deleteIndexTemplate( // ----- Endpoint: indices.delete_template /** - * Delete a legacy index template. + * Delete a legacy index template. IMPORTANT: This documentation is about legacy + * index templates, which are deprecated and will be replaced by the composable + * templates introduced in Elasticsearch 7.8. * * @see Documentation @@ -953,7 +955,9 @@ public CompletableFuture deleteTemplate(DeleteTemplateRe } /** - * Delete a legacy index template. + * Delete a legacy index template. IMPORTANT: This documentation is about legacy + * index templates, which are deprecated and will be replaced by the composable + * templates introduced in Elasticsearch 7.8. * * @param fn * a function that initializes a builder to create the @@ -2101,7 +2105,8 @@ public CompletableFuture getSettings() { // ----- Endpoint: indices.get_template /** - * Get index templates. Get information about one or more index templates. + * Get legacy index templates. Get information about one or more index + * templates. *

* IMPORTANT: This documentation is about legacy index templates, which are * deprecated and will be replaced by the composable templates introduced in @@ -2120,7 +2125,8 @@ public CompletableFuture getTemplate(GetTemplateRequest req } /** - * Get index templates. Get information about one or more index templates. + * Get legacy index templates. Get information about one or more index + * templates. *

* IMPORTANT: This documentation is about legacy index templates, which are * deprecated and will be replaced by the composable templates introduced in @@ -2140,7 +2146,8 @@ public final CompletableFuture getTemplate( } /** - * Get index templates. Get information about one or more index templates. + * Get legacy index templates. Get information about one or more index + * templates. *

* IMPORTANT: This documentation is about legacy index templates, which are * deprecated and will be replaced by the composable templates introduced in @@ -2842,7 +2849,7 @@ public CompletableFuture putSettings() { // ----- Endpoint: indices.put_template /** - * Create or update an index template. Index templates define settings, + * Create or update a legacy index template. Index templates define settings, * mappings, and aliases that can be applied automatically to new indices. * Elasticsearch applies templates to new indices based on an index pattern that * matches the index name. @@ -2886,7 +2893,7 @@ public CompletableFuture putTemplate(PutTemplateRequest req } /** - * Create or update an index template. Index templates define settings, + * Create or update a legacy index template. Index templates define settings, * mappings, and aliases that can be applied automatically to new indices. * Elasticsearch applies templates to new indices based on an index pattern that * matches the index name. diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesClient.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesClient.java index f95756c7b..6a239feb3 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesClient.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/ElasticsearchIndicesClient.java @@ -949,7 +949,9 @@ public final DeleteIndexTemplateResponse deleteIndexTemplate( // ----- Endpoint: indices.delete_template /** - * Delete a legacy index template. + * Delete a legacy index template. IMPORTANT: This documentation is about legacy + * index templates, which are deprecated and will be replaced by the composable + * templates introduced in Elasticsearch 7.8. * * @see Documentation @@ -965,7 +967,9 @@ public DeleteTemplateResponse deleteTemplate(DeleteTemplateRequest request) } /** - * Delete a legacy index template. + * Delete a legacy index template. IMPORTANT: This documentation is about legacy + * index templates, which are deprecated and will be replaced by the composable + * templates introduced in Elasticsearch 7.8. * * @param fn * a function that initializes a builder to create the @@ -2132,7 +2136,8 @@ public GetIndicesSettingsResponse getSettings() throws IOException, Elasticsearc // ----- Endpoint: indices.get_template /** - * Get index templates. Get information about one or more index templates. + * Get legacy index templates. Get information about one or more index + * templates. *

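// Editor's sketch, not part of the patch: reading a template via the legacy
// API this change marks @Deprecated, next to its composable replacement
// introduced in 7.8 ("esClient" assumed).
GetTemplateResponse legacy = esClient.indices().getTemplate(t -> t.name("logs-legacy"));
GetIndexTemplateResponse composable = esClient.indices().getIndexTemplate(t -> t.name("logs"));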
* IMPORTANT: This documentation is about legacy index templates, which are * deprecated and will be replaced by the composable templates introduced in @@ -2151,7 +2156,8 @@ public GetTemplateResponse getTemplate(GetTemplateRequest request) throws IOExce } /** - * Get index templates. Get information about one or more index templates. + * Get legacy index templates. Get information about one or more index + * templates. *

* IMPORTANT: This documentation is about legacy index templates, which are * deprecated and will be replaced by the composable templates introduced in @@ -2172,7 +2178,8 @@ public final GetTemplateResponse getTemplate( } /** - * Get index templates. Get information about one or more index templates. + * Get legacy index templates. Get information about one or more index + * templates. *

* IMPORTANT: This documentation is about legacy index templates, which are * deprecated and will be replaced by the composable templates introduced in @@ -2889,7 +2896,7 @@ public PutIndicesSettingsResponse putSettings() throws IOException, Elasticsearc // ----- Endpoint: indices.put_template /** - * Create or update an index template. Index templates define settings, + * Create or update a legacy index template. Index templates define settings, * mappings, and aliases that can be applied automatically to new indices. * Elasticsearch applies templates to new indices based on an index pattern that * matches the index name. @@ -2933,7 +2940,7 @@ public PutTemplateResponse putTemplate(PutTemplateRequest request) throws IOExce } /** - * Create or update an index template. Index templates define settings, + * Create or update a legacy index template. Index templates define settings, * mappings, and aliases that can be applied automatically to new indices. * Elasticsearch applies templates to new indices based on an index pattern that * matches the index name. diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/GetTemplateRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/GetTemplateRequest.java index 14e709082..5268ad0b2 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/GetTemplateRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/GetTemplateRequest.java @@ -59,7 +59,8 @@ // typedef: indices.get_template.Request /** - * Get index templates. Get information about one or more index templates. + * Get legacy index templates. Get information about one or more index + * templates. *

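// Editor's sketch, not part of the patch: creating a legacy template via the
// putTemplate API deprecated above, followed by the composable equivalent
// ("esClient" assumed; settings shown are illustrative only).
esClient.indices().putTemplate(t -> t
        .name("logs-legacy")
        .indexPatterns("logs-*")
        .settings(s -> s.numberOfShards("1")));
esClient.indices().putIndexTemplate(t -> t
        .name("logs")
        .indexPatterns("logs-*")
        .template(tpl -> tpl.settings(s -> s.numberOfShards("1"))));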
* IMPORTANT: This documentation is about legacy index templates, which are * deprecated and will be replaced by the composable templates introduced in @@ -67,8 +68,9 @@ * * @see API * specification + * @deprecated 7.8.0 */ - +@Deprecated public class GetTemplateRequest extends RequestBase { @Nullable private final Boolean flatSettings; @@ -145,7 +147,7 @@ public final List name() { /** * Builder for {@link GetTemplateRequest}. */ - + @Deprecated public static class Builder extends RequestBase.AbstractBuilder implements ObjectBuilder { diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/PutTemplateRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/PutTemplateRequest.java index 0727f64a1..df57dca4e 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/PutTemplateRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/indices/PutTemplateRequest.java @@ -63,7 +63,7 @@ // typedef: indices.put_template.Request /** - * Create or update an index template. Index templates define settings, + * Create or update a legacy index template. Index templates define settings, * mappings, and aliases that can be applied automatically to new indices. * Elasticsearch applies templates to new indices based on an index pattern that * matches the index name. @@ -96,7 +96,9 @@ * * @see API * specification + * @deprecated 7.8.0 */ +@Deprecated @JsonpDeserializable public class PutTemplateRequest extends RequestBase implements JsonpSerializable { private final Map aliases; @@ -312,7 +314,7 @@ protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) { /** * Builder for {@link PutTemplateRequest}. */ - + @Deprecated public static class Builder extends RequestBase.AbstractBuilder implements ObjectBuilder { diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ChatCompletionUnifiedRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ChatCompletionUnifiedRequest.java index 4b2bb22e2..a1971f525 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ChatCompletionUnifiedRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ChatCompletionUnifiedRequest.java @@ -62,6 +62,28 @@ /** * Perform chat completion inference + *

+ * The chat completion inference API enables real-time responses for chat + * completion tasks by delivering answers incrementally, reducing response times + * during computation. It only works with the chat_completion task + * type for openai and elastic inference services. + *

+ * IMPORTANT: The inference APIs enable you to use certain services, such as + * built-in machine learning models (ELSER, E5), models uploaded through Eland, + * Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, + * Watsonx.ai, or Hugging Face. For built-in models and models uploaded through + * Eland, the inference APIs offer an alternative way to use and manage trained + * models. However, if you do not plan to use the inference APIs to use these + * models or if you want to use non-NLP models, use the machine learning trained + * model APIs. + *

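// Editor's sketch, not part of the patch: invoking the chat_completion task
// type described above against an "openai" endpoint. "esClient" is assumed,
// and the nested builder names not visible in this diff (chatCompletionRequest,
// messages, role, content) are best-effort assumptions from the same codegen
// pattern, not confirmed by the patch.
BinaryResponse sse = esClient.inference().chatCompletionUnified(u -> u
        .inferenceId("openai-chat") // a chat_completion inference endpoint
        .chatCompletionRequest(r -> r
                .messages(m -> m
                        .role("user")
                        .content(c -> c.string("Say hello")))));
// The endpoint streams server-sent events; consume them via sse.content().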
+ * NOTE: The chat_completion task type is only available within the + * _stream API and only supports streaming. The Chat completion inference API + * and the Stream inference API differ in their response structure and + * capabilities. The Chat completion inference API provides more comprehensive + * customization options through more fields and function calling support. If + * you use the openai service or the elastic service, + * use the Chat completion inference API. * * @see API diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceAsyncClient.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceAsyncClient.java index 194bca79d..e96df8193 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceAsyncClient.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceAsyncClient.java @@ -72,6 +72,28 @@ public ElasticsearchInferenceAsyncClient withTransportOptions(@Nullable Transpor /** * Perform chat completion inference + *

+ * The chat completion inference API enables real-time responses for chat + * completion tasks by delivering answers incrementally, reducing response times + * during computation. It only works with the chat_completion task + * type for openai and elastic inference services. + *

+ * IMPORTANT: The inference APIs enable you to use certain services, such as + * built-in machine learning models (ELSER, E5), models uploaded through Eland, + * Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, + * Watsonx.ai, or Hugging Face. For built-in models and models uploaded through + * Eland, the inference APIs offer an alternative way to use and manage trained + * models. However, if you do not plan to use the inference APIs to use these + * models or if you want to use non-NLP models, use the machine learning trained + * model APIs. + *

+ * NOTE: The chat_completion task type is only available within the + * _stream API and only supports streaming. The Chat completion inference API + * and the Stream inference API differ in their response structure and + * capabilities. The Chat completion inference API provides more comprehensive + * customization options through more fields and function calling support. If + * you use the openai service or the elastic service, + * use the Chat completion inference API. * * @see Documentation @@ -87,6 +109,28 @@ public CompletableFuture chatCompletionUnified(ChatCompletionUni /** * Perform chat completion inference + *

+ * The chat completion inference API enables real-time responses for chat + * completion tasks by delivering answers incrementally, reducing response times + * during computation. It only works with the chat_completion task + * type for openai and elastic inference services. + *

+ * IMPORTANT: The inference APIs enable you to use certain services, such as + * built-in machine learning models (ELSER, E5), models uploaded through Eland, + * Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, + * Watsonx.ai, or Hugging Face. For built-in models and models uploaded through + * Eland, the inference APIs offer an alternative way to use and manage trained + * models. However, if you do not plan to use the inference APIs to use these + * models or if you want to use non-NLP models, use the machine learning trained + * model APIs. + *

+ * NOTE: The chat_completion task type is only available within the + * _stream API and only supports streaming. The Chat completion inference API + * and the Stream inference API differ in their response structure and + * capabilities. The Chat completion inference API provides more comprehensive + * customization options through more fields and function calling support. If + * you use the openai service or the elastic service, + * use the Chat completion inference API. * * @param fn * a function that initializes a builder to create the @@ -291,16 +335,7 @@ public final CompletableFuture inference( // ----- Endpoint: inference.put /** - * Create an inference endpoint. When you create an inference endpoint, the - * associated machine learning model is automatically deployed if it is not - * already running. After creating the endpoint, wait for the model deployment - * to complete before using it. To verify the deployment status, use the get - * trained model statistics API. Look for - * "state": "fully_allocated" in the response - * and ensure that the "allocation_count" matches the - * "target_allocation_count". Avoid creating multiple - * endpoints for the same model unless required, as each endpoint consumes - * significant resources. + * Create an inference endpoint. *

* IMPORTANT: The inference APIs enable you to use certain services, such as * built-in machine learning models (ELSER, E5), models uploaded through Eland, @@ -324,16 +359,7 @@ public CompletableFuture put(PutRequest request) { } /** - * Create an inference endpoint. When you create an inference endpoint, the - * associated machine learning model is automatically deployed if it is not - * already running. After creating the endpoint, wait for the model deployment - * to complete before using it. To verify the deployment status, use the get - * trained model statistics API. Look for - * "state": "fully_allocated" in the response - * and ensure that the "allocation_count" matches the - * "target_allocation_count". Avoid creating multiple - * endpoints for the same model unless required, as each endpoint consumes - * significant resources. + * Create an inference endpoint. *

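// Editor's sketch, not part of the patch: the generic put API described above.
// "esClient" is assumed; in this generic variant the service settings travel
// as raw JSON, and the model/settings values are illustrative only.
PutResponse created = esClient.inference().put(p -> p
        .taskType(TaskType.TextEmbedding)
        .inferenceId("my-embeddings")
        .inferenceConfig(c -> c
                .service("elasticsearch")
                .serviceSettings(JsonData.fromJson(
                        "{\"model_id\":\".multilingual-e5-small\",\"num_allocations\":1,\"num_threads\":1}"))));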
* IMPORTANT: The inference APIs enable you to use certain services, such as * built-in machine learning models (ELSER, E5), models uploaded through Eland, @@ -363,16 +389,6 @@ public final CompletableFuture put(Function * Create an inference endpoint to perform an inference task with the * alibabacloud-ai-search service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -391,16 +407,6 @@ public CompletableFuture putAlibabacloud(PutAlibabaclou *

* Create an inference endpoint to perform an inference task with the * alibabacloud-ai-search service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -431,17 +437,7 @@ public final CompletableFuture putAlibabacloud( * updated keys. *

* - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @see Documentation * on elastic.co @@ -468,17 +464,7 @@ public CompletableFuture putAmazonbedrock(PutAmazonbed * updated keys. *

* - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @param fn * a function that initializes a builder to create the * {@link PutAmazonbedrockRequest} @@ -499,16 +485,6 @@ public final CompletableFuture putAmazonbedrock( *

* Create an inference endpoint to perform an inference task with the * anthropic service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -527,16 +503,6 @@ public CompletableFuture putAnthropic(PutAnthropicRequest *

* Create an inference endpoint to perform an inference task with the * anthropic service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -558,16 +524,6 @@ public final CompletableFuture putAnthropic( *

* Create an inference endpoint to perform an inference task with the * azureaistudio service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -586,16 +542,6 @@ public CompletableFuture putAzureaistudio(PutAzureaist *

* Create an inference endpoint to perform an inference task with the * azureaistudio service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -632,16 +578,6 @@ public final CompletableFuture putAzureaistudio( * be found in the Azure * models documentation. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -675,16 +611,6 @@ public CompletableFuture putAzureopenai(PutAzureopenaiRe * be found in the Azure * models documentation. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -706,16 +632,6 @@ public final CompletableFuture putAzureopenai( *

* Create an inference endpoint to perform an inference task with the * cohere service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -734,16 +650,6 @@ public CompletableFuture putCohere(PutCohereRequest request) *

* Create an inference endpoint to perform an inference task with the * cohere service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -951,16 +857,6 @@ public final CompletableFuture putElser( *

* Create an inference endpoint to perform an inference task with the * googleaistudio service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -979,16 +875,6 @@ public CompletableFuture putGoogleaistudio(PutGooglea *

* Create an inference endpoint to perform an inference task with the * googleaistudio service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1010,16 +896,6 @@ public final CompletableFuture putGoogleaistudio( *

* Create an inference endpoint to perform an inference task with the * googlevertexai service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1038,16 +914,6 @@ public CompletableFuture putGooglevertexai(PutGooglev *

* Create an inference endpoint to perform an inference task with the * googlevertexai service. - *

- * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1087,17 +953,7 @@ public final CompletableFuture putGooglevertexai( *

  • multilingual-e5-base
  • *
  • multilingual-e5-small
  • * - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @see Documentation * on elastic.co @@ -1133,17 +989,7 @@ public CompletableFuture putHuggingFace(PutHuggingFaceRe *

  • multilingual-e5-base
  • multilingual-e5-small

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @param fn * a function that initializes a builder to create the * {@link PutHuggingFaceRequest} @@ -1169,16 +1015,6 @@ public final CompletableFuture putHuggingFace( * https://jina.ai/reranker. To review * the available text_embedding models, refer to the * https://jina.ai/embeddings/. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1202,16 +1038,6 @@ public CompletableFuture putJinaai(PutJinaaiRequest request) * https://jina.ai/reranker. To review * the available text_embedding models, refer to the * https://jina.ai/embeddings/. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1233,16 +1059,6 @@ public final CompletableFuture putJinaai( *

    * Creates an inference endpoint to perform an inference task with the * mistral service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1261,16 +1077,6 @@ public CompletableFuture putMistral(PutMistralRequest reques *

    * Creates an inference endpoint to perform an inference task with the * mistral service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1292,16 +1098,6 @@ public final CompletableFuture putMistral( *

    * Create an inference endpoint to perform an inference task with the * openai service or openai compatible APIs. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1320,16 +1116,6 @@ public CompletableFuture putOpenai(PutOpenaiRequest request) *

    * Create an inference endpoint to perform an inference task with the * openai service or openai compatible APIs. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1399,16 +1185,6 @@ public final CompletableFuture putVoyageai( * Elasticsearch deployment to use the watsonxai inference service. * You can provision one through the IBM catalog, the Cloud Databases CLI * plug-in, the Cloud Databases API, or Terraform. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1430,16 +1206,6 @@ public CompletableFuture putWatsonx(PutWatsonxRequest reques * Elasticsearch deployment to use the watsonxai inference service. * You can provision one through the IBM catalog, the Cloud Databases CLI * plug-in, the Cloud Databases API, or Terraform. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceClient.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceClient.java index fb5601f33..906505bc3 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceClient.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/ElasticsearchInferenceClient.java @@ -70,6 +70,28 @@ public ElasticsearchInferenceClient withTransportOptions(@Nullable TransportOpti /** * Perform chat completion inference + *

+ * The chat completion inference API enables real-time responses for chat
+ * completion tasks by delivering answers incrementally, reducing response times
+ * during computation. It only works with the chat_completion task
+ * type for openai and elastic inference services.
+ *
+ * IMPORTANT: The inference APIs enable you to use certain services, such as
+ * built-in machine learning models (ELSER, E5), models uploaded through Eland,
+ * Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic,
+ * Watsonx.ai, or Hugging Face. For built-in models and models uploaded through
+ * Eland, the inference APIs offer an alternative way to use and manage trained
+ * models. However, if you do not plan to use the inference APIs to use these
+ * models or if you want to use non-NLP models, use the machine learning trained
+ * model APIs.
+ *
+ * NOTE: The chat_completion task type is only available within the
+ * _stream API and only supports streaming. The Chat completion inference API
+ * and the Stream inference API differ in their response structure and
+ * capabilities. The Chat completion inference API provides more comprehensive
+ * customization options through more fields and function calling support. If
+ * you use the openai service or the elastic service,
+ * use the Chat completion inference API.
 *
 * @see Documentation
@@ -86,6 +108,28 @@ public BinaryResponse chatCompletionUnified(ChatCompletionUnifiedRequest request
 /**
  * Perform chat completion inference
+ *
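[Editor's sketch, not part of this patch: how the streaming API described above might be called through the generated client. The chatCompletionUnified method name and its BinaryResponse return type appear in the hunks here; the chatCompletionRequest(...) wrapper, the Message builder fields, and the inference id are assumptions based on the client's usual codegen conventions.]

import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.transport.endpoints.BinaryResponse;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

public class ChatCompletionSketch {

    // Streams a chat completion from an existing chat_completion endpoint.
    // "my-chat-endpoint" is a placeholder inference id.
    static void streamChatCompletion(ElasticsearchClient esClient) throws Exception {
        BinaryResponse response = esClient.inference().chatCompletionUnified(r -> r
                .inferenceId("my-chat-endpoint")
                .chatCompletionRequest(cc -> cc               // assumed builder name
                        .messages(m -> m
                                .role("user")
                                .content(c -> c.string("Say hello.")))));

        // chat_completion only supports streaming: the body is a sequence of
        // server-sent events, so read it incrementally rather than parsing a
        // single JSON document.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(response.content(), StandardCharsets.UTF_8))) {
            reader.lines().forEach(System.out::println);
        }
    }
}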

+ * The chat completion inference API enables real-time responses for chat
+ * completion tasks by delivering answers incrementally, reducing response times
+ * during computation. It only works with the chat_completion task
+ * type for openai and elastic inference services.
+ *
+ * IMPORTANT: The inference APIs enable you to use certain services, such as
+ * built-in machine learning models (ELSER, E5), models uploaded through Eland,
+ * Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic,
+ * Watsonx.ai, or Hugging Face. For built-in models and models uploaded through
+ * Eland, the inference APIs offer an alternative way to use and manage trained
+ * models. However, if you do not plan to use the inference APIs to use these
+ * models or if you want to use non-NLP models, use the machine learning trained
+ * model APIs.
+ *
+ * NOTE: The chat_completion task type is only available within the
+ * _stream API and only supports streaming. The Chat completion inference API
+ * and the Stream inference API differ in their response structure and
+ * capabilities. The Chat completion inference API provides more comprehensive
+ * customization options through more fields and function calling support. If
+ * you use the openai service or the elastic service,
+ * use the Chat completion inference API.
 *
 * @param fn
 *            a function that initializes a builder to create the
 *            {@link ChatCompletionUnifiedRequest}
@@ -292,16 +336,7 @@ public final InferenceResponse inference(Function
 /**
- * Create an inference endpoint. When you create an inference endpoint, the
- * associated machine learning model is automatically deployed if it is not
- * already running. After creating the endpoint, wait for the model deployment
- * to complete before using it. To verify the deployment status, use the get
- * trained model statistics API. Look for
- * "state": "fully_allocated" in the response
- * and ensure that the "allocation_count" matches the
- * "target_allocation_count". Avoid creating multiple
- * endpoints for the same model unless required, as each endpoint consumes
- * significant resources.
+ * Create an inference endpoint.
 *

    * IMPORTANT: The inference APIs enable you to use certain services, such as * built-in machine learning models (ELSER, E5), models uploaded through Eland, @@ -325,16 +360,7 @@ public PutResponse put(PutRequest request) throws IOException, ElasticsearchExce } /** - * Create an inference endpoint. When you create an inference endpoint, the - * associated machine learning model is automatically deployed if it is not - * already running. After creating the endpoint, wait for the model deployment - * to complete before using it. To verify the deployment status, use the get - * trained model statistics API. Look for - * "state": "fully_allocated" in the response - * and ensure that the "allocation_count" matches the - * "target_allocation_count". Avoid creating multiple - * endpoints for the same model unless required, as each endpoint consumes - * significant resources. + * Create an inference endpoint. *

    * IMPORTANT: The inference APIs enable you to use certain services, such as * built-in machine learning models (ELSER, E5), models uploaded through Eland, @@ -365,16 +391,6 @@ public final PutResponse put(Function * Create an inference endpoint to perform an inference task with the * alibabacloud-ai-search service. - *
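[Editor's sketch, not from this patch: since the javadoc now says only "Create an inference endpoint.", a compact example of the service-agnostic put call may help. The inferenceConfig(...) name follows the generated PutRequest builder; the elser service name and its settings JSON are illustrative placeholders.]

import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch.inference.PutResponse;
import co.elastic.clients.json.JsonData;

public class PutInferenceSketch {

    // Registers a generic inference endpoint; untyped service settings
    // travel as JsonData because the generic API is not service-specific.
    static PutResponse createEndpoint(ElasticsearchClient esClient) throws Exception {
        return esClient.inference().put(p -> p
                .inferenceId("my-elser-endpoint")
                .inferenceConfig(c -> c
                        .service("elser")
                        .serviceSettings(JsonData.fromJson(
                                "{\"num_allocations\": 1, \"num_threads\": 1}"))));
    }
}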

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -394,16 +410,6 @@ public PutAlibabacloudResponse putAlibabacloud(PutAlibabacloudRequest request) *

    * Create an inference endpoint to perform an inference task with the * alibabacloud-ai-search service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -435,17 +441,7 @@ public final PutAlibabacloudResponse putAlibabacloud( * updated keys. *

    * - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @see Documentation * on elastic.co @@ -473,17 +469,7 @@ public PutAmazonbedrockResponse putAmazonbedrock(PutAmazonbedrockRequest request * updated keys. *

    * - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @param fn * a function that initializes a builder to create the * {@link PutAmazonbedrockRequest} @@ -505,16 +491,6 @@ public final PutAmazonbedrockResponse putAmazonbedrock( *

    * Create an inference endpoint to perform an inference task with the * anthropic service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -533,16 +509,6 @@ public PutAnthropicResponse putAnthropic(PutAnthropicRequest request) throws IOE *

    * Create an inference endpoint to perform an inference task with the * anthropic service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -565,16 +531,6 @@ public final PutAnthropicResponse putAnthropic( *

    * Create an inference endpoint to perform an inference task with the * azureaistudio service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -594,16 +550,6 @@ public PutAzureaistudioResponse putAzureaistudio(PutAzureaistudioRequest request *

    * Create an inference endpoint to perform an inference task with the * azureaistudio service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -641,16 +587,6 @@ public final PutAzureaistudioResponse putAzureaistudio( * be found in the Azure * models documentation. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -685,16 +621,6 @@ public PutAzureopenaiResponse putAzureopenai(PutAzureopenaiRequest request) * be found in the Azure * models documentation. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -717,16 +643,6 @@ public final PutAzureopenaiResponse putAzureopenai( *

    * Create an inference endpoint to perform an inference task with the * cohere service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -745,16 +661,6 @@ public PutCohereResponse putCohere(PutCohereRequest request) throws IOException, *

    * Create an inference endpoint to perform an inference task with the * cohere service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -964,16 +870,6 @@ public final PutElserResponse putElser(Function * Create an inference endpoint to perform an inference task with the * googleaistudio service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -993,16 +889,6 @@ public PutGoogleaistudioResponse putGoogleaistudio(PutGoogleaistudioRequest requ *

    * Create an inference endpoint to perform an inference task with the * googleaistudio service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1025,16 +911,6 @@ public final PutGoogleaistudioResponse putGoogleaistudio( *

    * Create an inference endpoint to perform an inference task with the * googlevertexai service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1054,16 +930,6 @@ public PutGooglevertexaiResponse putGooglevertexai(PutGooglevertexaiRequest requ *

    * Create an inference endpoint to perform an inference task with the * googlevertexai service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1104,17 +970,7 @@ public final PutGooglevertexaiResponse putGooglevertexai( *

  • multilingual-e5-base
  • multilingual-e5-small

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @see Documentation * on elastic.co @@ -1151,17 +1007,7 @@ public PutHuggingFaceResponse putHuggingFace(PutHuggingFaceRequest request) *

  • multilingual-e5-base
  • multilingual-e5-small

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @param fn * a function that initializes a builder to create the * {@link PutHuggingFaceRequest} @@ -1188,16 +1034,6 @@ public final PutHuggingFaceResponse putHuggingFace( * https://jina.ai/reranker. To review * the available text_embedding models, refer to the * https://jina.ai/embeddings/. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1221,16 +1057,6 @@ public PutJinaaiResponse putJinaai(PutJinaaiRequest request) throws IOException, * https://jina.ai/reranker. To review * the available text_embedding models, refer to the * https://jina.ai/embeddings/. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1252,16 +1078,6 @@ public final PutJinaaiResponse putJinaai(Function * Creates an inference endpoint to perform an inference task with the * mistral service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1280,16 +1096,6 @@ public PutMistralResponse putMistral(PutMistralRequest request) throws IOExcepti *

    * Creates an inference endpoint to perform an inference task with the * mistral service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1311,16 +1117,6 @@ public final PutMistralResponse putMistral(Function * Create an inference endpoint to perform an inference task with the * openai service or openai compatible APIs. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1339,16 +1135,6 @@ public PutOpenaiResponse putOpenai(PutOpenaiRequest request) throws IOException, *

    * Create an inference endpoint to perform an inference task with the * openai service or openai compatible APIs. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the @@ -1419,16 +1205,6 @@ public final PutVoyageaiResponse putVoyageai( * Elasticsearch deployment to use the watsonxai inference service. * You can provision one through the IBM catalog, the Cloud Databases CLI * plug-in, the Cloud Databases API, or Terraform. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see Documentation @@ -1450,16 +1226,6 @@ public PutWatsonxResponse putWatsonx(PutWatsonxRequest request) throws IOExcepti * Elasticsearch deployment to use the watsonxai inference service. * You can provision one through the IBM catalog, the Cloud Databases CLI * plug-in, the Cloud Databases API, or Terraform. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @param fn * a function that initializes a builder to create the diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/InferenceEndpointInfoJinaAi.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/InferenceEndpointInfoJinaAi.java new file mode 100644 index 000000000..da46627ab --- /dev/null +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/InferenceEndpointInfoJinaAi.java @@ -0,0 +1,141 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package co.elastic.clients.elasticsearch.inference; + +import co.elastic.clients.json.JsonpDeserializable; +import co.elastic.clients.json.JsonpDeserializer; +import co.elastic.clients.json.JsonpMapper; +import co.elastic.clients.json.ObjectBuilderDeserializer; +import co.elastic.clients.json.ObjectDeserializer; +import co.elastic.clients.util.ApiTypeHelper; +import co.elastic.clients.util.ObjectBuilder; +import co.elastic.clients.util.WithJsonObjectBuilderBase; +import jakarta.json.stream.JsonGenerator; +import java.lang.String; +import java.util.Objects; +import javax.annotation.Nullable; + +//---------------------------------------------------------------- +// THIS CODE IS GENERATED. MANUAL EDITS WILL BE LOST. +//---------------------------------------------------------------- +// +// This code is generated from the Elasticsearch API specification +// at https://github.com/elastic/elasticsearch-specification +// +// Manual updates to this file will be lost when the code is +// re-generated. +// +// If you find a property that is missing or wrongly typed, please +// open an issue or a PR on the API specification repository. 
+// +//---------------------------------------------------------------- + +// typedef: inference._types.InferenceEndpointInfoJinaAi + +/** + * + * @see API + * specification + */ + +public abstract class InferenceEndpointInfoJinaAi extends InferenceEndpoint { + private final String inferenceId; + + private final TaskTypeJinaAi taskType; + + // --------------------------------------------------------------------------------------------- + + protected InferenceEndpointInfoJinaAi(AbstractBuilder builder) { + super(builder); + + this.inferenceId = ApiTypeHelper.requireNonNull(builder.inferenceId, this, "inferenceId"); + this.taskType = ApiTypeHelper.requireNonNull(builder.taskType, this, "taskType"); + + } + + /** + * Required - The inference Id + *

    + * API name: {@code inference_id} + */ + public final String inferenceId() { + return this.inferenceId; + } + + /** + * Required - The task type + *

    + * API name: {@code task_type} + */ + public final TaskTypeJinaAi taskType() { + return this.taskType; + } + + protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) { + + super.serializeInternal(generator, mapper); + generator.writeKey("inference_id"); + generator.write(this.inferenceId); + + generator.writeKey("task_type"); + this.taskType.serialize(generator, mapper); + + } + + public abstract static class AbstractBuilder> + extends + InferenceEndpoint.AbstractBuilder { + private String inferenceId; + + private TaskTypeJinaAi taskType; + + /** + * Required - The inference Id + *

    + * API name: {@code inference_id} + */ + public final BuilderT inferenceId(String value) { + this.inferenceId = value; + return self(); + } + + /** + * Required - The task type + *

    + * API name: {@code task_type} + */ + public final BuilderT taskType(TaskTypeJinaAi value) { + this.taskType = value; + return self(); + } + + } + + // --------------------------------------------------------------------------------------------- + protected static > void setupInferenceEndpointInfoJinaAiDeserializer( + ObjectDeserializer op) { + InferenceEndpoint.setupInferenceEndpointDeserializer(op); + op.add(AbstractBuilder::inferenceId, JsonpDeserializer.stringDeserializer(), "inference_id"); + op.add(AbstractBuilder::taskType, TaskTypeJinaAi._DESERIALIZER, "task_type"); + + } + +} diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAlibabacloudRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAlibabacloudRequest.java index c0954788d..55378b971 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAlibabacloudRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAlibabacloudRequest.java @@ -62,16 +62,6 @@ *

    * Create an inference endpoint to perform an inference task with the * alibabacloud-ai-search service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAmazonbedrockRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAmazonbedrockRequest.java index 55b01e4bf..7afaf21bf 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAmazonbedrockRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAmazonbedrockRequest.java @@ -71,17 +71,7 @@ * updated keys. *

    * - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @see API * specification diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAnthropicRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAnthropicRequest.java index 2d1507c5e..afb447140 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAnthropicRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAnthropicRequest.java @@ -62,16 +62,6 @@ *

    * Create an inference endpoint to perform an inference task with the * anthropic service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API * specification diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureaistudioRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureaistudioRequest.java index 3a0d1ef54..87415cc8c 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureaistudioRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureaistudioRequest.java @@ -62,16 +62,6 @@ *

    * Create an inference endpoint to perform an inference task with the * azureaistudio service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureopenaiRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureopenaiRequest.java index 9b14d99ec..37aa2292c 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureopenaiRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutAzureopenaiRequest.java @@ -77,16 +77,6 @@ * be found in the Azure * models documentation. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutCohereRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutCohereRequest.java index 26de9cff1..3da2f26f5 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutCohereRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutCohereRequest.java @@ -62,16 +62,6 @@ *

    * Create an inference endpoint to perform an inference task with the * cohere service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API * specification diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGoogleaistudioRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGoogleaistudioRequest.java index fb2949825..32b9db6cf 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGoogleaistudioRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGoogleaistudioRequest.java @@ -62,16 +62,6 @@ *

    * Create an inference endpoint to perform an inference task with the * googleaistudio service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGooglevertexaiRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGooglevertexaiRequest.java index c491288d4..5f9082e54 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGooglevertexaiRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutGooglevertexaiRequest.java @@ -62,16 +62,6 @@ *

    * Create an inference endpoint to perform an inference task with the * googlevertexai service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutHuggingFaceRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutHuggingFaceRequest.java index 3b081c97f..1d494fa22 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutHuggingFaceRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutHuggingFaceRequest.java @@ -80,17 +80,7 @@ *

  • multilingual-e5-base
  • multilingual-e5-small
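[Editor's sketch, not from this patch: creating a text_embedding endpoint backed by one of the recommended models above. Every builder name here (the task type and service enums, huggingfaceInferenceId, the service settings fields) is an assumption based on the generated request classes; the token and URL are placeholders for a Hugging Face inference endpoint you have provisioned.]

import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch.inference.HuggingFaceServiceType;   // assumed type
import co.elastic.clients.elasticsearch.inference.HuggingFaceTaskType;      // assumed type
import co.elastic.clients.elasticsearch.inference.PutHuggingFaceResponse;

public class HuggingFaceEndpointSketch {

    static PutHuggingFaceResponse createE5Endpoint(ElasticsearchClient esClient) throws Exception {
        return esClient.inference().putHuggingFace(r -> r
                .taskType(HuggingFaceTaskType.TextEmbedding)       // assumed enum constant
                .huggingfaceInferenceId("my-e5-endpoint")
                .service(HuggingFaceServiceType.HuggingFace)       // assumed enum constant
                .serviceSettings(s -> s
                        .apiKey("<hugging-face-access-token>")     // placeholder
                        .url("<hugging-face-endpoint-url>")));     // placeholder
    }
}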

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. - * + * * @see API * specification diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiRequest.java index ab9b47ccb..924d75223 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiRequest.java @@ -67,16 +67,6 @@ * https://jina.ai/reranker. To review * the available text_embedding models, refer to the * https://jina.ai/embeddings/. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API * specification diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiResponse.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiResponse.java index 6d40a39ab..44aacfdbd 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiResponse.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutJinaaiResponse.java @@ -51,7 +51,7 @@ * specification */ @JsonpDeserializable -public class PutJinaaiResponse extends InferenceEndpointInfo { +public class PutJinaaiResponse extends InferenceEndpointInfoJinaAi { // --------------------------------------------------------------------------------------------- private PutJinaaiResponse(Builder builder) { @@ -69,7 +69,7 @@ public static PutJinaaiResponse of(Function + public static class Builder extends InferenceEndpointInfoJinaAi.AbstractBuilder implements ObjectBuilder { @Override @@ -99,7 +99,7 @@ public PutJinaaiResponse build() { .lazy(Builder::new, PutJinaaiResponse::setupPutJinaaiResponseDeserializer); protected static void setupPutJinaaiResponseDeserializer(ObjectDeserializer op) { - InferenceEndpointInfo.setupInferenceEndpointInfoDeserializer(op); + InferenceEndpointInfoJinaAi.setupInferenceEndpointInfoJinaAiDeserializer(op); } diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutMistralRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutMistralRequest.java index cde473677..cfc6451ee 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutMistralRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutMistralRequest.java @@ -62,16 +62,6 @@ *
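[Editor's sketch: the PutJinaaiResponse change above means the JinaAI put response now exposes the two required accessors defined on InferenceEndpointInfoJinaAi. Both methods appear verbatim in that new class; only the printed formatting here is illustrative.]

import co.elastic.clients.elasticsearch.inference.PutJinaaiResponse;
import co.elastic.clients.elasticsearch.inference.TaskTypeJinaAi;

public class JinaAiResponseSketch {

    // Both fields are required on the new response type, so no null checks
    // are needed. jsonValue() comes from JsonEnum, which TaskTypeJinaAi implements.
    static void describe(PutJinaaiResponse created) {
        String inferenceId = created.inferenceId();
        TaskTypeJinaAi taskType = created.taskType();
        System.out.println(inferenceId + " -> " + taskType.jsonValue());
    }
}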

    * Creates an inference endpoint to perform an inference task with the * mistral service. - *

    - * When you create an inference endpoint, the associated machine learning model - * is automatically deployed if it is not already running. After creating the - * endpoint, wait for the model deployment to complete before using it. To - * verify the deployment status, use the get trained model statistics API. Look - * for "state": "fully_allocated" in the - * response and ensure that the "allocation_count" - * matches the "target_allocation_count". Avoid creating - * multiple endpoints for the same model unless required, as each endpoint - * consumes significant resources. * * @see API * specification diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutOpenaiRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutOpenaiRequest.java index 2b10fe33f..bbc89dd1a 100644 --- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutOpenaiRequest.java +++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutOpenaiRequest.java @@ -62,16 +62,6 @@ *

    * Create an inference endpoint to perform an inference task with the * openai service or openai compatible APIs. - *

- * When you create an inference endpoint, the associated machine learning model
- * is automatically deployed if it is not already running. After creating the
- * endpoint, wait for the model deployment to complete before using it. To
- * verify the deployment status, use the get trained model statistics API. Look
- * for "state": "fully_allocated" in the
- * response and ensure that the "allocation_count"
- * matches the "target_allocation_count". Avoid creating
- * multiple endpoints for the same model unless required, as each endpoint
- * consumes significant resources.
  *
  * @see API
  *      specification
diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutRequest.java
index 6f734f226..1336a1320 100644
--- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutRequest.java
+++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutRequest.java
@@ -59,16 +59,7 @@
 // typedef: inference.put.Request

 /**
- * Create an inference endpoint. When you create an inference endpoint, the
- * associated machine learning model is automatically deployed if it is not
- * already running. After creating the endpoint, wait for the model deployment
- * to complete before using it. To verify the deployment status, use the get
- * trained model statistics API. Look for
- * "state": "fully_allocated" in the response
- * and ensure that the "allocation_count" matches the
- * "target_allocation_count". Avoid creating multiple
- * endpoints for the same model unless required, as each endpoint consumes
- * significant resources.
+ * Create an inference endpoint.
  * <p>
  * IMPORTANT: The inference APIs enable you to use certain services, such as
  * built-in machine learning models (ELSER, E5), models uploaded through Eland,
diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutWatsonxRequest.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutWatsonxRequest.java
index 2529929f7..dad5c0e86 100644
--- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutWatsonxRequest.java
+++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/PutWatsonxRequest.java
@@ -65,16 +65,6 @@
  * Elasticsearch deployment to use the watsonxai inference service.
  * You can provision one through the IBM catalog, the Cloud Databases CLI
  * plug-in, the Cloud Databases API, or Terraform.
- * <p>
- * When you create an inference endpoint, the associated machine learning model
- * is automatically deployed if it is not already running. After creating the
- * endpoint, wait for the model deployment to complete before using it. To
- * verify the deployment status, use the get trained model statistics API. Look
- * for "state": "fully_allocated" in the
- * response and ensure that the "allocation_count"
- * matches the "target_allocation_count". Avoid creating
- * multiple endpoints for the same model unless required, as each endpoint
- * consumes significant resources.
  *
  * @see API
  *      specification
diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/RequestChatCompletion.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/RequestChatCompletion.java
index ed267fc8e..99628e8c0 100644
--- a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/RequestChatCompletion.java
+++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/RequestChatCompletion.java
@@ -104,7 +104,12 @@ public static RequestChatCompletion of(Function<Builder, ObjectBuilder<RequestChatCompletion>> fn) {
 	/**
-	 * Required - A list of objects representing the conversation.
+	 * Required - A list of objects representing the conversation. Requests should
+	 * generally only add new messages from the user (role user). The
+	 * other message roles (assistant, system, or
+	 * tool) should generally only be copied from the response to a
+	 * previous completion request, such that the messages array is built up
+	 * throughout a conversation.
 	 * <p>
 	 * API name: {@code messages}
 	 */
@@ -289,7 +294,12 @@ public static class Builder extends WithJsonObjectBuilderBase<Builder>
 		private Float topP;

 		/**
-		 * Required - A list of objects representing the conversation.
+		 * Required - A list of objects representing the conversation. Requests should
+		 * generally only add new messages from the user (role user). The
+		 * other message roles (assistant, system, or
+		 * tool) should generally only be copied from the response to a
+		 * previous completion request, such that the messages array is built up
+		 * throughout a conversation.
 		 * <p>
 		 * API name: {@code messages}
 		 * <p>
@@ -301,7 +311,12 @@ public final Builder messages(List<Message> list) {
 		}

 		/**
-		 * Required - A list of objects representing the conversation.
+		 * Required - A list of objects representing the conversation. Requests should
+		 * generally only add new messages from the user (role user). The
+		 * other message roles (assistant, system, or
+		 * tool) should generally only be copied from the response to a
+		 * previous completion request, such that the messages array is built up
+		 * throughout a conversation.
 		 * <p>
 		 * API name: {@code messages}
 		 * <p>
@@ -313,7 +328,12 @@ public final Builder messages(Message value, Message... values) {
 		}

 		/**
-		 * Required - A list of objects representing the conversation.
+		 * Required - A list of objects representing the conversation. Requests should
+		 * generally only add new messages from the user (role user). The
+		 * other message roles (assistant, system, or
+		 * tool) should generally only be copied from the response to a
+		 * previous completion request, such that the messages array is built up
+		 * throughout a conversation.
 		 * <p>
 		 * API name: {@code messages}
 		 * <p>
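Note on the expanded messages javadoc above: it documents a usage pattern rather than an API change, so a short illustration may help reviewers. The sketch below shows the messages array being built up across turns; it relies only on the RequestChatCompletion.of(...) factory and the messages(...) builders visible in these hunks. newUserMessage(...) and copyOfAssistantReply(...) are hypothetical helpers, since Message construction is not part of this diff, and the Message import assumes the type lives in the same inference package as the builders that accept it.

import co.elastic.clients.elasticsearch.inference.Message;
import co.elastic.clients.elasticsearch.inference.RequestChatCompletion;

import java.util.List;

public class ConversationSketch {

    // Hypothetical helper: builds a Message with role "user". Not part of this diff.
    static Message newUserMessage(String text) {
        throw new UnsupportedOperationException("illustrative only");
    }

    // Hypothetical helper: carries the assistant Message from the previous
    // completion response over unchanged, as the new javadoc recommends.
    static Message copyOfAssistantReply(Message fromPreviousResponse) {
        return fromPreviousResponse;
    }

    static RequestChatCompletion nextTurn(List<Message> transcript, Message lastAssistantReply, String userText) {
        // Per the javadoc: copy earlier assistant/system/tool messages verbatim
        // and only append the new user message, so the array grows turn by turn.
        transcript.add(copyOfAssistantReply(lastAssistantReply));
        transcript.add(newUserMessage(userText));
        return RequestChatCompletion.of(r -> r.messages(transcript));
    }
}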

diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/TaskTypeJinaAi.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/TaskTypeJinaAi.java
new file mode 100644
index 000000000..21aa5602f
--- /dev/null
+++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/inference/TaskTypeJinaAi.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package co.elastic.clients.elasticsearch.inference;
+
+import co.elastic.clients.json.JsonEnum;
+import co.elastic.clients.json.JsonpDeserializable;
+import co.elastic.clients.json.JsonpDeserializer;
+
+//----------------------------------------------------------------
+// THIS CODE IS GENERATED. MANUAL EDITS WILL BE LOST.
+//----------------------------------------------------------------
+//
+// This code is generated from the Elasticsearch API specification
+// at https://github.com/elastic/elasticsearch-specification
+//
+// Manual updates to this file will be lost when the code is
+// re-generated.
+//
+// If you find a property that is missing or wrongly typed, please
+// open an issue or a PR on the API specification repository.
+//
+//----------------------------------------------------------------
+
+/**
+ *
+ * @see API
+ *      specification
+ */
+@JsonpDeserializable
+public enum TaskTypeJinaAi implements JsonEnum {
+	TextEmbedding("text_embedding"),
+
+	Rerank("rerank"),
+
+	;
+
+	private final String jsonValue;
+
+	TaskTypeJinaAi(String jsonValue) {
+		this.jsonValue = jsonValue;
+	}
+
+	public String jsonValue() {
+		return this.jsonValue;
+	}
+
+	public static final JsonEnum.Deserializer<TaskTypeJinaAi> _DESERIALIZER = new JsonEnum.Deserializer<>(
+			TaskTypeJinaAi.values());
+}
diff --git a/java-client/src/main/java/co/elastic/clients/elasticsearch/snapshot/SnapshotShardFailure.java b/java-client/src/main/java/co/elastic/clients/elasticsearch/snapshot/SnapshotShardFailure.java
index 31817895f..e409acfb0 100644
--- a/java-client/src/main/java/co/elastic/clients/elasticsearch/snapshot/SnapshotShardFailure.java
+++ b/java-client/src/main/java/co/elastic/clients/elasticsearch/snapshot/SnapshotShardFailure.java
@@ -30,6 +30,7 @@
 import co.elastic.clients.util.ObjectBuilder;
 import co.elastic.clients.util.WithJsonObjectBuilderBase;
 import jakarta.json.stream.JsonGenerator;
+import java.lang.Integer;
 import java.lang.String;
 import java.util.Objects;
 import java.util.function.Function;
@@ -67,7 +68,7 @@ public class SnapshotShardFailure implements JsonpSerializable {

 	private final String reason;

-	private final String shardId;
+	private final int shardId;

 	private final String indexUuid;

@@ -80,7 +81,7 @@ private SnapshotShardFailure(Builder builder) {
 		this.index = ApiTypeHelper.requireNonNull(builder.index, this, "index");
 		this.nodeId = builder.nodeId;
 		this.reason = ApiTypeHelper.requireNonNull(builder.reason, this, "reason");
-		this.shardId = ApiTypeHelper.requireNonNull(builder.shardId, this, "shardId");
+		this.shardId = ApiTypeHelper.requireNonNull(builder.shardId, this, "shardId", 0);
 		this.indexUuid = ApiTypeHelper.requireNonNull(builder.indexUuid, this, "indexUuid");
 		this.status = ApiTypeHelper.requireNonNull(builder.status, this, "status");

@@ -115,7 +116,7 @@ public final String reason() {
 	/**
 	 * Required - API name: {@code shard_id}
 	 */
-	public final String shardId() {
+	public final int shardId() {
 		return this.shardId;
 	}

@@ -187,7 +188,7 @@ public static class Builder extends WithJsonObjectBuilderBase<Builder>

 		private String reason;

-		private String shardId;
+		private Integer shardId;

 		private String indexUuid;

@@ -220,7 +221,7 @@ public final Builder reason(String value) {
 		/**
 		 * Required - API name: {@code shard_id}
 		 */
-		public final Builder shardId(String value) {
+		public final Builder shardId(int value) {
 			this.shardId = value;
 			return this;
 		}

@@ -272,7 +273,7 @@ protected static void setupSnapshotShardFailureDeserializer(ObjectDeserializer
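Note on the SnapshotShardFailure change above: moving shard_id from String to int is source-incompatible for callers, while the Builder keeps a boxed Integer field so requireNonNull can still detect a missing value. A minimal migration sketch follows; it assumes the standard generated of(...) factory and infers the index()/indexUuid()/status() accessor names from the fields in these hunks, since only reason() and shardId() appear verbatim in the diff.

import co.elastic.clients.elasticsearch.snapshot.SnapshotShardFailure;

public class ShardIdMigrationSketch {

    static SnapshotShardFailure rebuild(SnapshotShardFailure failure) {
        // Before this patch: String shard = failure.shardId();
        int shard = failure.shardId(); // now an int

        // The builder setter takes an int as well; other fields are copied over.
        return SnapshotShardFailure.of(b -> b
                .index(failure.index())
                .reason(failure.reason())
                .shardId(shard)
                .indexUuid(failure.indexUuid())
                .status(failure.status()));
    }
}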
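Earlier in this patch, the new TaskTypeJinaAi enum follows the usual JsonEnum pattern, so each constant maps to a plain wire string and _DESERIALIZER performs the reverse mapping when parsing responses. An illustrative round-trip, using only members shown in the new file:

import co.elastic.clients.elasticsearch.inference.TaskTypeJinaAi;

public class TaskTypeJinaAiSketch {
    public static void main(String[] args) {
        // jsonValue() exposes the JSON wire value for each constant.
        System.out.println(TaskTypeJinaAi.TextEmbedding.jsonValue()); // text_embedding
        System.out.println(TaskTypeJinaAi.Rerank.jsonValue());        // rerank
    }
}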