MySQL 9.4.0
Source Code Documentation
bulk_data_service.h
Go to the documentation of this file.
1/* Copyright (c) 2022, 2025, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/**
25 @file
26 Services for bulk data conversion and load to SE.
27*/
28
29#pragma once
30
31#include <assert.h>
33#include <stddef.h>
34#include <cstring>
35#include <functional>
36#include <iomanip>
37#include <iostream>
38#include <limits>
39#include <memory>
40#include <sstream>
41#include <string>
42#include <vector>
43#include "field_types.h"
45
46class THD;
47struct TABLE;
48struct CHARSET_INFO;
49using Blob_context = void *;
50
51/** The blob reference size. Refer to lob::ref_t::SIZE or FIELD_REF_SIZE. */
52constexpr size_t BLOB_REF_SIZE = 20;
53
55 std::string filename;
56 size_t row_number;
57 std::string column_name;
58 std::string column_type;
59 std::string column_input_data;
60 std::string m_error_mesg{};
61 std::string m_table_name{};
62 size_t m_bytes;
64
65 std::ostream &print(std::ostream &out) const;
66};
67
69 std::ostream &out) const {
70 out << "[Bulk_load_error_location_details: filename=" << filename
71 << ", column_name=" << column_name << "]";
72 return out;
73}
74
75/** Overloading the global output operator to print objects of type
76Bulk_load_error_location_details.
77@param[in] out output stream
78@param[in] obj object to be printed
79@return given output stream. */
80inline std::ostream &operator<<(std::ostream &out,
82 return obj.print(out);
83}
84
86 /** Column data. */
87 const char *m_data_ptr{};
88
89 /** Column data length. */
90 size_t m_data_len{};
91
92 /** Check if it is DB_ROW_ID column based on the value it contains.
93 @return true if it is DB_ROW_ID column, false otherwise */
94 bool is_row_id() const { return m_row_id != UINT64_MAX; }
95
96 /** The generated DB_ROW_ID value */
97 uint64_t m_row_id{UINT64_MAX};
98
99 /** Mark the column to be null, by setting length to a special value. This is
100 only used for columns whose state is maintained across chunks
101 (aka fragmented columns). */
102 void set_null() {
103 assert(m_data_ptr == nullptr);
105 }
106
107 /** Check if the column is null, by checking special value for length.
108 @return true if the column is null, false otherwise. */
109 bool is_null() const {
111 m_data_ptr == nullptr);
113 }
114
115 /** Check if the column data is stored externally. If the data is stored
116 externally, then the data length (m_data_len) would be equal to the
117 BLOB_REF_SIZE and the column data (m_data_ptr) will contain the lob
118 reference.
119 @return true if data is stored externally, false otherwise. */
120 bool is_ext() const {
121 assert(!m_is_ext || m_data_len == BLOB_REF_SIZE);
122 return m_is_ext;
123 }
124
125 /** Check if the column data is stored externally. It is called relaxed,
126 because the column length might not be equal to BLOB_REF_SIZE. Only to
127 be used while the blob is being processed by the CSV parser.
128 @return true if data is stored externally, false otherwise. */
129 bool is_ext_relaxed() const {
130 assert(!m_is_ext || m_data_len >= BLOB_REF_SIZE);
131 return m_is_ext;
132 }
133
134 /** Mark that the column data has been stored externally. */
135 void set_ext() {
136 assert(m_data_len == BLOB_REF_SIZE);
137 m_is_ext = true;
138 }
139
140 /** Initialize the members */
141 void init() {
142 m_data_ptr = nullptr;
143 m_data_len = 0;
144 m_is_ext = false;
145 m_row_id = UINT64_MAX;
146 }
147
148 /** Print this object into the given output stream.
149 @param[in] out output stream into which this object will be printed.
150 @return given output stream */
151 std::ostream &print(std::ostream &out) const;
152
153 std::string to_string() const;
154
155 private:
156 /** If true, the column data is stored externally. */
157 bool m_is_ext{false};
158};
159
160inline std::string Column_text::to_string() const {
162 sout << "[Column_text: len=" << m_data_len;
163 sout << ", val=";
164
165 if (m_data_ptr == nullptr) {
166 sout << "nullptr";
167 } else {
168 for (size_t i = 0; i < m_data_len; ++i) {
169 const char c = m_data_ptr[i];
170 if (isalnum(c)) {
171 sout << c;
172 } else {
173 sout << ".";
174 }
175 }
176 sout << "[hex=";
177 for (size_t i = 0; i < m_data_len; ++i) {
178 sout << std::setfill('0') << std::setw(2) << std::hex
179 << (int)*(&m_data_ptr[i]);
180 }
181 }
182 sout << "]";
183 return sout.str();
184}
185
186inline std::ostream &Column_text::print(std::ostream &out) const {
187 out << "[Column_text: this=" << static_cast<const void *>(this)
188 << ", m_data_ptr=" << static_cast<const void *>(m_data_ptr)
189 << ", m_data_len=" << m_data_len << ", m_is_ext=" << m_is_ext << "]";
190 return out;
191}
192
193/** Overloading the global output operator to print objects of type
194Column_text.
195@param[in] out output stream
196@param[in] obj object to be printed
197@return given output stream. */
198inline std::ostream &operator<<(std::ostream &out, const Column_text &obj) {
199 return obj.print(out);
200}
201
202struct Row_meta;
203
205 /** Column Data Type */
206 int16_t m_type{};
207
208 /** Column data length. */
209 uint16_t m_data_len{};
210
211 /** If column is NULL. */
212 bool m_is_null{false};
213
214 char *get_data() const { return m_is_null ? nullptr : m_data_ptr; }
215
216 void set_data(char *ptr) { m_data_ptr = ptr; }
217
218 /** Save the beginning of the row pointer in this object. This should be
219 called only when the column is null.
220 @param[in] row_begin pointer to beginning of row.*/
221 void row(char *row_begin) {
222 assert(m_is_null);
223 m_data_len = 0;
224 m_data_ptr = row_begin;
225 }
226
227 /** Get the pointer to the beginning of row. For a null column this is the
228 pointer saved by row(). For a non-null column this is valid only on the first
229 column of the row (col_index 0). There is no need to call this on other columns.
230 @param[in] row_meta meta data information about the row
231 @param[in] col_index Index of the first column which is 0.
232 @return pointer to row beginning. */
233 char *get_row_begin(const Row_meta &row_meta,
234 size_t col_index [[maybe_unused]]) const;
235
236 /** Column data in integer format. Used only for specific datatype. */
237 uint64_t m_int_data;
238
239 void init() {
240 m_type = 0;
241 m_data_len = 0;
242 m_is_null = false;
243 m_data_ptr = nullptr;
244 m_int_data = 0;
245 }
246
247 std::string to_string() const;
248
249 private:
250 /** Column data or row begin. There is a need to fetch the beginning of
251 the row from the vector of Column_mysql. But in the case of secondary
252 indexes, all the keys could be null and it becomes impossible to obtain
253 the pointer to beginning of the row. To solve this problem, I am re-using
254 this pointer to hold the row begin when the column is null. So it becomes
255 important to make use of m_is_null to check if the column is null. It is NOT
256 correct to check this pointer against nullptr to confirm if column is null.*/
257 char *m_data_ptr{nullptr};
258};
259
260inline std::string Column_mysql::to_string() const {
262 sout << "[Column_mysql: len=" << m_data_len;
263 sout << ", val=";
264
265 switch (m_type) {
266 case MYSQL_TYPE_LONG: {
267 sout << m_int_data;
268 } break;
269 default: {
270 for (size_t i = 0; i < m_data_len; ++i) {
271 const char c = m_data_ptr[i];
272 if (isalnum(c)) {
273 sout << c;
274 } else {
275 sout << ".";
276 }
277 }
278
279 } break;
280 }
281 if (m_type != MYSQL_TYPE_LONG) {
282 sout << "[hex=";
283 for (size_t i = 0; i < m_data_len; ++i) {
284 sout << std::setfill('0') << std::setw(2) << std::hex
285 << (int)*(&m_data_ptr[i]);
286 }
287 sout << "]";
288 }
289 return sout.str();
290}
291
292/** Implements the row and column memory management for parse and load
293operations. We try to pre-allocate the memory contiguously as much as we can
294to maximize the performance.
295
296@tparam Column_type Column_text when used in the CSV context, Column_mysql when
297used in the InnoDB context.
298*/
299template <typename Column_type>
301 public:
302 /** Create a new row bunch.
303 @param[in] n_cols number of columns */
304 Row_bunch(size_t n_cols) : m_num_columns(n_cols) {}
305
306 /** @return return number of rows in the bunch. */
307 size_t get_num_rows() const { return m_num_rows; }
308
309 /** @return return number of columns in each row. */
310 size_t get_num_cols() const { return m_num_columns; }
311
312 /** Process all columns, invoking callback for each.
313 @param[in] row_index index of the row
314 @param[in] cbk callback function
315 @return true if successful */
316 template <typename F>
317 bool process_columns(size_t row_index, F &&cbk) {
318 assert(row_index < m_num_rows);
319
320 auto row_offset = row_index * m_num_columns;
321 return process_columns_by_offset(row_offset, std::move(cbk));
322 }
323
324 template <typename F>
325 bool process_columns_by_offset(size_t row_offset, F &&cbk) {
326 assert(row_offset + m_num_columns <= m_columns.size());
327
328 for (size_t index = 0; index < m_num_columns; ++index) {
329 bool last_col = (index == m_num_columns - 1);
330 if (!cbk(m_columns[row_offset + index], last_col)) {
331 return false;
332 }
333 }
334 return true;
335 }
336
337 void reset() {
338 for (auto &col : m_columns) {
339 col.init();
340 }
341 }
342
343 /** Get current row offset to access columns.
344 @param[in] row_index row index
345 @return row offset in column vector. */
346 size_t get_row_offset(size_t row_index) const {
347 assert(row_index < m_num_rows);
348 return row_index * m_num_columns;
349 }
350
351 /** Get next row offset from current row offset.
352 @param[in,out] offset row offset
353 @return true if there is a next row. */
354 size_t get_next_row_offset(size_t &offset) const {
355 offset += m_num_columns;
356 return (offset < m_columns.size());
357 }
358
359 /** Get column using row offset and column index.
360 @param[in] row_offset row offset in column vector
361 @param[in] col_index index of the column within row
362 @return column data */
363 Column_type &get_column(size_t row_offset, size_t col_index) {
364 assert(col_index < m_num_columns);
365 assert(row_offset + col_index < m_columns.size());
366 return m_columns[row_offset + col_index];
367 }
368
369 /** Get column using row index and column index.
370 @param[in] row_index index of the row in the bunch
371 @param[in] col_index index of the column within row
372 @return column data */
373 Column_type &get_col(size_t row_index, size_t col_index) {
374 return get_column(get_row_offset(row_index), col_index);
375 }
376
377 /** Get column using the column offset.
378 @param[in] col_offset column offset
379 @return column data */
380 Column_type &get_col(size_t col_offset) { return m_columns[col_offset]; }
381
382 /** Get constant column for reading using row offset and column index.
383 @param[in] row_offset row offset in column vector
384 @param[in] col_index index of the column within row
385 @return column data */
386 const Column_type &read_column(size_t row_offset, size_t col_index) const {
387 assert(col_index < m_num_columns);
388 assert(row_offset + col_index < m_columns.size());
389 return m_columns[row_offset + col_index];
390 }
391
392 /** Set the number of rows. Extend the column storage if needed, up to the
393 maximum column storage limit (S_MAX_TOTAL_COLS).
394 @param[in] n_rows number of rows
395 @return true if successful, false if too many rows or columns. */
396 bool set_num_rows(size_t n_rows) {
397 /* Avoid any overflow during multiplication. */
398 if (n_rows > std::numeric_limits<uint32_t>::max() ||
400 return false;
401 }
402 auto total_cols = (uint64_t)n_rows * m_num_columns;
403
404 if (total_cols > S_MAX_TOTAL_COLS) {
405 return false;
406 }
407
408 m_num_rows = n_rows;
409
410 /* Extend columns if needed. */
411 if (m_columns.size() < total_cols) {
412 m_columns.resize(total_cols);
413 }
414 return true;
415 }
416
417 /** Limit allocation up to 600M columns. This number is rounded up from an
418 * estimate of the number of columns with the max chunk size (1024M). In the
419 * worst case we can have 2 bytes per column so a chunk can contain around
420 * 512M columns, and because of rows that spill over chunk boundaries we
421 * assume we can append a full additional row (which should have at most
422 * 4096 columns). Rounded up to 600M. */
423 const static size_t S_MAX_TOTAL_COLS = 600 * 1024 * 1024;
424
425 private:
426 /** All the columns. */
427 std::vector<Column_type> m_columns;
428
429 /** Number of rows. */
430 size_t m_num_rows{};
431
432 /** Number of columns in each row. */
434};
435
438
439/** Column metadata information. */
441 /** Data comparison method. */
442 enum class Compare {
443 /* Integer comparison */
445 /* Unsigned Integer comparison */
447 /* Binary comparison (memcmp) */
448 BINARY,
449 /* Need to callback to use appropriate comparison function in server. */
450 MYSQL
451 };
452
453 std::string get_compare_string() const {
454 switch (m_compare) {
456 return "INTEGER_SIGNED";
458 return "INTEGER_UNSIGNED";
459 case Compare::BINARY:
460 return "BINARY";
461 case Compare::MYSQL:
462 return "MYSQL";
463 }
464 assert(0);
465 return "INVALID";
466 }
467
468 /** @return true if integer type. */
469 bool is_integer() const {
472 }
473
474 /** Based on the column data type check if it can be stored externally.
475 @return true if the column data can be stored externally
476 @return false if the column data cannot be stored externally */
477 bool can_be_stored_externally() const;
478
479 /** true if this column is part of secondary index. */
481
482 /** Field type. (@ref enum_field_types) */
484
485 /** If column could be NULL. */
487
488 /** true if column belongs to primary index (key or non-key) */
489 bool m_is_pk{false};
490
491 /** true if column is a key for primary or secondary index. */
493
494 /** If the key is descending. */
496
497 /** If the key is prefix of the column. */
499
500 /** If it is fixed length type. */
502
503 /** If it is integer type. */
505
506 /** If it is unsigned integer type. */
508
509 /** Check the row header to find out if it is fixed length. For
510 character data type the row header indicates fixed length. */
512
513 /** If character column length can be kept in one byte. */
515
516 /** The length of column data if fixed. */
517 uint16_t m_fixed_len;
518
519 /** Maximum length of data in bytes. */
520 uint16_t m_max_len;
521
522 /** Index of column in row. */
523 uint16_t m_index;
524
525 /** Position of column in table. Refer to Field::field_index() */
527
528 /** Byte index in NULL bitmap. */
529 uint16_t m_null_byte;
530
531 /** BIT number in NULL bitmap. */
532 uint16_t m_null_bit;
533
534 /** Character set for char & varchar columns. */
535 const void *m_charset;
536
537 /** Field name */
538 std::string m_field_name;
539
540 /** Get a string representation of Column_meta object. Useful only for
541 debugging purposes.
542 @see Column_meta
543 @return string representation of this object. */
544 std::string to_string() const;
545
546 /** Print this object into the given output stream.
547 @param[in] out output stream into which object will be printed
548 @return given output stream. */
549 std::ostream &print(std::ostream &out) const;
550
551 /** Get the data type of the column as a string.
552 @return data type of the column as a string. */
553 std::string get_type_string() const;
554};
555
556inline std::string Column_meta::get_type_string() const {
557 switch (m_type) {
559 return "decimal";
560 case MYSQL_TYPE_TINY:
561 return "tiny";
562 case MYSQL_TYPE_SHORT:
563 return "short";
564 case MYSQL_TYPE_LONG:
565 return "long";
566 case MYSQL_TYPE_FLOAT:
567 return "float";
569 return "double";
570 case MYSQL_TYPE_NULL:
571 return "null";
573 return "timestamp";
575 return "longlong";
576 case MYSQL_TYPE_INT24:
577 return "int";
578 case MYSQL_TYPE_DATE:
579 return "date";
580 case MYSQL_TYPE_TIME:
581 return "time";
583 return "datetime";
584 case MYSQL_TYPE_YEAR:
585 return "year";
587 return "date";
589 return "varchar";
590 case MYSQL_TYPE_BIT:
591 return "bit";
593 return "timestamp";
595 return "datetime";
596 case MYSQL_TYPE_TIME2:
597 return "time";
599 return "typed_array";
601 return "vector";
603 return "invalid";
604 case MYSQL_TYPE_BOOL:
605 return "bool";
606 case MYSQL_TYPE_JSON:
607 return "json";
609 return "decimal";
610 case MYSQL_TYPE_ENUM:
611 return "enum";
612 case MYSQL_TYPE_SET:
613 return "set";
615 return "tiny_blob";
617 return "medium_blob";
619 return "long_blob";
620 case MYSQL_TYPE_BLOB:
621 return "blob";
623 return "var_string";
625 return "string";
627 return "geometry";
628 }
629 return "invalid";
630}
631
633 switch (m_type) {
634 case MYSQL_TYPE_JSON:
639 case MYSQL_TYPE_BLOB:
642 return true;
643 }
644 default:
645 break;
646 }
647 return false;
648}
649
650inline std::string Column_meta::to_string() const {
652 out << "[Column_meta: m_type=" << get_type_string()
653 << ", m_field_name=" << m_field_name << ", m_index=" << m_index
654 << ", m_field_index=" << m_field_index
655 << ", m_is_single_byte_len=" << m_is_single_byte_len
656 << ", m_is_fixed_len=" << m_is_fixed_len
657 << ", m_fixed_len=" << m_fixed_len << ", m_null_byte=" << m_null_byte
658 << ", m_null_bit=" << m_null_bit << ", m_compare=" << get_compare_string()
659 << ", m_is_desc_key=" << m_is_desc_key << "]";
660 return out.str();
661}
662
663inline std::ostream &Column_meta::print(std::ostream &out) const {
664 out << to_string();
665 return out;
666}
667
668/** Overloading the global output operator to print objects of type
669Column_meta.
670@param[in] out output stream
671@param[in] obj object to be printed
672@return given output stream. */
673inline std::ostream &operator<<(std::ostream &out, const Column_meta &obj) {
674 return obj.print(out);
675}
676
677/** Table metadata. */
679 /** Number of keys/indexes the table has. */
680 size_t m_n_keys;
681
682 /** Key number of the primary key. */
684
685 /** True if generated DB_ROW_ID is the pk. */
686 bool dbrowid_is_pk{false};
687
688 /** Table being bulk loaded. */
689 std::string m_table_name;
690};
691
692/** Row metadata */
693struct Row_meta {
694 /** Key type for fast comparison. */
695 enum class Key_type {
696 /* All keys are signed integers and ascending. */
698 /* All keys are integer. */
699 INT,
700 /* Keys are of any supported type. */
701 ANY
702 };
703 /** All columns in a row are arranged with key columns first. */
704 std::vector<Column_meta> m_columns;
705
706 /** All columns in a row arranged as per col_index. */
707 std::vector<const Column_meta *> m_columns_text_order;
708
709 /** Get a string representation of this Row_meta object.
710 @see Row_meta
711 @return string representation of this object. */
712 std::string to_string() const;
713
714 /** Get the metadata of the given column.
715 @param[in] col_index position of the column in the index.
716 @return metadata of the requested column. */
717 const Column_meta &get_column_meta_index_order(size_t col_index) const {
718 assert(col_index < m_columns.size());
719 return m_columns[col_index];
720 }
721
722 /** Get the meta data of the column.
723 @param[in] col_index the index of the column as it appears in CSV file.
724 @return a reference to the column meta data.*/
725 const Column_meta &get_column_meta(size_t col_index) const {
726 assert(col_index < m_columns_text_order.size());
727 assert(col_index == m_columns_text_order[col_index]->m_index);
728 return *m_columns_text_order[col_index];
729 }
730
731 /** Total bitmap header length for the row. */
732 size_t m_bitmap_length = 0;
733
734 /** Total header length. */
735 size_t m_header_length = 0;
736
737 /** Length of the first key column. Helps to get the row pointer from first
738 key data pointer. */
739 size_t m_first_key_len = 0;
740
741 /** Key length in bytes for non-integer keys. This is required to estimate
742 the space required to save keys. */
743 size_t m_key_length = 0;
744
745 /** Number of columns used in primary key. */
746 uint32_t m_keys = 0;
747
748 /** Number of columns not used in primary Key. */
749 uint32_t m_non_keys = 0;
750
751 /** Key type for comparison. */
753
754 /** Total number of columns. A key could be on a column prefix.
755 m_num_columns <= m_keys + m_non_keys */
756 uint32_t m_num_columns = 0;
757
758 /** Approximate row length. */
760
761 /** Number of columns that can be stored externally. */
762 size_t m_n_blob_cols{0};
763
764 /** Name of the key */
765 std::string m_name;
766
767 /** true if primary key, false if secondary key. */
768 bool is_pk;
769
770 /** true if DB_ROW_ID is the pk, false otherwise. */
771 bool dbrowid_is_pk{false};
772};
773
774inline std::ostream &operator<<(std::ostream &os,
776 switch (key_type) {
778 os << "ANY";
779 break;
781 os << "INT_SIGNED_ASC";
782 break;
784 os << "INT";
785 break;
786 }
787 return os;
788}
789
790inline std::string Row_meta::to_string() const {
792 out << "[Row_meta: m_name=" << m_name << ", m_num_columns=" << m_num_columns
793 << ", m_keys=" << m_keys << ", m_non_keys=" << m_non_keys
794 << ", m_key_length=" << m_key_length << ", m_key_type=" << m_key_type
795 << ", m_approx_row_len=" << m_approx_row_len;
796 for (auto &col_meta : m_columns) {
797 out << col_meta.to_string() << ", ";
798 }
799 out << "]";
800 return out.str();
801}
802
803inline char *Column_mysql::get_row_begin(const Row_meta &row_meta,
804 size_t col_index
805 [[maybe_unused]]) const {
806 assert(m_is_null || col_index == 0);
807 return m_is_null ? m_data_ptr
808 : (m_data_ptr - row_meta.m_first_key_len -
809 row_meta.m_header_length);
810}
811
812namespace Bulk_load {
813
816 public:
817 void KeyTooBig() const override;
818 void ValueTooBig() const override;
819 void TooDeep() const override;
820 void InvalidJson() const override;
821 void InternalError(const char *message) const override;
822 bool CheckStack() const override;
823
824 const char *c_str() const { return m_error.c_str(); }
825
826 std::string get_error() const { return m_error; }
827
828 private:
829 mutable std::string m_error{};
830};
831
833 m_error = "Key is too big";
834}
835
837 m_error = "Value is too big";
838}
839
841 m_error = "JSON document has more nesting levels than supported";
842}
844 m_error = "Invalid JSON value is encountered";
845}
847 const char *message [[maybe_unused]]) const {
848 m_error = message;
849 m_error += " (Internal Error)";
850}
851
853 return false;
854}
855
856/** Callbacks for collecting time statistics */
858 /* Operation begin. */
859 std::function<void()> m_fn_begin;
860 /* Operation end. */
861 std::function<void()> m_fn_end;
862};
863
864} // namespace Bulk_load
865
866/** Bulk Data conversion. */
867BEGIN_SERVICE_DEFINITION(bulk_data_convert)
868/** Convert row from text format for MySQL column format. Convert as many
869rows as possible consuming the data buffer starting from next_index. On
870output next_index is the next row index that is not yet consumed. If it
871matches the size of input text_rows, then all rows are consumed.
872@param[in,out] thd session THD
873@param[in] table MySQL TABLE
874@param[in] text_rows rows with column in text
875@param[in,out] next_index next_index in text_rows to be processed
876@param[in,out] buffer data buffer for keeping sql row data
877@param[in,out] buffer_length length of the data buffer
878@param[in] charset input row data character set
879@param[in] metadata row metadata
880@param[out] sql_rows rows with column in MySQL column format
881@return error code. */
883 (THD * thd, const TABLE *table, const Rows_text &text_rows,
884 size_t &next_index, char *buffer, size_t &buffer_length,
885 const CHARSET_INFO *charset, const Row_meta &metadata,
886 Rows_mysql &sql_rows,
888
889/** Convert row to MySQL column format from raw form
890@param[in,out] buffer input raw data buffer
891@param[in] buffer_length buffer length
892@param[in] metadata row metadata
893@param[in] start_index start row index in row bunch
894@param[out] consumed_length length of buffer consumed
895@param[in,out] sql_rows row bunch to fill data
896@return error code. */
898 (char *buffer, size_t buffer_length, const Row_meta &metadata,
899 size_t start_index, size_t &consumed_length,
900 Rows_mysql &sql_rows));
901
902/** Convert row to MySQL column format using the key
903@param[in] metadata row metadata
904@param[in] sql_keys Key bunch
905@param[in] key_offset offset for the key
906@param[in,out] sql_rows row bunch to fill data
907@param[in] sql_index index of the row to be filled
908@return error code. */
910 (const Row_meta &metadata, const Rows_mysql &sql_keys,
911 size_t key_offset, Rows_mysql &sql_rows, size_t sql_index));
912
913/** Check if session is interrupted.
914@param[in,out] thd session THD
915@return true if connection or statement is killed. */
917
918/** Compare two key columns
919@param[in] key1 first key
920@param[in] key2 second key
921@param[in] col_meta column meta information
922@return positive, 0, negative, if key1 is greater than, equal to, less than key2 */
924 (const Column_mysql &key1, const Column_mysql &key2,
925 const Column_meta &col_meta));
926
927/** Get row metadata information for all the indexes.
928@param[in,out] thd session THD
929@param[in] table MySQL TABLE
930@param[in] have_key include Primary Key metadata
931@param[out] metadata Metadata for each of the indexes.
932@return true if successful. */
934 (THD * thd, const TABLE *table, bool have_key,
935 std::vector<Row_meta> &metadata));
936
937/** Get table metadata information for the table being bulk loaded.
938@param[in,out] thd session THD
939@param[in] table MySQL TABLE
940@param[out] metadata Metadata of the table.
941@return true if successful. */
943 (THD * thd, const TABLE *table, Table_meta &metadata));
944
945END_SERVICE_DEFINITION(bulk_data_convert)
946
947/** Column metadata information. */
948/* Bulk data load to SE. */
950/** Begin Loading bulk data to SE.
951@param[in,out] thd session THD
952@param[in] table MySQL TABLE
953@param[in] keynr key number, identifying the index being loaded.
954@param[in] data_size total data size to load
955@param[in] memory SE memory to be used
956@param[in] num_threads Number of concurrent threads
957@return SE bulk load context or nullptr in case of an error. */
958DECLARE_METHOD(void *, begin,
959 (THD * thd, const TABLE *table, size_t keynr, size_t data_size,
960 size_t memory, size_t num_threads));
961
962/** Load a set of rows to SE table by one thread.
963@param[in,out] thd session THD
964@param[in,out] ctx SE load context returned by begin()
965@param[in] table MySQL TABLE
966@param[in] sql_rows row data to load
967@param[in] thread current thread number
968@param[in] wait_cbks wait stat callbacks
969@return true if successful. */
970DECLARE_METHOD(bool, load,
971 (THD * thd, void *ctx, const TABLE *table,
972 const Rows_mysql &sql_rows, size_t thread,
973 Bulk_load::Stat_callbacks &wait_cbks));
974
975/** Create a blob context object to insert a blob.
976@param[in,out] thd session THD
977@param[in,out] load_ctx SE load context returned by begin()
978@param[in] table MySQL TABLE
979@param[out] blob_ctx a blob context object to insert a blob.
980@param[out] blobref buffer to hold blob reference
981@param[in] thread current thread number
982@return true if successful. */
984 (THD * thd, void *load_ctx, const TABLE *table,
985 Blob_context &blob_ctx, unsigned char *blobref, size_t thread));
986
987/** Write data into a blob
988@param[in,out] thd session THD
989@param[in,out] load_ctx SE load context returned by begin()
990@param[in] table MySQL TABLE
991@param[in] blob_ctx a blob context object to insert a blob.
992@param[out] blobref buffer to hold blob reference
993@param[in] thread current thread number
994@param[in] data blob data to be written
995@param[in] data_len length of blob data to be written (in bytes)
996@return true if successful. */
998 (THD * thd, void *load_ctx, const TABLE *table,
999 Blob_context blob_ctx, unsigned char *blobref, size_t thread,
1000 const unsigned char *data, size_t data_len));
1001
1002/** Close the blob
1003@param[in,out] thd session THD
1004@param[in,out] load_ctx SE load context returned by begin()
1005@param[in] table MySQL TABLE
1006@param[in] blob_ctx a blob context object to insert a blob.
1007@param[out] blobref buffer to hold blob reference
1008@param[in] thread current thread number
1009@return true if successful. */
1011 (THD * thd, void *load_ctx, const TABLE *table,
1012 Blob_context blob_ctx, unsigned char *blobref, size_t thread));
1013
1014/** End Loading bulk data to SE.
1015
1016Called at the end of bulk load execution, even if begin or load calls failed.
1017
1018@param[in,out] thd session THD
1019@param[in,out] ctx SE load context
1020@param[in] table MySQL TABLE
1021@param[in] error true, if exiting after error
1022@return true if successful. */
1023DECLARE_METHOD(bool, end,
1024 (THD * thd, void *ctx, const TABLE *table, bool error));
1025
1026/** Check if a table is supported by the bulk load implementation.
1027@param[in,out] thd session THD
1028@param[in] table MySQL TABLE
1029@return true if table is supported. */
1031
1032/** Get available buffer pool memory for bulk load operations.
1033@param[in,out] thd session THD
1034@param[in] table MySQL TABLE
1035@return buffer pool memory available for bulk load. */
1037
Kerberos Client Authentication nullptr
Definition: auth_kerberos_client_plugin.cc:247
constexpr size_t BLOB_REF_SIZE
The blob reference size.
Definition: bulk_data_service.h:52
void * Blob_context
Definition: bulk_data_service.h:49
std::ostream & operator<<(std::ostream &out, const Bulk_load_error_location_details &obj)
Overloading the global output operator to print objects of type Bulk_load_error_location_details.
Definition: bulk_data_service.h:80
Definition: bulk_data_service.h:815
const char * c_str() const
Definition: bulk_data_service.h:824
void KeyTooBig() const override
Called when a JSON object contains a member with a name that is longer than supported by the JSON bin...
Definition: bulk_data_service.h:832
std::string get_error() const
Definition: bulk_data_service.h:826
std::string m_error
Definition: bulk_data_service.h:829
void InternalError(const char *message) const override
Called when an internal error occurs.
Definition: bulk_data_service.h:846
void ValueTooBig() const override
Called when a JSON document is too big to be stored in the JSON binary format.
Definition: bulk_data_service.h:836
void TooDeep() const override
Called when a JSON document has more nesting levels than supported.
Definition: bulk_data_service.h:840
void InvalidJson() const override
Called when an invalid JSON value is encountered.
Definition: bulk_data_service.h:843
bool CheckStack() const override
Check if the stack is about to be exhausted, and report the error.
Definition: bulk_data_service.h:852
Error handler for the functions that serialize a JSON value in the JSON binary storage format.
Definition: json_error_handler.h:49
Implements the row and column memory management for parse and load operations.
Definition: bulk_data_service.h:300
bool set_num_rows(size_t n_rows)
Set the number of rows.
Definition: bulk_data_service.h:396
std::vector< Column_type > m_columns
All the columns.
Definition: bulk_data_service.h:427
size_t get_next_row_offset(size_t &offset) const
Get next row offset from current row offset.
Definition: bulk_data_service.h:354
Column_type & get_col(size_t col_offset)
Get column using the column offset.
Definition: bulk_data_service.h:380
bool process_columns(size_t row_index, F &&cbk)
Process all columns, invoking callback for each.
Definition: bulk_data_service.h:317
bool process_columns_by_offset(size_t row_offset, F &&cbk)
Definition: bulk_data_service.h:325
size_t get_num_cols() const
Definition: bulk_data_service.h:310
void reset()
Definition: bulk_data_service.h:337
size_t m_num_rows
Number of rows.
Definition: bulk_data_service.h:430
size_t get_row_offset(size_t row_index) const
Get current row offset to access columns.
Definition: bulk_data_service.h:346
size_t get_num_rows() const
Definition: bulk_data_service.h:307
const Column_type & read_column(size_t row_offset, size_t col_index) const
Get constant column for reading using row offset and column index.
Definition: bulk_data_service.h:386
Column_type & get_col(size_t row_index, size_t col_index)
Get column using row index and column index.
Definition: bulk_data_service.h:373
Row_bunch(size_t n_cols)
Create a new row bunch.
Definition: bulk_data_service.h:304
static const size_t S_MAX_TOTAL_COLS
Limit allocation up to 600M columns.
Definition: bulk_data_service.h:423
size_t m_num_columns
Number of columns in each row.
Definition: bulk_data_service.h:433
Column_type & get_column(size_t row_offset, size_t col_index)
Get column using row offset and column index.
Definition: bulk_data_service.h:363
For each client connection we create a separate thread with THD serving as a thread/connection descriptor.
Definition: sql_lexer_thd.h:36
This file contains the field type.
enum_field_types
Column types for MySQL. Note: Keep include/mysql/components/services/bits/stored_program_bits.h in sync with this.
Definition: field_types.h:55
@ MYSQL_TYPE_BOOL
Currently just a placeholder.
Definition: field_types.h:79
@ MYSQL_TYPE_TIME2
Internal to MySQL.
Definition: field_types.h:75
@ MYSQL_TYPE_VARCHAR
Definition: field_types.h:71
@ MYSQL_TYPE_LONGLONG
Definition: field_types.h:64
@ MYSQL_TYPE_LONG_BLOB
Definition: field_types.h:86
@ MYSQL_TYPE_VAR_STRING
Definition: field_types.h:88
@ MYSQL_TYPE_BLOB
Definition: field_types.h:87
@ MYSQL_TYPE_TINY
Definition: field_types.h:57
@ MYSQL_TYPE_TIME
Definition: field_types.h:67
@ MYSQL_TYPE_SET
Definition: field_types.h:83
@ MYSQL_TYPE_NEWDATE
Internal to MySQL.
Definition: field_types.h:70
@ MYSQL_TYPE_VECTOR
Definition: field_types.h:77
@ MYSQL_TYPE_JSON
Definition: field_types.h:80
@ MYSQL_TYPE_STRING
Definition: field_types.h:89
@ MYSQL_TYPE_NULL
Definition: field_types.h:62
@ MYSQL_TYPE_ENUM
Definition: field_types.h:82
@ MYSQL_TYPE_TINY_BLOB
Definition: field_types.h:84
@ MYSQL_TYPE_LONG
Definition: field_types.h:59
@ MYSQL_TYPE_BIT
Definition: field_types.h:72
@ MYSQL_TYPE_INVALID
Definition: field_types.h:78
@ MYSQL_TYPE_GEOMETRY
Definition: field_types.h:90
@ MYSQL_TYPE_NEWDECIMAL
Definition: field_types.h:81
@ MYSQL_TYPE_DECIMAL
Definition: field_types.h:56
@ MYSQL_TYPE_TYPED_ARRAY
Used for replication only.
Definition: field_types.h:76
@ MYSQL_TYPE_DOUBLE
Definition: field_types.h:61
@ MYSQL_TYPE_MEDIUM_BLOB
Definition: field_types.h:85
@ MYSQL_TYPE_DATETIME2
Internal to MySQL.
Definition: field_types.h:74
@ MYSQL_TYPE_SHORT
Definition: field_types.h:58
@ MYSQL_TYPE_DATE
Definition: field_types.h:66
@ MYSQL_TYPE_FLOAT
Definition: field_types.h:60
@ MYSQL_TYPE_TIMESTAMP
Definition: field_types.h:63
@ MYSQL_TYPE_INT24
Definition: field_types.h:65
@ MYSQL_TYPE_DATETIME
Definition: field_types.h:68
@ MYSQL_TYPE_TIMESTAMP2
Definition: field_types.h:73
@ MYSQL_TYPE_YEAR
Definition: field_types.h:69
static int compare_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
Definition: pfs_instr_class.cc:2438
static uint16 key1[1001]
Definition: hp_test2.cc:50
#define F
Definition: jit_executor_value.cc:374
void error(const char *format,...)
int mysql_format_from_raw(char *buffer, size_t buffer_length, const Row_meta &metadata, size_t start_index, size_t &consumed_length, Rows_mysql &sql_rows) noexcept
Definition: bulk_data_service.cc:1853
bool get_table_metadata(THD *thd, const TABLE *table, Table_meta &table_meta) noexcept
Definition: bulk_data_service.cc:2524
int mysql_format(THD *thd, const TABLE *table, const Rows_text &text_rows, size_t &next_index, char *buffer, size_t &buffer_length, const CHARSET_INFO *charset, const Row_meta &metadata, Rows_mysql &sql_rows, Bulk_load_error_location_details &error_details) noexcept
Definition: bulk_data_service.cc:1884
bool get_row_metadata_all(THD *thd, const TABLE *table, bool have_key, std::vector< Row_meta > &row_meta_all) noexcept
Definition: bulk_data_service.cc:2543
int mysql_format_using_key(const Row_meta &metadata, const Rows_mysql &sql_keys, size_t key_offset, Rows_mysql &sql_rows, size_t sql_index) noexcept
Definition: bulk_data_service.cc:1794
bool is_killed(THD *thd) noexcept
Definition: bulk_data_service.cc:1936
bool open_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context &blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2609
size_t get_se_memory_size(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2717
bool write_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread, const unsigned char *data, size_t data_len) noexcept
Definition: bulk_data_service.cc:2619
bool close_blob(THD *thd, void *load_ctx, const TABLE *table, Blob_context blob_ctx, unsigned char *blobref, size_t thread) noexcept
Definition: bulk_data_service.cc:2627
bool is_table_supported(THD *thd, const TABLE *table) noexcept
Definition: bulk_data_service.cc:2721
Definition: bulk_data_service.h:812
static PFS_engine_table_share_proxy table
Definition: pfs.cc:61
const std::string charset("charset")
bool load(THD *, const dd::String_type &fname, dd::String_type *buf)
Read an sdi file from disk and store in a buffer.
Definition: sdi_file.cc:308
std::string hex(const Container &c)
Definition: hex.h:61
bool index(const std::string &value, const String &search_for, uint32_t *idx)
Definition: contains.h:76
int key_type
Definition: method.h:38
Definition: aligned_atomic.h:44
ValueType max(X &&first)
Definition: gtid.h:103
const char * begin(const char *const c)
Definition: base64.h:44
mutable_buffer buffer(void *p, size_t n) noexcept
Definition: buffer.h:418
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
std::basic_ostringstream< char, std::char_traits< char >, ut::allocator< char > > ostringstream
Specialization of basic_ostringstream which uses ut::allocator.
Definition: ut0new.h:2872
#define DECLARE_METHOD(retval, name, args)
Declares a method as a part of the Service definition.
Definition: service.h:103
#define END_SERVICE_DEFINITION(name)
A macro to end the last Service definition started with the BEGIN_SERVICE_DEFINITION macro.
Definition: service.h:91
#define BEGIN_SERVICE_DEFINITION(name)
Declares a new Service.
Definition: service.h:86
Callbacks for collecting time statistics.
Definition: bulk_data_service.h:857
std::function< void()> m_fn_begin
Definition: bulk_data_service.h:859
std::function< void()> m_fn_end
Definition: bulk_data_service.h:861
Definition: bulk_data_service.h:54
std::string filename
Definition: bulk_data_service.h:55
std::string m_table_name
Definition: bulk_data_service.h:61
size_t m_bytes
Definition: bulk_data_service.h:62
size_t row_number
Definition: bulk_data_service.h:56
std::string column_input_data
Definition: bulk_data_service.h:59
std::string column_name
Definition: bulk_data_service.h:57
size_t m_column_length
Definition: bulk_data_service.h:63
std::string m_error_mesg
Definition: bulk_data_service.h:60
std::ostream & print(std::ostream &out) const
Definition: bulk_data_service.h:68
std::string column_type
Definition: bulk_data_service.h:58
Definition: m_ctype.h:421
Column metadata information.
Definition: bulk_data_service.h:440
bool m_is_prefix_key
If the key is prefix of the column.
Definition: bulk_data_service.h:498
enum_field_types m_type
Field type.
Definition: bulk_data_service.h:483
std::string m_field_name
Field name.
Definition: bulk_data_service.h:538
std::string get_type_string() const
Get the data type of the column as a string.
Definition: bulk_data_service.h:556
uint16_t m_index
Index of column in row.
Definition: bulk_data_service.h:523
bool m_is_single_byte_len
If character column length can be kept in one byte.
Definition: bulk_data_service.h:514
uint16_t m_null_byte
Byte index in NULL bitmap.
Definition: bulk_data_service.h:529
bool m_is_desc_key
If the key is descending.
Definition: bulk_data_service.h:495
Compare m_compare
Data comparison method used for this column.
Definition: bulk_data_service.h:504
bool m_is_pk
true if column belongs to primary index (key or non-key)
Definition: bulk_data_service.h:489
uint16_t m_fixed_len
The length of column data if fixed.
Definition: bulk_data_service.h:517
std::string to_string() const
Get a string representation of Column_meta object.
Definition: bulk_data_service.h:650
bool is_integer() const
Definition: bulk_data_service.h:469
Compare
Data comparison method.
Definition: bulk_data_service.h:442
uint16_t m_field_index
Position of column in table.
Definition: bulk_data_service.h:526
uint16_t m_max_len
Maximum length of data in bytes.
Definition: bulk_data_service.h:520
bool m_is_fixed_len
If it is fixed length type.
Definition: bulk_data_service.h:501
bool m_is_key
true if column is a key for primary or secondary index.
Definition: bulk_data_service.h:492
uint16_t m_null_bit
BIT number in NULL bitmap.
Definition: bulk_data_service.h:532
bool can_be_stored_externally() const
Based on the column data type check if it can be stored externally.
Definition: bulk_data_service.h:632
bool m_fixed_len_if_set_in_row
Check the row header to find out if it is fixed length.
Definition: bulk_data_service.h:511
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:663
bool m_is_nullable
If column could be NULL.
Definition: bulk_data_service.h:486
bool m_is_part_of_sk
true if this column is part of secondary index.
Definition: bulk_data_service.h:480
bool m_is_unsigned
If it is unsigned integer type.
Definition: bulk_data_service.h:507
std::string get_compare_string() const
Definition: bulk_data_service.h:453
const void * m_charset
Character set for char & varchar columns.
Definition: bulk_data_service.h:535
Definition: bulk_data_service.h:204
uint64_t m_int_data
Column data in integer format.
Definition: bulk_data_service.h:237
std::string to_string() const
Definition: bulk_data_service.h:260
bool m_is_null
If column is NULL.
Definition: bulk_data_service.h:212
char * get_row_begin(const Row_meta &row_meta, size_t col_index) const
Get the pointer to the beginning of the row.
Definition: bulk_data_service.h:803
char * m_data_ptr
Column data or row begin.
Definition: bulk_data_service.h:257
char * get_data() const
Definition: bulk_data_service.h:214
int16_t m_type
Column Data Type.
Definition: bulk_data_service.h:206
uint16_t m_data_len
Column data length.
Definition: bulk_data_service.h:209
void set_data(char *ptr)
Definition: bulk_data_service.h:216
void init()
Definition: bulk_data_service.h:239
void row(char *row_begin)
Save the beginning of the row pointer in this object.
Definition: bulk_data_service.h:221
Definition: bulk_data_service.h:85
bool is_null() const
Check if the column is null, by checking special value for length.
Definition: bulk_data_service.h:109
bool is_ext_relaxed() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:129
bool m_is_ext
If true, the column data is stored externally.
Definition: bulk_data_service.h:157
void set_null()
Mark the column to be null, by setting length to a special value.
Definition: bulk_data_service.h:102
std::ostream & print(std::ostream &out) const
Print this object into the given output stream.
Definition: bulk_data_service.h:186
const char * m_data_ptr
Column data.
Definition: bulk_data_service.h:87
bool is_row_id() const
Check if it is DB_ROW_ID column based on the value it contains.
Definition: bulk_data_service.h:94
uint64_t m_row_id
The generated DB_ROW_ID value.
Definition: bulk_data_service.h:97
void init()
Initialize the members.
Definition: bulk_data_service.h:141
void set_ext()
Mark that the column data has been stored externally.
Definition: bulk_data_service.h:135
bool is_ext() const
Check if the column data is stored externally.
Definition: bulk_data_service.h:120
size_t m_data_len
Column data length.
Definition: bulk_data_service.h:90
std::string to_string() const
Definition: bulk_data_service.h:160
Definition: mysql.h:300
Row metadata.
Definition: bulk_data_service.h:693
const Column_meta & get_column_meta_index_order(size_t col_index) const
Get the metadata of the given column.
Definition: bulk_data_service.h:717
const Column_meta & get_column_meta(size_t col_index) const
Get the meta data of the column.
Definition: bulk_data_service.h:725
size_t m_n_blob_cols
Number of columns that can be stored externally.
Definition: bulk_data_service.h:762
size_t m_bitmap_length
Total bitmap header length for the row.
Definition: bulk_data_service.h:732
std::string to_string() const
Get a string representation of this Row_meta object.
Definition: bulk_data_service.h:790
bool dbrowid_is_pk
true if DB_ROW_ID is the pk, false otherwise.
Definition: bulk_data_service.h:771
size_t m_first_key_len
Length of the first key column.
Definition: bulk_data_service.h:739
size_t m_header_length
Total header length.
Definition: bulk_data_service.h:735
Key_type
Key type for fast comparison.
Definition: bulk_data_service.h:695
uint32_t m_non_keys
Number of columns not used in primary Key.
Definition: bulk_data_service.h:749
uint32_t m_num_columns
Total number of columns.
Definition: bulk_data_service.h:756
uint32_t m_keys
Number of columns used in primary key.
Definition: bulk_data_service.h:746
size_t m_key_length
Key length in bytes for non-integer keys.
Definition: bulk_data_service.h:743
std::string m_name
Name of the key.
Definition: bulk_data_service.h:765
std::vector< Column_meta > m_columns
All columns in a row are arranged with key columns first.
Definition: bulk_data_service.h:704
std::vector< const Column_meta * > m_columns_text_order
All columns in a row arranged as per col_index.
Definition: bulk_data_service.h:707
Key_type m_key_type
Key type for comparison.
Definition: bulk_data_service.h:752
size_t m_approx_row_len
Approximate row length.
Definition: bulk_data_service.h:759
bool is_pk
true if primary key, false if secondary key.
Definition: bulk_data_service.h:768
Definition: table.h:1433
Table metadata.
Definition: bulk_data_service.h:678
size_t m_keynr_pk
Key number of the primary key.
Definition: bulk_data_service.h:683
bool dbrowid_is_pk
True if generated DB_ROW_ID is the pk.
Definition: bulk_data_service.h:686
std::string m_table_name
Table being bulk loaded.
Definition: bulk_data_service.h:689
size_t m_n_keys
Number of keys/indexes the table has.
Definition: bulk_data_service.h:680