1#ifndef SIMDJSON_DOM_FRACTURED_JSON_INL_H
2#define SIMDJSON_DOM_FRACTURED_JSON_INL_H
4#include "simdjson/dom/fractured_json.h"
5#include "simdjson/dom/serialization.h"
6#include "simdjson/dom/element-inl.h"
7#include "simdjson/dom/array-inl.h"
8#include "simdjson/dom/object-inl.h"
9#include "simdjson/dom/parser-inl.h"
10#include "simdjson/padded_string.h"
11#include "simdjson/internal/json_structure_analyzer.h"
12#include "simdjson/internal/fractured_formatter.h"
25inline element_metrics structure_analyzer::analyze(
const dom::element& elem,
26 const fractured_json_options& opts) {
27 current_opts_ = &opts;
28 return analyze_element(elem, 0);
31inline void structure_analyzer::clear() {
32 current_opts_ =
nullptr;
35inline element_metrics structure_analyzer::analyze_array(
const dom::array& arr,
36 const fractured_json_options& opts) {
37 current_opts_ = &opts;
38 return analyze_array(arr, 0);
41inline element_metrics structure_analyzer::analyze_object(
const dom::object& obj,
42 const fractured_json_options& opts) {
43 current_opts_ = &opts;
44 return analyze_object(obj, 0);
47inline element_metrics structure_analyzer::analyze_element(
const dom::element& elem,
size_t depth) {
48 switch (elem.type()) {
51 if (elem.get_array().get(arr) ==
SUCCESS) {
52 return analyze_array(arr, depth);
58 if (elem.get_object().get(obj) ==
SUCCESS) {
59 return analyze_object(obj, depth);
65 return analyze_scalar(elem);
67 return element_metrics{};
// Build metrics for a non-container element: zero complexity, no children,
// always inlinable, with an estimated printed length that depends on the type.
// NOTE(review): this listing omits lines (case labels, local declarations,
// closing braces, and the tail of the function); comments below describe only
// the visible statements.
70inline element_metrics structure_analyzer::analyze_scalar(
const dom::element& elem) {
71 element_metrics metrics;
72 metrics.complexity = 0;
73 metrics.child_count = 0;
74 metrics.can_inline =
true;
75 metrics.recommended_layout = layout_mode::INLINE;
77 switch (elem.type()) {
// String: length comes from estimate_string_length().
80 if (elem.get_string().get(str) ==
SUCCESS) {
81 metrics.estimated_inline_len = estimate_string_length(str);
// Signed integer: length comes from the int64_t overload of estimate_number_length().
87 if (elem.get_int64().get(val) ==
SUCCESS) {
88 metrics.estimated_inline_len = estimate_number_length(val);
// Unsigned integer.
94 if (elem.get_uint64().get(val) ==
SUCCESS) {
95 metrics.estimated_inline_len = estimate_number_length(val);
// Floating point.
101 if (elem.get_double().get(val) ==
SUCCESS) {
102 metrics.estimated_inline_len = estimate_number_length(val);
// Boolean: 4 characters when true, 5 when false (matching the literals `true` / `false`).
108 if (elem.get_bool().get(val) ==
SUCCESS) {
109 metrics.estimated_inline_len = val ? 4 : 5;
// Presumably the null case (length of `null`) — the governing case label is not visible; confirm.
114 metrics.estimated_inline_len = 4;
// Recursively analyze an array: accumulate an estimated single-line length,
// record per-child metrics, derive complexity from the deepest child, and pick
// a recommended layout.
// NOTE(review): this listing omits lines (the `depth` parameter line, the
// separator condition inside the loop, closing braces, the final return).
123inline element_metrics structure_analyzer::analyze_array(
const dom::array& arr,
125 element_metrics metrics;
126 metrics.complexity = 1;
// Starts at 2 — presumably the two bracket characters; confirm.
127 metrics.estimated_inline_len = 2;
128 metrics.child_count = 0;
130 size_t max_child_complexity = 0;
133 for (dom::element child : arr) {
// Adds 2 — presumably for a ", " separator between elements; the guarding
// condition is not visible in this listing.
135 metrics.estimated_inline_len += 2;
139 element_metrics child_metrics = analyze_element(child, depth + 1);
140 metrics.estimated_inline_len += child_metrics.estimated_inline_len;
141 max_child_complexity = (std::max)(max_child_complexity, child_metrics.complexity);
142 metrics.child_count++;
// Child metrics are kept in iteration order so formatters can index them by position.
143 metrics.children.push_back(std::move(child_metrics));
// Complexity is one more than the most complex child.
147 metrics.complexity = 1 + max_child_complexity;
// Inlinable only when both the complexity and the estimated length are within the configured limits.
150 metrics.can_inline = (metrics.complexity <= current_opts_->max_inline_complexity) &&
151 (metrics.estimated_inline_len <= current_opts_->max_inline_length);
// Uniformity is only checked when table formatting is enabled and the array has enough rows.
154 if (current_opts_->enable_table_format &&
155 metrics.child_count >= current_opts_->min_table_rows) {
156 metrics.is_uniform_array = check_array_uniformity(arr, metrics.common_keys);
// Layout priority: empty -> INLINE, inlinable -> INLINE, uniform with shared
// keys -> TABLE, simple children -> COMPACT_MULTILINE, otherwise EXPANDED.
160 if (metrics.child_count == 0) {
161 metrics.recommended_layout = layout_mode::INLINE;
162 }
else if (metrics.can_inline) {
163 metrics.recommended_layout = layout_mode::INLINE;
164 }
else if (metrics.is_uniform_array && !metrics.common_keys.empty()) {
165 metrics.recommended_layout = layout_mode::TABLE;
166 }
else if (current_opts_->enable_compact_multiline &&
167 max_child_complexity <= current_opts_->max_compact_array_complexity) {
168 metrics.recommended_layout = layout_mode::COMPACT_MULTILINE;
170 metrics.recommended_layout = layout_mode::EXPANDED;
// Recursively analyze an object: accumulate an estimated single-line length
// (keys plus values), record per-field metrics, derive complexity from the
// deepest value, and choose between INLINE and EXPANDED.
// NOTE(review): this listing omits lines (the `depth` parameter line, the
// separator condition, closing braces, the final return).
176inline element_metrics structure_analyzer::analyze_object(
const dom::object& obj,
178 element_metrics metrics;
179 metrics.complexity = 1;
// Starts at 2 — presumably the two brace characters; confirm.
180 metrics.estimated_inline_len = 2;
181 metrics.child_count = 0;
183 size_t max_child_complexity = 0;
186 for (dom::key_value_pair field : obj) {
// Adds 2 — presumably for a ", " separator; the guarding condition is not visible.
188 metrics.estimated_inline_len += 2;
// Key length plus 2 — presumably the colon and a following space; confirm.
193 metrics.estimated_inline_len += estimate_string_length(field.key) + 2;
195 element_metrics child_metrics = analyze_element(field.value, depth + 1);
196 metrics.estimated_inline_len += child_metrics.estimated_inline_len;
197 max_child_complexity = (std::max)(max_child_complexity, child_metrics.complexity);
198 metrics.child_count++;
// Field metrics are stored in iteration order, matching the field order of `obj`.
199 metrics.children.push_back(std::move(child_metrics));
// Complexity is one more than the most complex field value.
202 metrics.complexity = 1 + max_child_complexity;
// Inlinable only when both the complexity and the estimated length are within the configured limits.
204 metrics.can_inline = (metrics.complexity <= current_opts_->max_inline_complexity) &&
205 (metrics.estimated_inline_len <= current_opts_->max_inline_length);
// Empty or inlinable objects go on one line; everything else is expanded.
208 if (metrics.child_count == 0 || metrics.can_inline) {
209 metrics.recommended_layout = layout_mode::INLINE;
211 metrics.recommended_layout = layout_mode::EXPANDED;
// Estimate the printed length of a string value.
// NOTE(review): most of the body is missing from this listing; only the
// per-character test below is visible.
217inline size_t structure_analyzer::estimate_string_length(std::string_view s)
const {
// Tests for a double quote, a backslash, or a control character below 0x20 —
// presumably characters that need escaping and so take extra output space; confirm.
220 if (c ==
'"' || c ==
'\\' ||
static_cast<unsigned char>(c) < 32) {
// Estimate the printed length of a double by formatting it with "%.17g".
// NOTE(review): the value returned for NaN/Inf and the declaration of `buf`
// are not visible in this listing.
229inline size_t structure_analyzer::estimate_number_length(
double d)
const {
// Non-finite values are handled separately before formatting.
230 if (std::isnan(d) || std::isinf(d)) {
235 int len = snprintf(buf,
sizeof(buf),
"%.17g", d);
// Falls back to 20 if snprintf reports no characters or an error.
236 return len > 0 ?
static_cast<size_t>(len) : 20;
239inline size_t structure_analyzer::estimate_number_length(int64_t i)
const {
240 if (i == 0)
return 1;
242 if (i == INT64_MIN)
return 20;
243 size_t len = (i < 0) ? 1 : 0;
244 int64_t abs_val = (i < 0) ? -i : i;
245 while (abs_val > 0) {
252inline size_t structure_analyzer::estimate_number_length(uint64_t u)
const {
253 if (u == 0)
return 1;
// Decide whether an array is made of similar-enough objects to be rendered as
// a table, and report the keys shared by all of them through `common_keys`.
// NOTE(review): this listing omits lines (early returns, the first-object
// branch, counters, closing braces); comments describe only visible statements.
262inline bool structure_analyzer::check_array_uniformity(
const dom::array& arr,
263 std::vector<std::string>& common_keys)
const {
// Running intersection of the key sets seen so far.
266 std::set<std::string> shared_keys;
267 dom::object first_obj;
268 bool have_first =
false;
269 size_t object_count = 0;
271 for (dom::element elem : arr) {
// Elements that are not objects take this branch; the action taken is not visible here.
277 if (elem.get_object().get(obj) !=
SUCCESS) {
281 std::set<std::string> current_keys;
282 for (dom::key_value_pair field : obj) {
283 current_keys.insert(std::string(field.key));
// Presumably executed for the first object only (seeds the shared set); confirm.
287 shared_keys = current_keys;
// Each later object is compared against the first object, not against its predecessor.
292 double similarity = compute_object_similarity(first_obj, obj);
293 if (similarity < current_opts_->table_similarity_threshold) {
// Narrow the shared keys to those also present in the current object.
298 std::set<std::string> intersection;
299 std::set_intersection(shared_keys.begin(), shared_keys.end(),
300 current_keys.begin(), current_keys.end(),
301 std::inserter(intersection, intersection.begin()));
302 shared_keys = intersection;
// Checks that enough objects were seen to meet the minimum table row count.
308 if (object_count < current_opts_->min_table_rows) {
// Checks whether any key is common to every object.
313 if (shared_keys.empty()) {
// Output keys are in std::set iteration order, i.e. sorted.
317 common_keys.assign(shared_keys.begin(), shared_keys.end());
321inline double structure_analyzer::compute_object_similarity(
const dom::object& a,
322 const dom::object& b)
const {
323 std::set<std::string> keys_a, keys_b;
324 for (dom::key_value_pair field : a) {
325 keys_a.insert(std::string(field.key));
327 for (dom::key_value_pair field : b) {
328 keys_b.insert(std::string(field.key));
331 std::set<std::string> intersection;
332 std::set_intersection(keys_a.begin(), keys_a.end(),
333 keys_b.begin(), keys_b.end(),
334 std::inserter(intersection, intersection.begin()));
336 std::set<std::string> union_set;
337 std::set_union(keys_a.begin(), keys_a.end(),
338 keys_b.begin(), keys_b.end(),
339 std::inserter(union_set, union_set.begin()));
341 if (union_set.empty())
return 1.0;
342 return static_cast<double>(intersection.size()) /
static_cast<double>(union_set.size());
345inline layout_mode structure_analyzer::decide_layout(
const element_metrics& metrics,
347 size_t available_width)
const {
348 if (metrics.child_count == 0) {
349 return layout_mode::INLINE;
353 size_t indent_width = depth * current_opts_->indent_spaces;
354 if (metrics.can_inline &&
355 metrics.estimated_inline_len + indent_width <= available_width) {
356 return layout_mode::INLINE;
360 if (metrics.is_uniform_array && !metrics.common_keys.empty()) {
361 return layout_mode::TABLE;
365 if (current_opts_->enable_compact_multiline &&
366 metrics.complexity <= current_opts_->max_compact_array_complexity + 1) {
367 return layout_mode::COMPACT_MULTILINE;
370 return layout_mode::EXPANDED;
377inline fractured_formatter::fractured_formatter(
const fractured_json_options& opts)
378 : options_(opts), column_widths_{} {}
// Start a new output line and reset the tracked line length.
// NOTE(review): the body of the INLINE branch and the statement that emits the
// newline are not visible in this listing.
380simdjson_inline
void fractured_formatter::print_newline() {
// INLINE layout is special-cased (presumably newlines are suppressed; confirm).
381 if (current_layout_ == layout_mode::INLINE) {
385 current_line_length_ = 0;
// Emit indentation for the given depth, counting each emitted character
// toward the tracked line length.
// NOTE(review): the INLINE branch body and the statement that emits each
// indent character are not visible in this listing.
388simdjson_inline
void fractured_formatter::print_indents(
size_t depth) {
// INLINE layout is special-cased (presumably indentation is skipped; confirm).
389 if (current_layout_ == layout_mode::INLINE) {
// One iteration per indent character: depth * indent_spaces in total.
392 for (
size_t i = 0; i < depth * options_.indent_spaces; i++) {
394 current_line_length_++;
// Account for one space character in the tracked line length.
// NOTE(review): the statement that emits the space is not visible in this listing.
398simdjson_inline
void fractured_formatter::print_space() {
400 current_line_length_++;
403inline void fractured_formatter::set_layout_mode(layout_mode mode) {
404 current_layout_ = mode;
407inline layout_mode fractured_formatter::get_layout_mode()
const {
408 return current_layout_;
411inline void fractured_formatter::set_depth(
size_t depth) {
412 current_depth_ = depth;
415inline size_t fractured_formatter::get_depth()
const {
416 return current_depth_;
419inline void fractured_formatter::track_line_length(
size_t chars) {
420 current_line_length_ += chars;
423inline void fractured_formatter::reset_line_length() {
424 current_line_length_ = 0;
427inline size_t fractured_formatter::get_line_length()
const {
428 return current_line_length_;
431inline bool fractured_formatter::should_break_line(
size_t upcoming_length)
const {
432 return (current_line_length_ + upcoming_length) > options_.max_total_line_length;
// Accessor returning a const reference to formatting options.
// NOTE(review): the body is not visible in this listing; presumably it returns
// the `options_` member — confirm.
435inline const fractured_json_options& fractured_formatter::options()
const {
// Mark the formatter as being inside a table row.
// NOTE(review): lines after the assignment are missing from this listing, so
// additional statements (e.g. a column reset) may exist — confirm.
439inline void fractured_formatter::begin_table_row() {
440 in_table_mode_ =
true;
// Mark the formatter as no longer being inside a table row.
// NOTE(review): lines after the assignment are missing from this listing, so
// additional statements may exist — confirm.
444inline void fractured_formatter::end_table_row() {
445 in_table_mode_ =
false;
449inline void fractured_formatter::set_column_widths(
const std::vector<size_t>& widths) {
450 column_widths_ = widths;
453inline size_t fractured_formatter::get_column_index()
const {
454 return current_column_;
// Advance to the next table column.
// NOTE(review): the body is not visible in this listing; presumably it
// increments `current_column_` — confirm.
457inline void fractured_formatter::next_column() {
// Pad the current table cell up to the stored width of the current column.
// NOTE(review): the statements inside the loop that emit the padding and
// advance `actual_width` are not visible in this listing.
461inline void fractured_formatter::align_to_column_width(
size_t actual_width) {
// No padding is attempted when the current column has no stored width.
462 if (current_column_ < column_widths_.size()) {
463 size_t target_width = column_widths_[current_column_];
464 while (actual_width < target_width) {
// Each padding character counts toward the tracked line length.
467 current_line_length_++;
476inline fractured_string_builder::fractured_string_builder(
const fractured_json_options& opts)
477 : format_(opts), analyzer_{}, options_(opts) {}
479inline void fractured_string_builder::append(
const dom::element& value) {
481 element_metrics root_metrics = analyzer_.analyze(value, options_);
484 format_element(value, root_metrics, 0);
487inline void fractured_string_builder::append(
const dom::array& value) {
489 element_metrics metrics = analyzer_.analyze_array(value, options_);
490 format_array(value, metrics, 0);
493inline void fractured_string_builder::append(
const dom::object& value) {
495 element_metrics metrics = analyzer_.analyze_object(value, options_);
496 format_object(value, metrics, 0);
// Reset the builder.
// NOTE(review): the body is not visible in this listing; which members are
// cleared cannot be determined from here.
499simdjson_inline
void fractured_string_builder::clear() {
504simdjson_inline std::string_view fractured_string_builder::str()
const {
505 return format_.str();
// Format one element by dispatching on its runtime type.
// NOTE(review): this listing omits lines (the `depth` parameter line, local
// declarations, breaks, and the handling of non-container types).
508inline void fractured_string_builder::format_element(
const dom::element& elem,
509 const element_metrics& metrics,
511 switch (elem.type()) {
// Arrays are formatted with the metrics computed for this element.
512 case dom::element_type::ARRAY: {
514 if (elem.get_array().get(arr) == SUCCESS) {
515 format_array(arr, metrics, depth);
// Objects likewise.
519 case dom::element_type::OBJECT: {
521 if (elem.get_object().get(obj) == SUCCESS) {
522 format_object(obj, metrics, depth);
532inline void fractured_string_builder::format_array(
const dom::array& arr,
533 const element_metrics& metrics,
535 switch (metrics.recommended_layout) {
536 case layout_mode::INLINE:
537 format_array_inline(arr, metrics);
539 case layout_mode::COMPACT_MULTILINE:
540 format_array_compact_multiline(arr, metrics, depth);
542 case layout_mode::TABLE:
543 format_array_as_table(arr, metrics, depth);
545 case layout_mode::EXPANDED:
547 format_array_expanded(arr, metrics, depth);
// Format an array on a single line, temporarily forcing INLINE layout on the
// formatter and restoring the previous layout afterwards.
// NOTE(review): this listing omits lines (the first-element bookkeeping, the
// comma emission, the closing bracket, index increments, closing braces).
552inline void fractured_string_builder::format_array_inline(
const dom::array& arr,
553 const element_metrics& metrics) {
// Remember the caller's layout so it can be restored at the end.
554 layout_mode prev_layout = format_.get_layout_mode();
555 format_.set_layout_mode(layout_mode::INLINE);
557 format_.start_array();
// Position of the current element, used to look up its precomputed metrics.
561 size_t child_idx = 0;
562 for (dom::element elem : arr) {
// Optional padding: a space when comma padding is on, or when bracket padding
// is on (the conditions selecting between the two branches are partly missing).
566 if (options_.comma_padding) {
567 format_.print_space();
569 }
else if (options_.simple_bracket_padding) {
570 format_.print_space();
// Falls back to default metrics if the child list is shorter than the array.
// NOTE(review): because one operand is an lvalue and the other a prvalue, the
// conditional expression yields a temporary, so this reference binds to a copy
// of the child metrics — confirm the copy is acceptable.
573 const element_metrics& child_metrics = (child_idx < metrics.children.size())
574 ? metrics.children[child_idx] : element_metrics{};
// Children are formatted at depth 0 here.
575 format_element(elem, child_metrics, 0);
// Trailing padding before the closing bracket, only for non-empty arrays.
579 if (options_.simple_bracket_padding && !empty) {
580 format_.print_space();
584 format_.set_layout_mode(prev_layout);
// Format an array across several lines, packing multiple elements per line.
// NOTE(review): this listing omits lines (the `depth` parameter line, comma
// emission, counter updates, the closing bracket, closing braces).
587inline void fractured_string_builder::format_array_compact_multiline(
const dom::array& arr,
588 const element_metrics& metrics,
// Open the array and start the first row one level deeper.
590 format_.start_array();
591 format_.print_newline();
592 format_.print_indents(depth + 1);
// Number of elements already placed on the current row.
594 size_t items_on_line = 0;
596 size_t child_idx = 0;
598 for (dom::element elem : arr) {
// Wrap when the row holds the maximum item count, or when 20 more characters
// would exceed the line limit (20 is a fixed look-ahead, not the element's
// measured length).
603 if (items_on_line >= options_.max_items_per_line ||
604 format_.should_break_line(20)) {
605 format_.print_newline();
606 format_.print_indents(depth + 1);
608 }
else if (options_.comma_padding) {
609 format_.print_space();
// Each element is rendered with INLINE layout, then the previous layout is restored.
615 layout_mode prev_layout = format_.get_layout_mode();
616 format_.set_layout_mode(layout_mode::INLINE);
// NOTE(review): the conditional expression mixes an lvalue with a prvalue, so
// the reference binds to a temporary copy of the child metrics — confirm.
617 const element_metrics& child_metrics = (child_idx < metrics.children.size())
618 ? metrics.children[child_idx] : element_metrics{};
619 format_element(elem, child_metrics, depth + 1);
620 format_.set_layout_mode(prev_layout);
// Closing bracket goes on its own line at the array's own depth.
626 format_.print_newline();
627 format_.print_indents(depth);
// Format an array of similar objects as an aligned table: one object per
// line, columns taken from `metrics.common_keys`, values padded to a common
// width per column.
// NOTE(review): this listing omits many lines (the `depth` parameter line,
// comma/key emission, the non-object fallback, `found`/`value` declarations,
// counter updates, the closing bracket, closing braces).
631inline void fractured_string_builder::format_array_as_table(
const dom::array& arr,
632 const element_metrics& metrics,
634 const std::vector<std::string>& columns = metrics.common_keys;
// Without shared keys there is nothing to tabulate: use the expanded format.
635 if (columns.empty()) {
636 format_array_expanded(arr, metrics, depth);
// Pre-compute the widest value per column so every row can be padded to match.
641 std::vector<size_t> col_widths = calculate_column_widths(arr, columns);
642 format_.set_column_widths(col_widths);
644 format_.start_array();
645 format_.print_newline();
647 bool first_row =
true;
648 size_t child_idx = 0;
649 for (dom::element elem : arr) {
652 format_.print_newline();
656 format_.print_indents(depth + 1);
657 format_.begin_table_row();
// Rows that are not objects take a separate path (not visible in this listing).
661 if (elem.get_object().get(obj) != SUCCESS) {
// NOTE(review): the conditional expression mixes an lvalue with a prvalue, so
// the reference binds to a temporary copy of the row metrics — confirm.
667 const element_metrics& row_metrics = (child_idx < metrics.children.size())
668 ? metrics.children[child_idx] : element_metrics{};
670 format_.start_object();
671 if (options_.simple_bracket_padding) {
672 format_.print_space();
675 bool first_col =
true;
676 const size_t num_columns = columns.size();
// Emit the columns in `common_keys` order, regardless of the row's own field order.
678 for (
size_t col_idx = 0; col_idx < num_columns; col_idx++) {
679 const std::string& key = columns[col_idx];
680 const bool is_last_col = (col_idx == num_columns - 1);
684 if (options_.comma_padding) {
685 format_.print_space();
692 if (options_.colon_padding) {
693 format_.print_space();
// Linear search of the row for the column's key; `field_idx` tracks the
// field's position so its metrics can be looked up in `row_metrics.children`.
699 size_t field_idx = 0;
700 for (dom::key_value_pair field : obj) {
701 if (field.key == key) {
// Cell values are always rendered inline; the previous layout is restored afterwards.
711 layout_mode prev_layout = format_.get_layout_mode();
712 format_.set_layout_mode(layout_mode::INLINE);
713 const element_metrics& value_metrics = (field_idx < row_metrics.children.size())
714 ? row_metrics.children[field_idx] : element_metrics{};
715 format_element(value, value_metrics, depth + 1);
716 format_.set_layout_mode(prev_layout);
// Pad the cell to the column width; a missing value is measured as 4
// characters (presumably the length of `null` — confirm).
723 size_t actual_len = found ? measure_value_length(value) : 4;
724 size_t target_width = col_widths[col_idx];
725 while (actual_len < target_width) {
726 format_.one_char(
' ');
731 format_.next_column();
734 if (options_.simple_bracket_padding) {
735 format_.print_space();
737 format_.end_object();
738 format_.end_table_row();
// Closing bracket goes on its own line at the array's own depth.
742 format_.print_newline();
743 format_.print_indents(depth);
// Format an array with one element per line, each indented one level deeper
// than the array itself.
// NOTE(review): this listing omits lines (the `depth` parameter line, comma
// emission, index increments, the closing bracket, closing braces).
747inline void fractured_string_builder::format_array_expanded(
const dom::array& arr,
748 const element_metrics& metrics,
750 format_.start_array();
// Position of the current element, used to look up its precomputed metrics.
754 size_t child_idx = 0;
756 for (dom::element elem : arr) {
763 format_.print_newline();
764 format_.print_indents(depth + 1);
// NOTE(review): the conditional expression mixes an lvalue with a prvalue, so
// the reference binds to a temporary copy of the child metrics — confirm.
765 const element_metrics& child_metrics = (child_idx < metrics.children.size())
766 ? metrics.children[child_idx] : element_metrics{};
767 format_element(elem, child_metrics, depth + 1);
// Closing bracket goes on its own line at the array's own depth.
772 format_.print_newline();
773 format_.print_indents(depth);
778inline void fractured_string_builder::format_object(
const dom::object& obj,
779 const element_metrics& metrics,
781 if (metrics.recommended_layout == layout_mode::INLINE || metrics.can_inline) {
782 format_object_inline(obj, metrics);
784 format_object_expanded(obj, metrics, depth);
// Format an object on a single line, temporarily forcing INLINE layout on the
// formatter and restoring the previous layout afterwards.
// NOTE(review): this listing omits lines (first-field bookkeeping, comma
// emission, index increments, closing braces).
788inline void fractured_string_builder::format_object_inline(
const dom::object& obj,
789 const element_metrics& metrics) {
// Remember the caller's layout so it can be restored at the end.
790 layout_mode prev_layout = format_.get_layout_mode();
791 format_.set_layout_mode(layout_mode::INLINE);
793 format_.start_object();
// Position of the current field, used to look up its precomputed metrics.
797 size_t child_idx = 0;
799 for (dom::key_value_pair field : obj) {
// Optional padding: a space when comma padding is on, or when bracket padding
// is on (the conditions selecting between the two branches are partly missing).
803 if (options_.comma_padding) {
804 format_.print_space();
806 }
else if (options_.simple_bracket_padding) {
807 format_.print_space();
811 format_.key(field.key);
812 if (options_.colon_padding) {
813 format_.print_space();
// NOTE(review): the conditional expression mixes an lvalue with a prvalue, so
// the reference binds to a temporary copy of the child metrics — confirm.
815 const element_metrics& child_metrics = (child_idx < metrics.children.size())
816 ? metrics.children[child_idx] : element_metrics{};
// Values are formatted at depth 0 here.
817 format_element(field.value, child_metrics, 0);
// Trailing padding before the closing brace, only for non-empty objects.
821 if (options_.simple_bracket_padding && !empty) {
822 format_.print_space();
824 format_.end_object();
826 format_.set_layout_mode(prev_layout);
// Format an object with one field per line, each indented one level deeper
// than the object itself.
// NOTE(review): this listing omits lines (the `depth` parameter line, comma
// emission, index increments, closing braces).
829inline void fractured_string_builder::format_object_expanded(
const dom::object& obj,
830 const element_metrics& metrics,
832 format_.start_object();
// Position of the current field, used to look up its precomputed metrics.
836 size_t child_idx = 0;
838 for (dom::key_value_pair field : obj) {
845 format_.print_newline();
846 format_.print_indents(depth + 1);
847 format_.key(field.key);
848 if (options_.colon_padding) {
849 format_.print_space();
// NOTE(review): the conditional expression mixes an lvalue with a prvalue, so
// the reference binds to a temporary copy of the child metrics — confirm.
851 const element_metrics& child_metrics = (child_idx < metrics.children.size())
852 ? metrics.children[child_idx] : element_metrics{};
853 format_element(field.value, child_metrics, depth + 1);
// Closing brace goes on its own line at the object's own depth.
858 format_.print_newline();
859 format_.print_indents(depth);
861 format_.end_object();
// Emit a scalar element (string, number, boolean or null) through the formatter.
// NOTE(review): this listing omits lines (local declarations, the emitting
// calls for strings, numbers and null, breaks, closing braces).
864inline void fractured_string_builder::format_scalar(
const dom::element& elem) {
865 switch (elem.type()) {
866 case dom::element_type::STRING: {
867 std::string_view str;
868 if (elem.get_string().get(str) == SUCCESS) {
873 case dom::element_type::INT64: {
875 if (elem.get_int64().get(val) == SUCCESS) {
880 case dom::element_type::UINT64: {
882 if (elem.get_uint64().get(val) == SUCCESS) {
887 case dom::element_type::DOUBLE: {
889 if (elem.get_double().get(val) == SUCCESS) {
894 case dom::element_type::BOOL: {
896 if (elem.get_bool().get(val) == SUCCESS) {
// Booleans map directly onto the formatter's true/false atoms.
897 val ? format_.true_atom() : format_.false_atom();
901 case dom::element_type::NULL_VALUE:
// Measure how many characters a scalar value occupies when printed, so table
// cells can be padded to a common column width.
// NOTE(review): this listing omits lines (local declarations, the string
// length accumulation, several return statements, breaks, closing braces).
909inline size_t fractured_string_builder::measure_value_length(
const dom::element& elem)
const {
910 switch (elem.type()) {
911 case dom::element_type::STRING: {
912 std::string_view str;
913 if (elem.get_string().get(str) == SUCCESS) {
// Double quote, backslash and control characters below 0x20 are singled out —
// presumably because they are written escaped and take extra space; confirm.
917 if (c ==
'"' || c ==
'\\' ||
static_cast<unsigned char>(c) < 32) {
927 case dom::element_type::INT64: {
929 if (elem.get_int64().get(val) == SUCCESS) {
930 if (val == 0)
return 1;
// INT64_MIN cannot be negated without overflow; its text form has 20 characters.
932 if (val == INT64_MIN)
return 20;
// One character for the minus sign, then one per decimal digit.
933 size_t len = (val < 0) ? 1 : 0;
934 int64_t abs_val = (val < 0) ? -val : val;
935 while (abs_val > 0) { len++; abs_val /= 10; }
940 case dom::element_type::UINT64: {
942 if (elem.get_uint64().get(val) == SUCCESS) {
943 if (val == 0)
return 1;
// One character per decimal digit.
945 while (val > 0) { len++; val /= 10; }
950 case dom::element_type::DOUBLE: {
952 if (elem.get_double().get(val) == SUCCESS) {
// Length of the "%.17g" rendering; falls back to 1 if snprintf reports
// nothing written or an error.
954 int len = snprintf(buf,
sizeof(buf),
"%.17g", val);
955 return len > 0 ?
static_cast<size_t>(len) : 1;
959 case dom::element_type::BOOL: {
961 if (elem.get_bool().get(val) == SUCCESS) {
966 case dom::element_type::NULL_VALUE:
// Compute, for every table column, the widest measured value across all
// object rows of the array.
// NOTE(review): this listing omits lines (local declarations, the non-object
// skip action, closing braces, the final return).
973inline std::vector<size_t> fractured_string_builder::calculate_column_widths(
974 const dom::array& arr,
975 const std::vector<std::string>& columns)
const {
// One width per column, all starting at zero.
977 std::vector<size_t> widths(columns.size(), 0);
979 for (dom::element elem : arr) {
// Rows that are not objects are handled separately (action not visible here).
981 if (elem.get_object().get(obj) != SUCCESS) {
985 for (
size_t col_idx = 0; col_idx < columns.size(); col_idx++) {
986 const std::string& key = columns[col_idx];
// Linear search of the row for the column's key.
988 for (dom::key_value_pair field : obj) {
989 if (field.key == key) {
// Keep the running maximum for this column.
991 size_t len = measure_value_length(field.value);
992 widths[col_idx] = (std::max)(widths[col_idx], len);
// NOTE(review): fragment of a function body — the enclosing signature and the
// statement that feeds the value into the builder are not visible in this listing.
// A builder configured with the caller's options produces the text.
1015 internal::fractured_string_builder sb(options);
1017 std::string_view result = sb.str();
// The view is copied into an owning std::string for the return value; `sb` is
// a local of this function.
1018 return std::string(result.data(), result.size());
// Overloads taking a simdjson_result<T>, compiled only when exceptions are enabled.
// NOTE(review): the template headers, the error checks guarding the throws and
// the success paths are not visible in this listing.
1021#if SIMDJSON_EXCEPTIONS
// Default-options overload.
1023std::string fractured_json(simdjson_result<T> x) {
// Throws a simdjson_error carrying the result's error code.
1025 throw simdjson_error(x.error());
// Overload with explicit options.
1031std::string
fractured_json(simdjson_result<T> x,
const fractured_json_options& options) {
// Throws a simdjson_error carrying the result's error code.
1033 throw simdjson_error(x.error());
// Explicit instantiations of fractured_json for the DOM value types
// (element, array, object), each with explicit options.
1041template std::string
fractured_json(dom::element x,
const fractured_json_options& options);
1043template std::string
fractured_json(dom::array x,
const fractured_json_options& options);
1045template std::string
fractured_json(dom::object x,
const fractured_json_options& options);
// Instantiations for simdjson_result<dom::element>, only when exceptions are enabled.
// NOTE(review): any further instantiations and the matching #endif are not
// visible in this listing.
1047#if SIMDJSON_EXCEPTIONS
1048template std::string
fractured_json(simdjson_result<dom::element> x);
1049template std::string
fractured_json(simdjson_result<dom::element> x,
const fractured_json_options& options);
// NOTE(review): fragment of a function body — the enclosing signature, the
// declarations of `parser`, `padded` and `doc`, and the success path are not
// visible in this listing.
// Parse the padded input into `doc`, capturing the error code instead of throwing.
1067 auto error = parser.
parse(padded).get(doc);
// Returns a copy of the input text unchanged — presumably the fallback when
// parsing fails; the guarding condition is not visible, confirm.
1070 return std::string(json_str);
A persistent document parser.
simdjson_result< element > parse(const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document and return a temporary reference to it.
@ UINT64
uint64_t: any integer that fits in uint64_t but not int64_t
@ DOUBLE
double: Any number with a "." or "e" that fits in double.
The top level simdjson namespace, containing everything the library provides.
std::string fractured_json_string(std::string_view json_str)
Format a JSON string using FracturedJson formatting.
std::string fractured_json(T x)
Format JSON using FracturedJson formatting with default options.
Configuration options for FracturedJson formatting.
String with extra allocation for ease of use with parser::parse()