simdjson 4.4.0
Ridiculously Fast JSON
Loading...
Searching...
No Matches
fractured_json-inl.h
1#ifndef SIMDJSON_DOM_FRACTURED_JSON_INL_H
2#define SIMDJSON_DOM_FRACTURED_JSON_INL_H
3
4#include "simdjson/dom/fractured_json.h"
5#include "simdjson/dom/serialization.h"
6#include "simdjson/dom/element-inl.h"
7#include "simdjson/dom/array-inl.h"
8#include "simdjson/dom/object-inl.h"
9#include "simdjson/dom/parser-inl.h"
10#include "simdjson/padded_string.h"
11#include "simdjson/internal/json_structure_analyzer.h"
12#include "simdjson/internal/fractured_formatter.h"
13
14#include <cmath>
15#include <algorithm>
16#include <cstring>
17
18namespace simdjson {
19namespace internal {
20
21//
22// Structure Analyzer Implementation
23//
24
25inline element_metrics structure_analyzer::analyze(const dom::element& elem,
26 const fractured_json_options& opts) {
27 current_opts_ = &opts;
28 return analyze_element(elem, 0);
29}
30
31inline void structure_analyzer::clear() {
32 current_opts_ = nullptr;
33}
34
35inline element_metrics structure_analyzer::analyze_array(const dom::array& arr,
36 const fractured_json_options& opts) {
37 current_opts_ = &opts;
38 return analyze_array(arr, 0);
39}
40
41inline element_metrics structure_analyzer::analyze_object(const dom::object& obj,
42 const fractured_json_options& opts) {
43 current_opts_ = &opts;
44 return analyze_object(obj, 0);
45}
46
47inline element_metrics structure_analyzer::analyze_element(const dom::element& elem, size_t depth) {
48 switch (elem.type()) {
50 dom::array arr;
51 if (elem.get_array().get(arr) == SUCCESS) {
52 return analyze_array(arr, depth);
53 }
54 break;
55 }
57 dom::object obj;
58 if (elem.get_object().get(obj) == SUCCESS) {
59 return analyze_object(obj, depth);
60 }
61 break;
62 }
63 default:
64 // Handle all scalar types with a helper
65 return analyze_scalar(elem);
66 }
67 return element_metrics{};
68}
69
70inline element_metrics structure_analyzer::analyze_scalar(const dom::element& elem) {
71 element_metrics metrics;
72 metrics.complexity = 0;
73 metrics.child_count = 0;
74 metrics.can_inline = true;
75 metrics.recommended_layout = layout_mode::INLINE;
76
77 switch (elem.type()) {
79 std::string_view str;
80 if (elem.get_string().get(str) == SUCCESS) {
81 metrics.estimated_inline_len = estimate_string_length(str);
82 }
83 break;
84 }
86 int64_t val;
87 if (elem.get_int64().get(val) == SUCCESS) {
88 metrics.estimated_inline_len = estimate_number_length(val);
89 }
90 break;
91 }
93 uint64_t val;
94 if (elem.get_uint64().get(val) == SUCCESS) {
95 metrics.estimated_inline_len = estimate_number_length(val);
96 }
97 break;
98 }
100 double val;
101 if (elem.get_double().get(val) == SUCCESS) {
102 metrics.estimated_inline_len = estimate_number_length(val);
103 }
104 break;
105 }
107 bool val;
108 if (elem.get_bool().get(val) == SUCCESS) {
109 metrics.estimated_inline_len = val ? 4 : 5; // "true" or "false"
110 }
111 break;
112 }
114 metrics.estimated_inline_len = 4; // "null"
115 break;
116 default:
117 break;
118 }
119
120 return metrics;
121}
122
123inline element_metrics structure_analyzer::analyze_array(const dom::array& arr,
124 size_t depth) {
125 element_metrics metrics;
126 metrics.complexity = 1; // At least 1 for being an array
127 metrics.estimated_inline_len = 2; // "[]"
128 metrics.child_count = 0;
129
130 size_t max_child_complexity = 0;
131 bool first = true;
132
133 for (dom::element child : arr) {
134 if (!first) {
135 metrics.estimated_inline_len += 2; // ", "
136 }
137 first = false;
138
139 element_metrics child_metrics = analyze_element(child, depth + 1);
140 metrics.estimated_inline_len += child_metrics.estimated_inline_len;
141 max_child_complexity = (std::max)(max_child_complexity, child_metrics.complexity);
142 metrics.child_count++;
143 metrics.children.push_back(std::move(child_metrics));
144 }
145
146 // Complexity is 1 + max child complexity
147 metrics.complexity = 1 + max_child_complexity;
148
149 // Check if can inline
150 metrics.can_inline = (metrics.complexity <= current_opts_->max_inline_complexity) &&
151 (metrics.estimated_inline_len <= current_opts_->max_inline_length);
152
153 // Check for uniform array (table formatting)
154 if (current_opts_->enable_table_format &&
155 metrics.child_count >= current_opts_->min_table_rows) {
156 metrics.is_uniform_array = check_array_uniformity(arr, metrics.common_keys);
157 }
158
159 // Decide layout
160 if (metrics.child_count == 0) {
161 metrics.recommended_layout = layout_mode::INLINE;
162 } else if (metrics.can_inline) {
163 metrics.recommended_layout = layout_mode::INLINE;
164 } else if (metrics.is_uniform_array && !metrics.common_keys.empty()) {
165 metrics.recommended_layout = layout_mode::TABLE;
166 } else if (current_opts_->enable_compact_multiline &&
167 max_child_complexity <= current_opts_->max_compact_array_complexity) {
168 metrics.recommended_layout = layout_mode::COMPACT_MULTILINE;
169 } else {
170 metrics.recommended_layout = layout_mode::EXPANDED;
171 }
172
173 return metrics;
174}
175
176inline element_metrics structure_analyzer::analyze_object(const dom::object& obj,
177 size_t depth) {
178 element_metrics metrics;
179 metrics.complexity = 1;
180 metrics.estimated_inline_len = 2; // "{}"
181 metrics.child_count = 0;
182
183 size_t max_child_complexity = 0;
184 bool first = true;
185
186 for (dom::key_value_pair field : obj) {
187 if (!first) {
188 metrics.estimated_inline_len += 2; // ", "
189 }
190 first = false;
191
192 // Key length: quotes + key + colon + space
193 metrics.estimated_inline_len += estimate_string_length(field.key) + 2;
194
195 element_metrics child_metrics = analyze_element(field.value, depth + 1);
196 metrics.estimated_inline_len += child_metrics.estimated_inline_len;
197 max_child_complexity = (std::max)(max_child_complexity, child_metrics.complexity);
198 metrics.child_count++;
199 metrics.children.push_back(std::move(child_metrics));
200 }
201
202 metrics.complexity = 1 + max_child_complexity;
203
204 metrics.can_inline = (metrics.complexity <= current_opts_->max_inline_complexity) &&
205 (metrics.estimated_inline_len <= current_opts_->max_inline_length);
206
207 // Objects use inline or expanded (no table/compact for objects)
208 if (metrics.child_count == 0 || metrics.can_inline) {
209 metrics.recommended_layout = layout_mode::INLINE;
210 } else {
211 metrics.recommended_layout = layout_mode::EXPANDED;
212 }
213
214 return metrics;
215}
216
217inline size_t structure_analyzer::estimate_string_length(std::string_view s) const {
218 size_t len = 2; // quotes
219 for (char c : s) {
220 if (c == '"' || c == '\\' || static_cast<unsigned char>(c) < 32) {
221 len += 2; // escape sequence (at least)
222 } else {
223 len += 1;
224 }
225 }
226 return len;
227}
228
229inline size_t structure_analyzer::estimate_number_length(double d) const {
230 if (std::isnan(d) || std::isinf(d)) {
231 return 4; // "null" for invalid numbers
232 }
233 // Rough estimate: up to 17 significant digits + sign + decimal point + exponent
234 char buf[32];
235 int len = snprintf(buf, sizeof(buf), "%.17g", d);
236 return len > 0 ? static_cast<size_t>(len) : 20;
237}
238
239inline size_t structure_analyzer::estimate_number_length(int64_t i) const {
240 if (i == 0) return 1;
241 // Handle INT64_MIN specially to avoid overflow when negating
242 if (i == INT64_MIN) return 20; // "-9223372036854775808" is 20 characters
243 size_t len = (i < 0) ? 1 : 0; // negative sign
244 int64_t abs_val = (i < 0) ? -i : i;
245 while (abs_val > 0) {
246 len++;
247 abs_val /= 10;
248 }
249 return len;
250}
251
252inline size_t structure_analyzer::estimate_number_length(uint64_t u) const {
253 if (u == 0) return 1;
254 size_t len = 0;
255 while (u > 0) {
256 len++;
257 u /= 10;
258 }
259 return len;
260}
261
262inline bool structure_analyzer::check_array_uniformity(const dom::array& arr,
263 std::vector<std::string>& common_keys) const {
264 common_keys.clear();
265
266 std::set<std::string> shared_keys;
267 dom::object first_obj;
268 bool have_first = false;
269 size_t object_count = 0;
270
271 for (dom::element elem : arr) {
272 if (elem.type() != dom::element_type::OBJECT) {
273 return false; // Not all elements are objects
274 }
275
276 dom::object obj;
277 if (elem.get_object().get(obj) != SUCCESS) {
278 return false;
279 }
280
281 std::set<std::string> current_keys;
282 for (dom::key_value_pair field : obj) {
283 current_keys.insert(std::string(field.key));
284 }
285
286 if (!have_first) {
287 shared_keys = current_keys;
288 first_obj = obj;
289 have_first = true;
290 } else {
291 // Check similarity threshold against the first object
292 double similarity = compute_object_similarity(first_obj, obj);
293 if (similarity < current_opts_->table_similarity_threshold) {
294 return false; // Objects are too dissimilar for table format
295 }
296
297 // Intersect with current keys
298 std::set<std::string> intersection;
299 std::set_intersection(shared_keys.begin(), shared_keys.end(),
300 current_keys.begin(), current_keys.end(),
301 std::inserter(intersection, intersection.begin()));
302 shared_keys = intersection;
303 }
304
305 object_count++;
306 }
307
308 if (object_count < current_opts_->min_table_rows) {
309 return false;
310 }
311
312 // Require at least one common key for table formatting
313 if (shared_keys.empty()) {
314 return false;
315 }
316
317 common_keys.assign(shared_keys.begin(), shared_keys.end());
318 return true;
319}
320
321inline double structure_analyzer::compute_object_similarity(const dom::object& a,
322 const dom::object& b) const {
323 std::set<std::string> keys_a, keys_b;
324 for (dom::key_value_pair field : a) {
325 keys_a.insert(std::string(field.key));
326 }
327 for (dom::key_value_pair field : b) {
328 keys_b.insert(std::string(field.key));
329 }
330
331 std::set<std::string> intersection;
332 std::set_intersection(keys_a.begin(), keys_a.end(),
333 keys_b.begin(), keys_b.end(),
334 std::inserter(intersection, intersection.begin()));
335
336 std::set<std::string> union_set;
337 std::set_union(keys_a.begin(), keys_a.end(),
338 keys_b.begin(), keys_b.end(),
339 std::inserter(union_set, union_set.begin()));
340
341 if (union_set.empty()) return 1.0;
342 return static_cast<double>(intersection.size()) / static_cast<double>(union_set.size());
343}
344
345inline layout_mode structure_analyzer::decide_layout(const element_metrics& metrics,
346 size_t depth,
347 size_t available_width) const {
348 if (metrics.child_count == 0) {
349 return layout_mode::INLINE;
350 }
351
352 // Check inline feasibility
353 size_t indent_width = depth * current_opts_->indent_spaces;
354 if (metrics.can_inline &&
355 metrics.estimated_inline_len + indent_width <= available_width) {
356 return layout_mode::INLINE;
357 }
358
359 // Check table mode
360 if (metrics.is_uniform_array && !metrics.common_keys.empty()) {
361 return layout_mode::TABLE;
362 }
363
364 // Check compact multiline
365 if (current_opts_->enable_compact_multiline &&
366 metrics.complexity <= current_opts_->max_compact_array_complexity + 1) {
367 return layout_mode::COMPACT_MULTILINE;
368 }
369
370 return layout_mode::EXPANDED;
371}
372
373//
374// Fractured Formatter Implementation
375//
376
377inline fractured_formatter::fractured_formatter(const fractured_json_options& opts)
378 : options_(opts), column_widths_{} {}
379
380simdjson_inline void fractured_formatter::print_newline() {
381 if (current_layout_ == layout_mode::INLINE) {
382 return; // No newlines in inline mode
383 }
384 one_char('\n');
385 current_line_length_ = 0;
386}
387
388simdjson_inline void fractured_formatter::print_indents(size_t depth) {
389 if (current_layout_ == layout_mode::INLINE) {
390 return; // No indentation in inline mode
391 }
392 for (size_t i = 0; i < depth * options_.indent_spaces; i++) {
393 one_char(' ');
394 current_line_length_++;
395 }
396}
397
398simdjson_inline void fractured_formatter::print_space() {
399 one_char(' ');
400 current_line_length_++;
401}
402
403inline void fractured_formatter::set_layout_mode(layout_mode mode) {
404 current_layout_ = mode;
405}
406
407inline layout_mode fractured_formatter::get_layout_mode() const {
408 return current_layout_;
409}
410
411inline void fractured_formatter::set_depth(size_t depth) {
412 current_depth_ = depth;
413}
414
415inline size_t fractured_formatter::get_depth() const {
416 return current_depth_;
417}
418
419inline void fractured_formatter::track_line_length(size_t chars) {
420 current_line_length_ += chars;
421}
422
423inline void fractured_formatter::reset_line_length() {
424 current_line_length_ = 0;
425}
426
427inline size_t fractured_formatter::get_line_length() const {
428 return current_line_length_;
429}
430
431inline bool fractured_formatter::should_break_line(size_t upcoming_length) const {
432 return (current_line_length_ + upcoming_length) > options_.max_total_line_length;
433}
434
435inline const fractured_json_options& fractured_formatter::options() const {
436 return options_;
437}
438
439inline void fractured_formatter::begin_table_row() {
440 in_table_mode_ = true;
441 current_column_ = 0;
442}
443
444inline void fractured_formatter::end_table_row() {
445 in_table_mode_ = false;
446 current_column_ = 0;
447}
448
449inline void fractured_formatter::set_column_widths(const std::vector<size_t>& widths) {
450 column_widths_ = widths;
451}
452
453inline size_t fractured_formatter::get_column_index() const {
454 return current_column_;
455}
456
457inline void fractured_formatter::next_column() {
458 current_column_++;
459}
460
461inline void fractured_formatter::align_to_column_width(size_t actual_width) {
462 if (current_column_ < column_widths_.size()) {
463 size_t target_width = column_widths_[current_column_];
464 while (actual_width < target_width) {
465 one_char(' ');
466 actual_width++;
467 current_line_length_++;
468 }
469 }
470}
471
472//
473// Fractured String Builder Implementation
474//
475
476inline fractured_string_builder::fractured_string_builder(const fractured_json_options& opts)
477 : format_(opts), analyzer_{}, options_(opts) {}
478
479inline void fractured_string_builder::append(const dom::element& value) {
480 // Phase 1: Analyze structure (metrics tree is built recursively)
481 element_metrics root_metrics = analyzer_.analyze(value, options_);
482
483 // Phase 2: Format using metrics tree (passed through recursion)
484 format_element(value, root_metrics, 0);
485}
486
487inline void fractured_string_builder::append(const dom::array& value) {
488 // Analyze the array to get proper metrics with children
489 element_metrics metrics = analyzer_.analyze_array(value, options_);
490 format_array(value, metrics, 0);
491}
492
493inline void fractured_string_builder::append(const dom::object& value) {
494 // Analyze the object to get proper metrics with children
495 element_metrics metrics = analyzer_.analyze_object(value, options_);
496 format_object(value, metrics, 0);
497}
498
499simdjson_inline void fractured_string_builder::clear() {
500 format_.clear();
501 analyzer_.clear();
502}
503
504simdjson_inline std::string_view fractured_string_builder::str() const {
505 return format_.str();
506}
507
508inline void fractured_string_builder::format_element(const dom::element& elem,
509 const element_metrics& metrics,
510 size_t depth) {
511 switch (elem.type()) {
512 case dom::element_type::ARRAY: {
513 dom::array arr;
514 if (elem.get_array().get(arr) == SUCCESS) {
515 format_array(arr, metrics, depth);
516 }
517 break;
518 }
519 case dom::element_type::OBJECT: {
520 dom::object obj;
521 if (elem.get_object().get(obj) == SUCCESS) {
522 format_object(obj, metrics, depth);
523 }
524 break;
525 }
526 default:
527 format_scalar(elem);
528 break;
529 }
530}
531
532inline void fractured_string_builder::format_array(const dom::array& arr,
533 const element_metrics& metrics,
534 size_t depth) {
535 switch (metrics.recommended_layout) {
536 case layout_mode::INLINE:
537 format_array_inline(arr, metrics);
538 break;
539 case layout_mode::COMPACT_MULTILINE:
540 format_array_compact_multiline(arr, metrics, depth);
541 break;
542 case layout_mode::TABLE:
543 format_array_as_table(arr, metrics, depth);
544 break;
545 case layout_mode::EXPANDED:
546 default:
547 format_array_expanded(arr, metrics, depth);
548 break;
549 }
550}
551
552inline void fractured_string_builder::format_array_inline(const dom::array& arr,
553 const element_metrics& metrics) {
554 layout_mode prev_layout = format_.get_layout_mode();
555 format_.set_layout_mode(layout_mode::INLINE);
556
557 format_.start_array();
558
559 bool first = true;
560 bool empty = true;
561 size_t child_idx = 0;
562 for (dom::element elem : arr) {
563 empty = false;
564 if (!first) {
565 format_.comma();
566 if (options_.comma_padding) {
567 format_.print_space();
568 }
569 } else if (options_.simple_bracket_padding) {
570 format_.print_space();
571 }
572 first = false;
573 const element_metrics& child_metrics = (child_idx < metrics.children.size())
574 ? metrics.children[child_idx] : element_metrics{};
575 format_element(elem, child_metrics, 0);
576 child_idx++;
577 }
578
579 if (options_.simple_bracket_padding && !empty) {
580 format_.print_space();
581 }
582 format_.end_array();
583
584 format_.set_layout_mode(prev_layout);
585}
586
587inline void fractured_string_builder::format_array_compact_multiline(const dom::array& arr,
588 const element_metrics& metrics,
589 size_t depth) {
590 format_.start_array();
591 format_.print_newline();
592 format_.print_indents(depth + 1);
593
594 size_t items_on_line = 0;
595 bool first = true;
596 size_t child_idx = 0;
597
598 for (dom::element elem : arr) {
599 if (!first) {
600 format_.comma();
601
602 // Check if we should break to new line
603 if (items_on_line >= options_.max_items_per_line ||
604 format_.should_break_line(20)) { // 20 is rough estimate for next item
605 format_.print_newline();
606 format_.print_indents(depth + 1);
607 items_on_line = 0;
608 } else if (options_.comma_padding) {
609 format_.print_space();
610 }
611 }
612 first = false;
613
614 // Format element inline
615 layout_mode prev_layout = format_.get_layout_mode();
616 format_.set_layout_mode(layout_mode::INLINE);
617 const element_metrics& child_metrics = (child_idx < metrics.children.size())
618 ? metrics.children[child_idx] : element_metrics{};
619 format_element(elem, child_metrics, depth + 1);
620 format_.set_layout_mode(prev_layout);
621
622 items_on_line++;
623 child_idx++;
624 }
625
626 format_.print_newline();
627 format_.print_indents(depth);
628 format_.end_array();
629}
630
631inline void fractured_string_builder::format_array_as_table(const dom::array& arr,
632 const element_metrics& metrics,
633 size_t depth) {
634 const std::vector<std::string>& columns = metrics.common_keys;
635 if (columns.empty()) {
636 format_array_expanded(arr, metrics, depth);
637 return;
638 }
639
640 // Calculate column widths for alignment
641 std::vector<size_t> col_widths = calculate_column_widths(arr, columns);
642 format_.set_column_widths(col_widths);
643
644 format_.start_array();
645 format_.print_newline();
646
647 bool first_row = true;
648 size_t child_idx = 0;
649 for (dom::element elem : arr) {
650 if (!first_row) {
651 format_.comma();
652 format_.print_newline();
653 }
654 first_row = false;
655
656 format_.print_indents(depth + 1);
657 format_.begin_table_row();
658
659 // Format object as inline with aligned columns
660 dom::object obj;
661 if (elem.get_object().get(obj) != SUCCESS) {
662 child_idx++;
663 continue;
664 }
665
666 // Get child metrics for this row (object)
667 const element_metrics& row_metrics = (child_idx < metrics.children.size())
668 ? metrics.children[child_idx] : element_metrics{};
669
670 format_.start_object();
671 if (options_.simple_bracket_padding) {
672 format_.print_space();
673 }
674
675 bool first_col = true;
676 const size_t num_columns = columns.size();
677
678 for (size_t col_idx = 0; col_idx < num_columns; col_idx++) {
679 const std::string& key = columns[col_idx];
680 const bool is_last_col = (col_idx == num_columns - 1);
681
682 if (!first_col) {
683 format_.comma();
684 if (options_.comma_padding) {
685 format_.print_space();
686 }
687 }
688 first_col = false;
689
690 // Write key
691 format_.key(key);
692 if (options_.colon_padding) {
693 format_.print_space();
694 }
695
696 // Find the value for this key and its metrics
697 dom::element value;
698 bool found = false;
699 size_t field_idx = 0;
700 for (dom::key_value_pair field : obj) {
701 if (field.key == key) {
702 value = field.value;
703 found = true;
704 break;
705 }
706 field_idx++;
707 }
708
709 // Write value
710 if (found) {
711 layout_mode prev_layout = format_.get_layout_mode();
712 format_.set_layout_mode(layout_mode::INLINE);
713 const element_metrics& value_metrics = (field_idx < row_metrics.children.size())
714 ? row_metrics.children[field_idx] : element_metrics{};
715 format_element(value, value_metrics, depth + 1);
716 format_.set_layout_mode(prev_layout);
717 } else {
718 format_.null_atom();
719 }
720
721 // Only pad non-last columns to align values across rows
722 if (!is_last_col) {
723 size_t actual_len = found ? measure_value_length(value) : 4; // 4 for "null"
724 size_t target_width = col_widths[col_idx];
725 while (actual_len < target_width) {
726 format_.one_char(' ');
727 actual_len++;
728 }
729 }
730
731 format_.next_column();
732 }
733
734 if (options_.simple_bracket_padding) {
735 format_.print_space();
736 }
737 format_.end_object();
738 format_.end_table_row();
739 child_idx++;
740 }
741
742 format_.print_newline();
743 format_.print_indents(depth);
744 format_.end_array();
745}
746
747inline void fractured_string_builder::format_array_expanded(const dom::array& arr,
748 const element_metrics& metrics,
749 size_t depth) {
750 format_.start_array();
751
752 bool empty = true;
753 bool first = true;
754 size_t child_idx = 0;
755
756 for (dom::element elem : arr) {
757 empty = false;
758 if (!first) {
759 format_.comma();
760 }
761 first = false;
762
763 format_.print_newline();
764 format_.print_indents(depth + 1);
765 const element_metrics& child_metrics = (child_idx < metrics.children.size())
766 ? metrics.children[child_idx] : element_metrics{};
767 format_element(elem, child_metrics, depth + 1);
768 child_idx++;
769 }
770
771 if (!empty) {
772 format_.print_newline();
773 format_.print_indents(depth);
774 }
775 format_.end_array();
776}
777
778inline void fractured_string_builder::format_object(const dom::object& obj,
779 const element_metrics& metrics,
780 size_t depth) {
781 if (metrics.recommended_layout == layout_mode::INLINE || metrics.can_inline) {
782 format_object_inline(obj, metrics);
783 } else {
784 format_object_expanded(obj, metrics, depth);
785 }
786}
787
788inline void fractured_string_builder::format_object_inline(const dom::object& obj,
789 const element_metrics& metrics) {
790 layout_mode prev_layout = format_.get_layout_mode();
791 format_.set_layout_mode(layout_mode::INLINE);
792
793 format_.start_object();
794
795 bool empty = true;
796 bool first = true;
797 size_t child_idx = 0;
798
799 for (dom::key_value_pair field : obj) {
800 empty = false;
801 if (!first) {
802 format_.comma();
803 if (options_.comma_padding) {
804 format_.print_space();
805 }
806 } else if (options_.simple_bracket_padding) {
807 format_.print_space();
808 }
809 first = false;
810
811 format_.key(field.key);
812 if (options_.colon_padding) {
813 format_.print_space();
814 }
815 const element_metrics& child_metrics = (child_idx < metrics.children.size())
816 ? metrics.children[child_idx] : element_metrics{};
817 format_element(field.value, child_metrics, 0);
818 child_idx++;
819 }
820
821 if (options_.simple_bracket_padding && !empty) {
822 format_.print_space();
823 }
824 format_.end_object();
825
826 format_.set_layout_mode(prev_layout);
827}
828
829inline void fractured_string_builder::format_object_expanded(const dom::object& obj,
830 const element_metrics& metrics,
831 size_t depth) {
832 format_.start_object();
833
834 bool empty = true;
835 bool first = true;
836 size_t child_idx = 0;
837
838 for (dom::key_value_pair field : obj) {
839 empty = false;
840 if (!first) {
841 format_.comma();
842 }
843 first = false;
844
845 format_.print_newline();
846 format_.print_indents(depth + 1);
847 format_.key(field.key);
848 if (options_.colon_padding) {
849 format_.print_space();
850 }
851 const element_metrics& child_metrics = (child_idx < metrics.children.size())
852 ? metrics.children[child_idx] : element_metrics{};
853 format_element(field.value, child_metrics, depth + 1);
854 child_idx++;
855 }
856
857 if (!empty) {
858 format_.print_newline();
859 format_.print_indents(depth);
860 }
861 format_.end_object();
862}
863
864inline void fractured_string_builder::format_scalar(const dom::element& elem) {
865 switch (elem.type()) {
866 case dom::element_type::STRING: {
867 std::string_view str;
868 if (elem.get_string().get(str) == SUCCESS) {
869 format_.string(str);
870 }
871 break;
872 }
873 case dom::element_type::INT64: {
874 int64_t val;
875 if (elem.get_int64().get(val) == SUCCESS) {
876 format_.number(val);
877 }
878 break;
879 }
880 case dom::element_type::UINT64: {
881 uint64_t val;
882 if (elem.get_uint64().get(val) == SUCCESS) {
883 format_.number(val);
884 }
885 break;
886 }
887 case dom::element_type::DOUBLE: {
888 double val;
889 if (elem.get_double().get(val) == SUCCESS) {
890 format_.number(val);
891 }
892 break;
893 }
894 case dom::element_type::BOOL: {
895 bool val;
896 if (elem.get_bool().get(val) == SUCCESS) {
897 val ? format_.true_atom() : format_.false_atom();
898 }
899 break;
900 }
901 case dom::element_type::NULL_VALUE:
902 format_.null_atom();
903 break;
904 default:
905 break;
906 }
907}
908
909inline size_t fractured_string_builder::measure_value_length(const dom::element& elem) const {
910 switch (elem.type()) {
911 case dom::element_type::STRING: {
912 std::string_view str;
913 if (elem.get_string().get(str) == SUCCESS) {
914 // Count actual escaped length
915 size_t len = 2; // quotes
916 for (char c : str) {
917 if (c == '"' || c == '\\' || static_cast<unsigned char>(c) < 32) {
918 len += 2; // escape sequence
919 } else {
920 len += 1;
921 }
922 }
923 return len;
924 }
925 return 2;
926 }
927 case dom::element_type::INT64: {
928 int64_t val;
929 if (elem.get_int64().get(val) == SUCCESS) {
930 if (val == 0) return 1;
931 // Handle INT64_MIN specially to avoid overflow when negating
932 if (val == INT64_MIN) return 20; // "-9223372036854775808" is 20 characters
933 size_t len = (val < 0) ? 1 : 0;
934 int64_t abs_val = (val < 0) ? -val : val;
935 while (abs_val > 0) { len++; abs_val /= 10; }
936 return len;
937 }
938 return 1;
939 }
940 case dom::element_type::UINT64: {
941 uint64_t val;
942 if (elem.get_uint64().get(val) == SUCCESS) {
943 if (val == 0) return 1;
944 size_t len = 0;
945 while (val > 0) { len++; val /= 10; }
946 return len;
947 }
948 return 1;
949 }
950 case dom::element_type::DOUBLE: {
951 double val;
952 if (elem.get_double().get(val) == SUCCESS) {
953 char buf[32];
954 int len = snprintf(buf, sizeof(buf), "%.17g", val);
955 return len > 0 ? static_cast<size_t>(len) : 1;
956 }
957 return 1;
958 }
959 case dom::element_type::BOOL: {
960 bool val;
961 if (elem.get_bool().get(val) == SUCCESS) {
962 return val ? 4 : 5; // "true" or "false"
963 }
964 return 5;
965 }
966 case dom::element_type::NULL_VALUE:
967 return 4; // "null"
968 default:
969 return 4;
970 }
971}
972
973inline std::vector<size_t> fractured_string_builder::calculate_column_widths(
974 const dom::array& arr,
975 const std::vector<std::string>& columns) const {
976
977 std::vector<size_t> widths(columns.size(), 0);
978
979 for (dom::element elem : arr) {
980 dom::object obj;
981 if (elem.get_object().get(obj) != SUCCESS) {
982 continue;
983 }
984
985 for (size_t col_idx = 0; col_idx < columns.size(); col_idx++) {
986 const std::string& key = columns[col_idx];
987
988 for (dom::key_value_pair field : obj) {
989 if (field.key == key) {
990 // Measure actual value length
991 size_t len = measure_value_length(field.value);
992 widths[col_idx] = (std::max)(widths[col_idx], len);
993 break;
994 }
995 }
996 }
997 }
998
999 return widths;
1000}
1001
1002} // namespace internal
1003
1004//
1005// Public API Implementation
1006//
1007
1008template <class T>
1009std::string fractured_json(T x) {
1011}
1012
1013template <class T>
1014std::string fractured_json(T x, const fractured_json_options& options) {
1015 internal::fractured_string_builder sb(options);
1016 sb.append(x);
1017 std::string_view result = sb.str();
1018 return std::string(result.data(), result.size());
1019}
1020
#if SIMDJSON_EXCEPTIONS
// Exception-enabled convenience overloads: unwrap a simdjson_result,
// throwing simdjson_error if it holds an error, then format the value.

// Default-options variant.
template <class T>
std::string fractured_json(simdjson_result<T> x) {
  const auto failure = x.error();
  if (failure) {
    throw simdjson_error(failure);
  }
  return fractured_json(x.value());
}

// Explicit-options variant.
template <class T>
std::string fractured_json(simdjson_result<T> x, const fractured_json_options& options) {
  const auto failure = x.error();
  if (failure) {
    throw simdjson_error(failure);
  }
  return fractured_json(x.value(), options);
}
#endif
1038
1039// Explicit template instantiations for common types
1040template std::string fractured_json(dom::element x);
1041template std::string fractured_json(dom::element x, const fractured_json_options& options);
1042template std::string fractured_json(dom::array x);
1043template std::string fractured_json(dom::array x, const fractured_json_options& options);
1044template std::string fractured_json(dom::object x);
1045template std::string fractured_json(dom::object x, const fractured_json_options& options);
1046
1047#if SIMDJSON_EXCEPTIONS
1048template std::string fractured_json(simdjson_result<dom::element> x);
1049template std::string fractured_json(simdjson_result<dom::element> x, const fractured_json_options& options);
1050#endif
1051
1052//
1053// String-based API for formatting any JSON string
1054//
1055
1056inline std::string fractured_json_string(std::string_view json_str) {
1058}
1059
1060inline std::string fractured_json_string(std::string_view json_str,
1061 const fractured_json_options& options) {
1062 // Parse the JSON string
1063 dom::parser parser;
1064 dom::element doc;
1065 // Need to pad the string for simdjson
1066 auto padded = padded_string(json_str);
1067 auto error = parser.parse(padded).get(doc);
1068 if (error) {
1069 // If parsing fails, return the original string
1070 return std::string(json_str);
1071 }
1072 return fractured_json(doc, options);
1073}
1074
1075} // namespace simdjson
1076
1077#endif // SIMDJSON_DOM_FRACTURED_JSON_INL_H
A JSON element.
Definition element.h:33
A persistent document parser.
Definition parser.h:30
simdjson_result< element > parse(const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document and return a temporary reference to it.
Definition parser-inl.h:153
@ STRING
std::string_view
@ UINT64
uint64_t: any integer that fits in uint64_t but not int64_t
@ DOUBLE
double: Any number with a "." or "e" that fits in double.
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
std::string fractured_json_string(std::string_view json_str)
Format a JSON string using FracturedJson formatting.
std::string fractured_json(T x)
Format JSON using FracturedJson formatting with default options.
@ SUCCESS
No error.
Definition error.h:20
Configuration options for FracturedJson formatting.
String with extra allocation for ease of use with parser::parse()