22
33from __future__ import annotations
44
5+ from dataclasses import dataclass
56from typing import Any
67
78from .scalar import format_scalar , format_key
89
910
10- def encode_generic (data : Any ) -> str :
@dataclass
class GenericOptions:
    """Options for controlling generic encoding behavior."""

    no_flatten: bool = False
    """When True, disables promotion of fixed-shape nested objects to path
    columns (e.g. "customer>name"). Nested objects use attachment syntax
    instead. Set when targeting open-weight models that show lower
    comprehension on flattened encoding."""
19+
20+
def encode_generic(data: Any, opts: GenericOptions | None = None) -> str:
    """Encode *data* as a GCF document using the generic profile.

    Args:
        data: The root value to encode (``None``, dict, list, or scalar).
        opts: Encoding options. When omitted, a default ``GenericOptions()``
            is used.

    Returns:
        The encoded document: the ``GCF profile=generic`` header followed by
        the lines produced for *data*, newline-separated and terminated by a
        trailing newline.
    """
    if opts is None:
        opts = GenericOptions()
    # The header line always comes first; the helpers append to ``out`` in place.
    out: list[str] = ["GCF profile=generic"]
    _encode_root_value(data, out, opts)
    return "\n".join(out) + "\n"
1427
1528
def _encode_root_value(v: Any, out: list[str], opts: GenericOptions) -> None:
    """Append the encoding of the document's root value *v* to *out*.

    ``None`` is written as ``=-``. Dicts are handed to ``_encode_object`` at
    depth 0 and lists to ``_encode_root_array``. Any other value is written
    as ``=`` followed by ``format_scalar(v)``.

    Args:
        v: The root value.
        out: Output line buffer; mutated in place.
        opts: Encoding options, forwarded to the container encoders.
    """
    if v is None:
        out.append("=-")
    elif isinstance(v, dict):
        _encode_object(v, out, 0, opts)
    elif isinstance(v, list):
        _encode_root_array(v, out, opts)
    else:
        out.append(f"={format_scalar(v)}")
2538
2639
def _encode_object(d: dict, out: list[str], depth: int, opts: GenericOptions) -> None:
    """Append the encoding of dict *d* to *out*, one entry at a time.

    Entries are visited in the dict's iteration order. For each entry:

    * a dict value emits a ``## <key>`` header line and recurses one level
      deeper;
    * a list value is delegated to ``_encode_named_array`` at the current
      depth;
    * any other value emits a ``<key>=<scalar>`` line.

    Args:
        d: The mapping to encode.
        out: Output line buffer; mutated in place.
        depth: Nesting level, turned into a line prefix by ``_indent``.
        opts: Encoding options, forwarded unchanged to nested encoders.
    """
    prefix = _indent(depth)
    for key, value in d.items():
        fk = format_key(key)
        if isinstance(value, dict):
            out.append(f"{prefix}## {fk}")
            _encode_object(value, out, depth + 1, opts)
        elif isinstance(value, list):
            # The array encoder receives the already-formatted key.
            _encode_named_array(fk, value, out, depth, opts)
        else:
            out.append(f"{prefix}{fk}={format_scalar(value)}")
3851
3952
40- def _encode_root_array (arr : list , out : list [str ]) -> None :
53+ def _encode_root_array (arr : list , out : list [str ], opts : GenericOptions ) -> None :
4154 if not arr :
4255 out .append ("## [0]" )
4356 return
@@ -47,12 +60,12 @@ def _encode_root_array(arr: list, out: list[str]) -> None:
4760 return
4861 fields = _tabular_fields (arr )
4962 if fields is not None :
50- _encode_tabular ("## " , arr , fields , out , 0 )
63+ _encode_tabular ("## " , arr , fields , out , 0 , opts )
5164 return
52- _encode_expanded ("## " , arr , out , 0 )
65+ _encode_expanded ("## " , arr , out , 0 , opts )
5366
5467
55- def _encode_named_array (name : str , arr : list , out : list [str ], depth : int ) -> None :
68+ def _encode_named_array (name : str , arr : list , out : list [str ], depth : int , opts : GenericOptions ) -> None :
5669 prefix = _indent (depth )
5770 if not arr :
5871 out .append (f"{ prefix } ## { name } [0]" )
@@ -63,9 +76,9 @@ def _encode_named_array(name: str, arr: list, out: list[str], depth: int) -> Non
6376 return
6477 fields = _tabular_fields (arr )
6578 if fields is not None :
66- _encode_tabular (f"{ prefix } ## { name } " , arr , fields , out , depth )
79+ _encode_tabular (f"{ prefix } ## { name } " , arr , fields , out , depth , opts )
6780 return
68- _encode_expanded (f"{ prefix } ## { name } " , arr , out , depth )
81+ _encode_expanded (f"{ prefix } ## { name } " , arr , out , depth , opts )
6982
7083
7184def _tabular_fields (arr : list ) -> list [str ] | None :
@@ -152,6 +165,9 @@ def _analyze_flattenable(
152165 arr : list [dict ], field_name : str , parent_path : str
153166) -> list [dict ] | None :
154167 """Analyze whether a field can be flattened. Returns list of leaf descriptors or None."""
168+ # Field names containing ">" cannot be flattened (would create ambiguous paths).
169+ if ">" in field_name :
170+ return None
155171 canonical_shape : dict [str , str ] | None = None # key -> "scalar" | "nested"
156172
157173 for item in arr :
@@ -249,26 +265,39 @@ def _resolve_key_chain(item: Any, keys: list[str]) -> tuple[Any, bool]:
249265
250266
251267def _encode_tabular (
252- header_prefix : str , arr : list [dict ], fields : list [str ], out : list [str ], depth : int
268+ header_prefix : str , arr : list [dict ], fields : list [str ], out : list [str ], depth : int , opts : GenericOptions
253269) -> None :
254270 prefix = _indent (depth )
255271
256272 # Phase 0: Analyze fields for flattening.
257273 flatten_map : dict [str , list [dict ]] = {}
258- for f in fields :
259- leaves = _analyze_flattenable (arr , f , "" )
260- if leaves and len (leaves ) > 0 :
261- flatten_map [f ] = leaves
274+ if not opts .no_flatten :
275+ for f in fields :
276+ leaves = _analyze_flattenable (arr , f , "" )
277+ if leaves and len (leaves ) > 0 :
278+ flatten_map [f ] = leaves
279+
280+ # Fields whose names contain ">" must not appear as tabular columns
281+ # because the decoder would interpret them as flattened path columns.
282+ # Track them for per-row attachment emission (spec rule 7.4.6.1.4).
283+ gt_fields = {f for f in fields if f not in flatten_map and ">" in f }
262284
263285 # Build expanded column list.
264286 columns : list [dict ] = []
265287 for f in fields :
288+ if f in gt_fields :
289+ continue
266290 if f in flatten_map :
267291 for leaf in flatten_map [f ]:
268292 columns .append ({"header" : format_key (leaf ["path" ]), "type" : "flat" , "field" : f , "keys" : leaf ["keys" ]})
269293 else :
270294 columns .append ({"header" : format_key (f ), "type" : "original" , "field" : f , "keys" : []})
271295
296+ # If all fields were excluded (all contain ">"), fall back to expanded.
297+ if not columns :
298+ _encode_expanded (header_prefix , arr , out , depth , opts )
299+ return
300+
272301 # Pre-compute inline schemas and shared array schemas (skip flattened fields).
273302 inline_schemas : dict [str , list [str ]] = {}
274303 shared_arr_schemas : dict [str , list [str ]] = {}
@@ -333,6 +362,15 @@ def _encode_tabular(
333362 else :
334363 cells .append (format_scalar (v , "|" ))
335364
365+ # Emit fields with ">" in their names as per-row attachments.
366+ for f in fields :
367+ if f not in gt_fields :
368+ continue
369+ if f not in item :
370+ continue
371+ row_has_attachment = True
372+ attachments .append ((f , item [f ], False , None ))
373+
336374 row = "|" .join (cells )
337375 if row_has_attachment :
338376 out .append (f"{ prefix } @{ i } { row } " )
@@ -351,17 +389,25 @@ def _encode_tabular(
351389 elif isinstance (att_val , list ):
352390 sas = shared_arr_schemas .get (att_name )
353391 if sas and i > 0 :
354- _encode_attachment_array_shared (prefix , fk , att_val , out , depth + 2 , sas )
392+ _encode_attachment_array_shared (prefix , fk , att_val , out , depth + 2 , sas , opts )
355393 else :
356- _encode_attachment_array (prefix , fk , att_val , out , depth + 2 )
394+ _encode_attachment_array (prefix , fk , att_val , out , depth + 2 , opts )
357395 elif isinstance (att_val , dict ):
358396 out .append (f"{ prefix } .{ fk } {{}}" )
359- _encode_object (att_val , out , depth + 2 )
397+ _encode_object (att_val , out , depth + 2 , opts )
398+ else :
399+ # Scalar attachment (e.g. field names containing ">").
400+ if att_val is None :
401+ out .append (f"{ prefix } .{ fk } =-" )
402+ else :
403+ out .append (f"{ prefix } .{ fk } ={ format_scalar (att_val )} " )
360404
361405
362406def _encode_attachment_array (
363- att_prefix : str , fk : str , arr : list , out : list [str ], depth : int
407+ att_prefix : str , fk : str , arr : list , out : list [str ], depth : int , opts : GenericOptions | None = None
364408) -> None :
409+ if opts is None :
410+ opts = GenericOptions ()
365411 if not arr :
366412 out .append (f"{ att_prefix } .{ fk } [0]" )
367413 elif _all_primitives (arr ):
@@ -370,13 +416,13 @@ def _encode_attachment_array(
370416 else :
371417 fields = _tabular_fields (arr )
372418 if fields is not None :
373- _encode_tabular (f"{ att_prefix } .{ fk } " , arr , fields , out , depth )
419+ _encode_tabular (f"{ att_prefix } .{ fk } " , arr , fields , out , depth , opts )
374420 else :
375- _encode_expanded (f"{ att_prefix } .{ fk } " , arr , out , depth )
421+ _encode_expanded (f"{ att_prefix } .{ fk } " , arr , out , depth , opts )
376422
377423
378424def _encode_attachment_array_shared (
379- att_prefix : str , fk : str , arr : list , out : list [str ], depth : int , shared_fields : list [str ]
425+ att_prefix : str , fk : str , arr : list , out : list [str ], depth : int , shared_fields : list [str ], opts : GenericOptions | None = None
380426) -> None :
381427 if not arr :
382428 out .append (f"{ att_prefix } .{ fk } [0]" )
@@ -403,25 +449,29 @@ def _encode_attachment_array_shared(
403449 out .append (f"{ prefix } { '|' .join (cells )} " )
404450 else :
405451 # Fields don't match: fall back to full encoding.
406- _encode_attachment_array (att_prefix , fk , arr , out , depth )
452+ _encode_attachment_array (att_prefix , fk , arr , out , depth , opts )
407453
408454
409- def _encode_expanded (header_prefix : str , arr : list , out : list [str ], depth : int ) -> None :
455+ def _encode_expanded (header_prefix : str , arr : list , out : list [str ], depth : int , opts : GenericOptions | None = None ) -> None :
456+ if opts is None :
457+ opts = GenericOptions ()
410458 prefix = _indent (depth )
411459 out .append (f"{ header_prefix } [{ len (arr )} ]" )
412460 for i , item in enumerate (arr ):
413461 if isinstance (item , dict ):
414462 out .append (f"{ prefix } @{ i } {{}}" )
415- _encode_object (item , out , depth + 1 )
463+ _encode_object (item , out , depth + 1 , opts )
416464 elif isinstance (item , list ):
417- _encode_expanded_array_item (prefix , i , item , out , depth )
465+ _encode_expanded_array_item (prefix , i , item , out , depth , opts )
418466 else :
419467 out .append (f"{ prefix } @{ i } ={ format_scalar (item )} " )
420468
421469
422470def _encode_expanded_array_item (
423- prefix : str , idx : int , arr : list , out : list [str ], depth : int
471+ prefix : str , idx : int , arr : list , out : list [str ], depth : int , opts : GenericOptions | None = None
424472) -> None :
473+ if opts is None :
474+ opts = GenericOptions ()
425475 if not arr :
426476 out .append (f"{ prefix } @{ idx } [0]" )
427477 elif _all_primitives (arr ):
@@ -430,9 +480,9 @@ def _encode_expanded_array_item(
430480 else :
431481 fields = _tabular_fields (arr )
432482 if fields is not None :
433- _encode_tabular (f"{ prefix } @{ idx } " , arr , fields , out , depth + 1 )
483+ _encode_tabular (f"{ prefix } @{ idx } " , arr , fields , out , depth + 1 , opts )
434484 else :
435- _encode_expanded (f"{ prefix } @{ idx } " , arr , out , depth + 1 )
485+ _encode_expanded (f"{ prefix } @{ idx } " , arr , out , depth + 1 , opts )
436486
437487
438488def _all_primitives (arr : list ) -> bool :
0 commit comments