|
4 | 4 | TODO: Handle optional parameters and allow for schema evolution. https://github.com/edx/edx-arch-experiments/issues/53 |
5 | 5 | """ |
6 | 6 |
|
7 | | - |
8 | | -from typing import get_args, get_origin |
| 7 | +from typing import Any, Type, get_args, get_origin |
9 | 8 |
|
10 | 9 | from .custom_serializers import DEFAULT_CUSTOM_SERIALIZERS |
11 | 10 | from .types import PYTHON_TYPE_TO_AVRO_MAPPING, SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING |
@@ -74,37 +73,19 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, |
74 | 73 | raise Exception("Unable to generate Avro schema for dict or array fields without annotation types.") |
75 | 74 | avro_type = PYTHON_TYPE_TO_AVRO_MAPPING[data_type] |
76 | 75 | field["type"] = avro_type |
77 | | - elif data_type_origin == list: |
78 | | - # Returns types of list contents. |
79 | | - # Example: if data_type == List[int], arg_data_type = (int,) |
80 | | - arg_data_type = get_args(data_type) |
81 | | - if not arg_data_type: |
82 | | - raise TypeError( |
83 | | - "List without annotation type is not supported. The argument should be a type, for eg., List[int]" |
84 | | - ) |
85 | | - avro_type = SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.get(arg_data_type[0]) |
86 | | - if avro_type is None: |
87 | | - raise TypeError( |
88 | | - "Only following types are supported for list arguments:" |
89 | | - f" {set(SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.keys())}" |
90 | | - ) |
91 | | - field["type"] = {"type": PYTHON_TYPE_TO_AVRO_MAPPING[data_type_origin], "items": avro_type} |
92 | | - elif data_type_origin == dict: |
93 | | - # Returns types of dict contents. |
94 | | - # Example: if data_type == Dict[str, int], arg_data_type = (str, int) |
95 | | - arg_data_type = get_args(data_type) |
96 | | - if not arg_data_type: |
97 | | - raise TypeError( |
98 | | - "Dict without annotation type is not supported. The argument should be a type, for eg., Dict[str, int]" |
99 | | - ) |
100 | | - avro_type = SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.get(arg_data_type[1]) |
101 | | - if avro_type is None: |
102 | | - raise TypeError( |
103 | | - "Only following types are supported for dict arguments:" |
104 | | - f" {set(SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.keys())}" |
105 | | - ) |
106 | | - field["type"] = {"type": PYTHON_TYPE_TO_AVRO_MAPPING[data_type_origin], "values": avro_type} |
107 | | - # Case 3: data_type is an attrs class |
| 76 | + # Case 3: data_type is a list (possibly with complex items) |
| 77 | + elif data_type_origin is list: |
| 78 | + item_avro_type = _get_avro_type_for_list_item( |
| 79 | + data_type, previously_seen_types, all_field_type_overrides |
| 80 | + ) |
| 81 | + field["type"] = {"type": "array", "items": item_avro_type} |
| 82 | + # Case 4: data_type is a dictionary (possibly with complex values) |
| 83 | + elif data_type_origin is dict: |
| 84 | + item_avro_type = _get_avro_type_for_dict_item( |
| 85 | + data_type, previously_seen_types, all_field_type_overrides |
| 86 | + ) |
| 87 | + field["type"] = {"type": "map", "values": item_avro_type} |
| 88 | + # Case 5: data_type is an attrs class |
108 | 89 | elif hasattr(data_type, "__attrs_attrs__"): |
109 | 90 | # Inner Attrs Class |
110 | 91 |
|
@@ -135,3 +116,129 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, |
135 | 116 | single_type = field["type"] |
136 | 117 | field["type"] = ["null", single_type] |
137 | 118 | return field |
| 119 | + |
| 120 | + |
| 121 | +def _get_avro_type_for_dict_item( |
| 122 | + data_type: Type[dict], previously_seen_types: set, type_overrides: dict[Any, str] |
| 123 | +) -> str | dict[str, str]: |
| 124 | + """ |
| 125 | + Determine the Avro type definition for a dictionary value based on its Python type. |
| 126 | +
|
| 127 | + This function converts Python dictionary value types to their corresponding |
| 128 | + Avro type representations. It supports simple types, complex nested types (like |
| 129 | + dictionaries and lists), and custom classes decorated with attrs. |
| 130 | +
|
| 131 | + Args: |
| 132 | + data_type (Type[dict]): The Python dictionary type with its type annotation |
| 133 | + (e.g., Dict[str, str], Dict[str, int], Dict[str, List[str]]) |
| 134 | + previously_seen_types (set): Set of type names that have already been |
| 135 | + processed, used to prevent duplicate record definitions |
| 136 | + type_overrides (dict[Any, str]): Dictionary mapping custom Python types to |
| 137 | + their Avro type representations |
| 138 | +
|
| 139 | + Returns: |
| 140 | + One of the following Avro type representations: |
| 141 | + - A string (e.g., "string", "int", "boolean") for simple types |
| 142 | + - A dictionary with a complex type definition for container types, such as: |
| 143 | + - {"type": "array", "items": <avro_type>} for lists |
| 144 | + - {"type": "map", "values": <avro_type>} for nested dictionaries |
| 145 | + - {"name": "<TypeName>", "type": "record", "fields": [...]} for attrs classes |
| 146 | + - A string with a record name for previously defined record types |
| 147 | +
|
| 148 | + Raises: |
| 149 | + TypeError: If the dictionary has no type annotation, has non-string keys, |
| 150 | + or contains unsupported value types |
| 151 | + """ |
| 152 | + # Validate dict has type annotation |
| 153 | + # Example: if data_type == Dict[str, int], arg_data_type = (str, int) |
| 154 | + arg_data_type = get_args(data_type) |
| 155 | + if not arg_data_type: |
| 156 | + raise TypeError( |
| 157 | + "Dict without annotation type is not supported. The argument should be a type, e.g. Dict[str, int]" |
| 158 | + ) |
| 159 | + |
| 160 | + value_type = arg_data_type[1] |
| 161 | + |
| 162 | + # Case 1: Simple types mapped in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING |
| 163 | + avro_type = SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.get(value_type) |
| 164 | + if avro_type is not None: |
| 165 | + return avro_type |
| 166 | + |
| 167 | + # Case 2: Complex types (dict, list, or attrs class) |
| 168 | + if get_origin(value_type) in (dict, list) or hasattr(value_type, "__attrs_attrs__"): |
| 169 | + # Create a temporary field for the value type and extract its type definition |
| 170 | + temp_field = _create_avro_field_definition("temp", value_type, previously_seen_types, type_overrides) |
| 171 | + return temp_field["type"] |
| 172 | + |
| 173 | + # Case 3: Unannotated containers (raise specific errors) |
| 174 | + if value_type is dict: |
| 175 | + raise TypeError("A Dictionary as a dictionary value should have a type annotation.") |
| 176 | + if value_type is list: |
| 177 | + raise TypeError("A List as a dictionary value should have a type annotation.") |
| 178 | + |
| 179 | + # Case 4: Unsupported types |
| 180 | + raise TypeError(f"Type {value_type} is not supported for dict values.") |
| 181 | + |
| 182 | + |
def _get_avro_type_for_list_item(
    data_type: Type[list], previously_seen_types: set, type_overrides: dict[Any, str]
) -> str | dict[str, str]:
    """
    Resolve the Avro type definition for the items of an annotated list type.

    Resolution order for the item type:

    1. A type found in ``SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING`` is returned as
       its mapped Avro type.
    2. An annotated dict/list or an attrs class is handed to
       ``_create_avro_field_definition``; the ``"type"`` entry of the field it
       returns is used as the item type.
    3. Everything else raises ``TypeError``.

    Args:
        data_type (Type[list]): The annotated list type
            (e.g., List[str], List[int], List[Dict[str, str]]).
        previously_seen_types (set): Forwarded as-is to
            ``_create_avro_field_definition`` for complex item types.
        type_overrides (dict[Any, str]): Forwarded as-is to
            ``_create_avro_field_definition`` for complex item types.

    Returns:
        The Avro type for the list items: the mapped simple type, or the
        ``"type"`` entry produced by ``_create_avro_field_definition``.

    Raises:
        TypeError: If the list has no type annotation, or its item type is an
            unannotated dict/list or any other unsupported type.
    """
    # Example: List[int] -> (int,); a bare list/List -> ()
    type_args = get_args(data_type)
    if not type_args:
        raise TypeError(
            "List without annotation type is not supported. The argument should be a type, e.g. List[int]"
        )
    element_type = type_args[0]

    # Simple types resolve straight from the mapping.
    simple_avro_type = SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.get(element_type)
    if simple_avro_type is not None:
        return simple_avro_type

    # Annotated containers and attrs classes are resolved by building a
    # throwaway field and keeping only its type definition.
    is_annotated_container = get_origin(element_type) in (dict, list)
    if is_annotated_container or hasattr(element_type, "__attrs_attrs__"):
        return _create_avro_field_definition(
            "temp", element_type, previously_seen_types, type_overrides
        )["type"]

    # Bare containers get a dedicated message pointing at the missing annotation.
    for bare_container, label in ((dict, "Dictionary"), (list, "List")):
        if element_type is bare_container:
            raise TypeError(f"A {label} as a list item should have a type annotation.")

    # Nothing matched: the item type is unsupported.
    raise TypeError(f"Type {element_type} is not supported for list items.")
0 commit comments