66use indexmap:: IndexMap ;
77
88use crate :: error:: Error ;
9- use crate :: schema:: { SchemaRef , StructField , StructType } ;
9+ use crate :: expressions:: ColumnName ;
10+ use crate :: schema:: { DataType , SchemaRef , StructField , StructType } ;
1011use crate :: DeltaResult ;
1112
1213/// A schema evolution operation to be applied during ALTER TABLE.
@@ -18,6 +19,60 @@ use crate::DeltaResult;
1819pub ( crate ) enum SchemaOperation {
1920 /// Add a top-level column.
2021 AddColumn { field : StructField } ,
22+
23+ /// Change a column's nullability from NOT NULL to nullable.
24+ SetNullable { path : ColumnName } ,
25+ }
26+
27+ // Helper to modify a nested column. For each component in `remaining`, locates the matching
28+ // field (case-insensitive), then descends into the next nested struct. At the leaf, applies
29+ // `modifier` to produce the replacement field. Returns the modified field list. `full_path` is
30+ // used only in error messages.
31+ //
32+ // Unlike `StructType::walk_column_fields_by` which is iterative and read-only, this must be
33+ // recursive so it can rebuild parent structs bottom-up after modifying the leaf.
34+ //
35+ // Returns an error if a field in the path does not exist or an intermediate field is not a struct.
36+ fn modify_field_at_path (
37+ mut fields : Vec < StructField > ,
38+ full_path : & [ String ] ,
39+ remaining : & [ String ] ,
40+ modifier : & dyn Fn ( StructField ) -> DeltaResult < StructField > ,
41+ ) -> DeltaResult < Vec < StructField > > {
42+ let ( first, rest) = remaining
43+ . split_first ( )
44+ . ok_or_else ( || Error :: internal_error ( "modify_field_at_path called with empty path" ) ) ?;
45+
46+ // Delta column names are case-insensitive
47+ let idx = fields
48+ . iter ( )
49+ . position ( |f| f. name ( ) . eq_ignore_ascii_case ( first) )
50+ . ok_or_else ( || {
51+ Error :: generic ( format ! (
52+ "Column '{}' does not exist" ,
53+ ColumnName :: new( full_path. iter( ) )
54+ ) )
55+ } ) ?;
56+
57+ if rest. is_empty ( ) {
58+ let original = fields. remove ( idx) ;
59+ fields. insert ( idx, modifier ( original) ?) ;
60+ } else {
61+ // Take ownership of the field to avoid cloning inner struct fields
62+ let mut field = fields. remove ( idx) ;
63+ let DataType :: Struct ( inner) = field. data_type else {
64+ return Err ( Error :: generic ( format ! (
65+ "Column '{}' is not a struct; cannot traverse into it" ,
66+ ColumnName :: new( full_path. iter( ) )
67+ ) ) ) ;
68+ } ;
69+ // into_fields() gives owned fields -- no cloning
70+ let new_inner_fields =
71+ modify_field_at_path ( inner. into_fields ( ) . collect ( ) , full_path, rest, modifier) ?;
72+ field. data_type = DataType :: Struct ( Box :: new ( StructType :: try_new ( new_inner_fields) ?) ) ;
73+ fields. insert ( idx, field) ;
74+ }
75+ Ok ( fields)
2176}
2277
2378/// The result of applying schema operations.
@@ -68,6 +123,43 @@ pub(crate) fn apply_schema_operations(
68123 }
69124 fields. insert ( key, field) ;
70125 }
126+ SchemaOperation :: SetNullable { path } => {
127+ let segments = path. path ( ) ;
128+ let key = segments
129+ . first ( )
130+ . ok_or_else ( || Error :: generic ( "Cannot set nullable: empty column path" ) ) ?
131+ . to_lowercase ( ) ;
132+ let field = fields. get_mut ( & key) . ok_or_else ( || {
133+ Error :: generic ( format ! (
134+ "Cannot set nullable on column '{path}': column does not exist"
135+ ) )
136+ } ) ?;
137+ if segments. len ( ) == 1 {
138+ field. nullable = true ;
139+ } else {
140+ // Take ownership of data_type to avoid cloning inner fields
141+ let data_type = std:: mem:: replace ( & mut field. data_type , DataType :: BOOLEAN ) ;
142+ let DataType :: Struct ( inner) = data_type else {
143+ field. data_type = data_type;
144+ return Err ( Error :: generic ( format ! (
145+ "Column '{}' is not a struct; cannot traverse into it \
146+ while resolving '{path}'",
147+ segments[ 0 ]
148+ ) ) ) ;
149+ } ;
150+ let modified_inner = modify_field_at_path (
151+ inner. into_fields ( ) . collect ( ) ,
152+ segments,
153+ & segments[ 1 ..] ,
154+ & |mut f| {
155+ f. nullable = true ;
156+ Ok ( f)
157+ } ,
158+ ) ?;
159+ field. data_type =
160+ DataType :: Struct ( Box :: new ( StructType :: try_new ( modified_inner) ?) ) ;
161+ }
162+ }
71163 }
72164 }
73165
@@ -80,6 +172,7 @@ pub(crate) fn apply_schema_operations(
80172#[ cfg( test) ]
81173mod tests {
82174 use super :: * ;
175+ use crate :: expressions:: column_name;
83176 use crate :: schema:: { DataType , StructField , StructType } ;
84177
85178 fn simple_schema ( ) -> StructType {
@@ -90,6 +183,103 @@ mod tests {
90183 . unwrap ( )
91184 }
92185
186+ fn nested_schema ( ) -> StructType {
187+ StructType :: try_new ( vec ! [
188+ StructField :: not_null( "id" , DataType :: INTEGER ) ,
189+ StructField :: nullable(
190+ "address" ,
191+ StructType :: try_new( vec![
192+ StructField :: not_null( "city" , DataType :: STRING ) ,
193+ StructField :: nullable( "zip" , DataType :: STRING ) ,
194+ ] )
195+ . unwrap( ) ,
196+ ) ,
197+ ] )
198+ . unwrap ( )
199+ }
200+
201+ // === modify_field_at_path tests ===
202+
203+ #[ test]
204+ fn modify_top_level_field ( ) {
205+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
206+ let path = vec ! [ "id" . to_string( ) ] ;
207+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
208+ f. nullable = true ;
209+ Ok ( f)
210+ } )
211+ . unwrap ( ) ;
212+ let id = result. iter ( ) . find ( |f| f. name ( ) == "id" ) . unwrap ( ) ;
213+ assert ! ( id. is_nullable( ) ) ;
214+ }
215+
216+ #[ test]
217+ fn modify_nested_field ( ) {
218+ let fields: Vec < StructField > = nested_schema ( ) . into_fields ( ) . collect ( ) ;
219+ let path = vec ! [ "address" . to_string( ) , "city" . to_string( ) ] ;
220+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
221+ f. nullable = true ;
222+ Ok ( f)
223+ } )
224+ . unwrap ( ) ;
225+ let addr = result. iter ( ) . find ( |f| f. name ( ) == "address" ) . unwrap ( ) ;
226+ match addr. data_type ( ) {
227+ DataType :: Struct ( s) => assert ! ( s. field( "city" ) . unwrap( ) . is_nullable( ) ) ,
228+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
229+ }
230+ }
231+
232+ #[ test]
233+ fn modify_preserves_sibling_fields ( ) {
234+ let fields: Vec < StructField > = nested_schema ( ) . into_fields ( ) . collect ( ) ;
235+ let path = vec ! [ "address" . to_string( ) , "city" . to_string( ) ] ;
236+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
237+ f. nullable = true ;
238+ Ok ( f)
239+ } )
240+ . unwrap ( ) ;
241+ // "id" should be unchanged
242+ let id = result. iter ( ) . find ( |f| f. name ( ) == "id" ) . unwrap ( ) ;
243+ assert ! ( !id. is_nullable( ) ) ;
244+ // "zip" sibling should be unchanged
245+ let addr = result. iter ( ) . find ( |f| f. name ( ) == "address" ) . unwrap ( ) ;
246+ match addr. data_type ( ) {
247+ DataType :: Struct ( s) => assert ! ( s. field( "zip" ) . unwrap( ) . is_nullable( ) ) ,
248+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
249+ }
250+ }
251+
252+ #[ test]
253+ fn modify_nonexistent_field_fails ( ) {
254+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
255+ let path = vec ! [ "nope" . to_string( ) ] ;
256+ let err = modify_field_at_path ( fields, & path, & path, & |f| Ok ( f) ) . unwrap_err ( ) ;
257+ assert ! ( err. to_string( ) . contains( "does not exist" ) ) ;
258+ }
259+
260+ #[ test]
261+ fn modify_through_non_struct_fails ( ) {
262+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
263+ let path = vec ! [ "name" . to_string( ) , "inner" . to_string( ) ] ;
264+ let err = modify_field_at_path ( fields, & path, & path, & |f| Ok ( f) ) . unwrap_err ( ) ;
265+ assert ! ( err. to_string( ) . contains( "not a struct" ) ) ;
266+ }
267+
268+ #[ test]
269+ fn modify_case_insensitive_lookup ( ) {
270+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
271+ let path = vec ! [ "ID" . to_string( ) ] ;
272+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
273+ f. nullable = true ;
274+ Ok ( f)
275+ } )
276+ . unwrap ( ) ;
277+ let id = result. iter ( ) . find ( |f| f. name ( ) == "id" ) . unwrap ( ) ;
278+ assert ! ( id. is_nullable( ) ) ;
279+ }
280+
281+ // === apply_schema_operations tests ===
282+
93283 #[ test]
94284 fn add_nullable_column_succeeds ( ) {
95285 let ops = vec ! [ SchemaOperation :: AddColumn {
@@ -164,4 +354,123 @@ mod tests {
164354 let names: Vec < & String > = result. schema . fields ( ) . map ( |f| f. name ( ) ) . collect ( ) ;
165355 assert_eq ! ( names, vec![ "id" , "name" , "email" ] ) ;
166356 }
357+
358+ // === apply_schema_operations: SetNullable tests ===
359+
360+ #[ test]
361+ fn set_nullable_on_required_field ( ) {
362+ let ops = vec ! [ SchemaOperation :: SetNullable {
363+ path: column_name!( "id" ) ,
364+ } ] ;
365+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
366+ let id_field = result. schema . field ( "id" ) . unwrap ( ) ;
367+ assert ! ( id_field. is_nullable( ) ) ;
368+ }
369+
370+ #[ test]
371+ fn set_nullable_already_nullable_is_noop ( ) {
372+ let ops = vec ! [ SchemaOperation :: SetNullable {
373+ path: column_name!( "name" ) ,
374+ } ] ;
375+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
376+ let name_field = result. schema . field ( "name" ) . unwrap ( ) ;
377+ assert ! ( name_field. is_nullable( ) ) ;
378+ }
379+
380+ #[ test]
381+ fn set_nullable_case_insensitive ( ) {
382+ let ops = vec ! [ SchemaOperation :: SetNullable {
383+ path: column_name!( "ID" ) ,
384+ } ] ;
385+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
386+ let id_field = result. schema . field ( "id" ) . unwrap ( ) ;
387+ assert ! ( id_field. is_nullable( ) ) ;
388+ }
389+
390+ #[ test]
391+ fn set_nullable_deeply_nested_field ( ) {
392+ let schema = StructType :: try_new ( vec ! [
393+ StructField :: not_null( "id" , DataType :: INTEGER ) ,
394+ StructField :: nullable(
395+ "address" ,
396+ StructType :: try_new( vec![ StructField :: nullable(
397+ "location" ,
398+ StructType :: try_new( vec![ StructField :: not_null( "zipcode" , DataType :: STRING ) ] )
399+ . unwrap( ) ,
400+ ) ] )
401+ . unwrap( ) ,
402+ ) ,
403+ ] )
404+ . unwrap ( ) ;
405+ let ops = vec ! [ SchemaOperation :: SetNullable {
406+ path: column_name!( "address.location.zipcode" ) ,
407+ } ] ;
408+ let result = apply_schema_operations ( schema, ops) . unwrap ( ) ;
409+ let addr = result. schema . field ( "address" ) . unwrap ( ) ;
410+ match addr. data_type ( ) {
411+ DataType :: Struct ( s) => match s. field ( "location" ) . unwrap ( ) . data_type ( ) {
412+ DataType :: Struct ( loc) => {
413+ let zip = loc. field ( "zipcode" ) . unwrap ( ) ;
414+ assert ! ( zip. is_nullable( ) ) ;
415+ }
416+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
417+ } ,
418+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
419+ }
420+ }
421+
422+ #[ test]
423+ fn chain_add_and_set_nullable ( ) {
424+ let ops = vec ! [
425+ SchemaOperation :: AddColumn {
426+ field: StructField :: nullable( "email" , DataType :: STRING ) ,
427+ } ,
428+ SchemaOperation :: SetNullable {
429+ path: column_name!( "id" ) ,
430+ } ,
431+ ] ;
432+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
433+ assert_eq ! ( result. schema. fields( ) . count( ) , 3 ) ;
434+ assert ! ( result. schema. field( "email" ) . is_some( ) ) ;
435+ assert ! ( result. schema. field( "id" ) . unwrap( ) . is_nullable( ) ) ;
436+ }
437+
438+ #[ test]
439+ fn set_nullable_nonexistent_column_fails ( ) {
440+ let ops = vec ! [ SchemaOperation :: SetNullable {
441+ path: column_name!( "nonexistent" ) ,
442+ } ] ;
443+ let err = apply_schema_operations ( simple_schema ( ) , ops) . unwrap_err ( ) ;
444+ assert ! ( err. to_string( ) . contains( "does not exist" ) ) ;
445+ }
446+
447+ #[ test]
448+ fn set_nullable_nested_field ( ) {
449+ let schema = StructType :: try_new ( vec ! [
450+ StructField :: not_null( "id" , DataType :: INTEGER ) ,
451+ StructField :: nullable(
452+ "address" ,
453+ StructType :: try_new( vec![ StructField :: not_null( "city" , DataType :: STRING ) ] ) . unwrap( ) ,
454+ ) ,
455+ ] )
456+ . unwrap ( ) ;
457+ let ops = vec ! [ SchemaOperation :: SetNullable {
458+ path: column_name!( "address.city" ) ,
459+ } ] ;
460+ let result = apply_schema_operations ( schema, ops) . unwrap ( ) ;
461+ let addr = result. schema . field ( "address" ) . unwrap ( ) ;
462+ match addr. data_type ( ) {
463+ DataType :: Struct ( s) => assert ! ( s. field( "city" ) . unwrap( ) . is_nullable( ) ) ,
464+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
465+ }
466+ }
467+
468+ #[ test]
469+ fn set_nullable_through_non_struct_fails ( ) {
470+ let ops = vec ! [ SchemaOperation :: SetNullable {
471+ path: column_name!( "name.inner" ) ,
472+ } ] ;
473+ let err = apply_schema_operations ( simple_schema ( ) , ops) . unwrap_err ( ) ;
474+ assert ! ( err. to_string( ) . contains( "not a struct" ) ) ;
475+ }
167476}
0 commit comments