Skip to content

Commit da4e9c1

Browse files
feat: add set_nullable support for ALTER TABLE
Implement the SetNullable schema operation which changes a column's nullability from NOT NULL to nullable. If the column is already nullable, this is a no-op. Only the safe direction is allowed (NOT NULL -> nullable) per the Delta protocol.
1 parent cf050d2 commit da4e9c1

3 files changed

Lines changed: 427 additions & 3 deletions

File tree

kernel/src/transaction/builder/alter_table.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
//!
88
//! - [`Ready`]: Initial state. Operations are available, but `build()` is not (at least one
99
//! operation is required).
10-
//! - [`Modifying`]: After `add_column`. Can chain more `add_column` calls, and `build()` is
11-
//! available.
10+
//! - [`Modifying`]: After at least one operation (`add_column`, `set_nullable`). Can chain
11+
//! more operations, and `build()` is available.
1212
1313
use std::marker::PhantomData;
1414
use std::sync::Arc;
1515

1616
use crate::committer::Committer;
17+
use crate::expressions::ColumnName;
1718
use crate::schema::StructField;
1819
use crate::snapshot::SnapshotRef;
1920
use crate::table_configuration::TableConfiguration;
@@ -67,6 +68,12 @@ impl AlterTableTransactionBuilder<Ready> {
6768
self.operations.push(SchemaOperation::AddColumn { field });
6869
self.transition()
6970
}
71+
72+
/// Change a column's nullability from NOT NULL to nullable.
73+
pub fn set_nullable(mut self, path: ColumnName) -> AlterTableTransactionBuilder<Modifying> {
74+
self.operations.push(SchemaOperation::SetNullable { path });
75+
self.transition()
76+
}
7077
}
7178

7279
impl AlterTableTransactionBuilder<Modifying> {
@@ -76,6 +83,12 @@ impl AlterTableTransactionBuilder<Modifying> {
7683
self
7784
}
7885

86+
/// Change a column's nullability from NOT NULL to nullable.
87+
pub fn set_nullable(mut self, path: ColumnName) -> Self {
88+
self.operations.push(SchemaOperation::SetNullable { path });
89+
self
90+
}
91+
7992
/// Validate and apply schema operations, then build the [`AlterTableTransaction`].
8093
///
8194
/// This method:

kernel/src/transaction/schema_evolution.rs

Lines changed: 310 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
use indexmap::IndexMap;
77

88
use crate::error::Error;
9-
use crate::schema::{SchemaRef, StructField, StructType};
9+
use crate::expressions::ColumnName;
10+
use crate::schema::{DataType, SchemaRef, StructField, StructType};
1011
use crate::DeltaResult;
1112

1213
/// A schema evolution operation to be applied during ALTER TABLE.
@@ -18,6 +19,60 @@ use crate::DeltaResult;
1819
pub(crate) enum SchemaOperation {
1920
/// Add a top-level column.
2021
AddColumn { field: StructField },
22+
23+
/// Change a column's nullability from NOT NULL to nullable.
24+
SetNullable { path: ColumnName },
25+
}
26+
27+
// Helper to modify a nested column. For each component in `remaining`, locates the matching
28+
// field (case-insensitive), then descends into the next nested struct. At the leaf, applies
29+
// `modifier` to produce the replacement field. Returns the modified field list. `full_path` is
30+
// used only in error messages.
31+
//
32+
// Unlike `StructType::walk_column_fields_by` which is iterative and read-only, this must be
33+
// recursive so it can rebuild parent structs bottom-up after modifying the leaf.
34+
//
35+
// Returns an error if a field in the path does not exist or an intermediate field is not a struct.
36+
fn modify_field_at_path(
37+
mut fields: Vec<StructField>,
38+
full_path: &[String],
39+
remaining: &[String],
40+
modifier: &dyn Fn(StructField) -> DeltaResult<StructField>,
41+
) -> DeltaResult<Vec<StructField>> {
42+
let (first, rest) = remaining
43+
.split_first()
44+
.ok_or_else(|| Error::internal_error("modify_field_at_path called with empty path"))?;
45+
46+
// Delta column names are case-insensitive
47+
let idx = fields
48+
.iter()
49+
.position(|f| f.name().eq_ignore_ascii_case(first))
50+
.ok_or_else(|| {
51+
Error::generic(format!(
52+
"Column '{}' does not exist",
53+
ColumnName::new(full_path.iter())
54+
))
55+
})?;
56+
57+
if rest.is_empty() {
58+
let original = fields.remove(idx);
59+
fields.insert(idx, modifier(original)?);
60+
} else {
61+
// Take ownership of the field to avoid cloning inner struct fields
62+
let mut field = fields.remove(idx);
63+
let DataType::Struct(inner) = field.data_type else {
64+
return Err(Error::generic(format!(
65+
"Column '{}' is not a struct; cannot traverse into it",
66+
ColumnName::new(full_path.iter())
67+
)));
68+
};
69+
// into_fields() gives owned fields -- no cloning
70+
let new_inner_fields =
71+
modify_field_at_path(inner.into_fields().collect(), full_path, rest, modifier)?;
72+
field.data_type = DataType::Struct(Box::new(StructType::try_new(new_inner_fields)?));
73+
fields.insert(idx, field);
74+
}
75+
Ok(fields)
2176
}
2277

2378
/// The result of applying schema operations.
@@ -68,6 +123,43 @@ pub(crate) fn apply_schema_operations(
68123
}
69124
fields.insert(key, field);
70125
}
126+
SchemaOperation::SetNullable { path } => {
127+
let segments = path.path();
128+
let key = segments
129+
.first()
130+
.ok_or_else(|| Error::generic("Cannot set nullable: empty column path"))?
131+
.to_lowercase();
132+
let field = fields.get_mut(&key).ok_or_else(|| {
133+
Error::generic(format!(
134+
"Cannot set nullable on column '{path}': column does not exist"
135+
))
136+
})?;
137+
if segments.len() == 1 {
138+
field.nullable = true;
139+
} else {
140+
// Take ownership of data_type to avoid cloning inner fields
141+
let data_type = std::mem::replace(&mut field.data_type, DataType::BOOLEAN);
142+
let DataType::Struct(inner) = data_type else {
143+
field.data_type = data_type;
144+
return Err(Error::generic(format!(
145+
"Column '{}' is not a struct; cannot traverse into it \
146+
while resolving '{path}'",
147+
segments[0]
148+
)));
149+
};
150+
let modified_inner = modify_field_at_path(
151+
inner.into_fields().collect(),
152+
segments,
153+
&segments[1..],
154+
&|mut f| {
155+
f.nullable = true;
156+
Ok(f)
157+
},
158+
)?;
159+
field.data_type =
160+
DataType::Struct(Box::new(StructType::try_new(modified_inner)?));
161+
}
162+
}
71163
}
72164
}
73165

@@ -80,6 +172,7 @@ pub(crate) fn apply_schema_operations(
80172
#[cfg(test)]
81173
mod tests {
82174
use super::*;
175+
use crate::expressions::column_name;
83176
use crate::schema::{DataType, StructField, StructType};
84177

85178
fn simple_schema() -> StructType {
@@ -90,6 +183,103 @@ mod tests {
90183
.unwrap()
91184
}
92185

186+
fn nested_schema() -> StructType {
187+
StructType::try_new(vec![
188+
StructField::not_null("id", DataType::INTEGER),
189+
StructField::nullable(
190+
"address",
191+
StructType::try_new(vec![
192+
StructField::not_null("city", DataType::STRING),
193+
StructField::nullable("zip", DataType::STRING),
194+
])
195+
.unwrap(),
196+
),
197+
])
198+
.unwrap()
199+
}
200+
201+
// === modify_field_at_path tests ===
202+
203+
#[test]
204+
fn modify_top_level_field() {
205+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
206+
let path = vec!["id".to_string()];
207+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
208+
f.nullable = true;
209+
Ok(f)
210+
})
211+
.unwrap();
212+
let id = result.iter().find(|f| f.name() == "id").unwrap();
213+
assert!(id.is_nullable());
214+
}
215+
216+
#[test]
217+
fn modify_nested_field() {
218+
let fields: Vec<StructField> = nested_schema().into_fields().collect();
219+
let path = vec!["address".to_string(), "city".to_string()];
220+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
221+
f.nullable = true;
222+
Ok(f)
223+
})
224+
.unwrap();
225+
let addr = result.iter().find(|f| f.name() == "address").unwrap();
226+
match addr.data_type() {
227+
DataType::Struct(s) => assert!(s.field("city").unwrap().is_nullable()),
228+
other => panic!("Expected Struct, got: {other:?}"),
229+
}
230+
}
231+
232+
#[test]
233+
fn modify_preserves_sibling_fields() {
234+
let fields: Vec<StructField> = nested_schema().into_fields().collect();
235+
let path = vec!["address".to_string(), "city".to_string()];
236+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
237+
f.nullable = true;
238+
Ok(f)
239+
})
240+
.unwrap();
241+
// "id" should be unchanged
242+
let id = result.iter().find(|f| f.name() == "id").unwrap();
243+
assert!(!id.is_nullable());
244+
// "zip" sibling should be unchanged
245+
let addr = result.iter().find(|f| f.name() == "address").unwrap();
246+
match addr.data_type() {
247+
DataType::Struct(s) => assert!(s.field("zip").unwrap().is_nullable()),
248+
other => panic!("Expected Struct, got: {other:?}"),
249+
}
250+
}
251+
252+
#[test]
253+
fn modify_nonexistent_field_fails() {
254+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
255+
let path = vec!["nope".to_string()];
256+
let err = modify_field_at_path(fields, &path, &path, &|f| Ok(f)).unwrap_err();
257+
assert!(err.to_string().contains("does not exist"));
258+
}
259+
260+
#[test]
261+
fn modify_through_non_struct_fails() {
262+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
263+
let path = vec!["name".to_string(), "inner".to_string()];
264+
let err = modify_field_at_path(fields, &path, &path, &|f| Ok(f)).unwrap_err();
265+
assert!(err.to_string().contains("not a struct"));
266+
}
267+
268+
#[test]
269+
fn modify_case_insensitive_lookup() {
270+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
271+
let path = vec!["ID".to_string()];
272+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
273+
f.nullable = true;
274+
Ok(f)
275+
})
276+
.unwrap();
277+
let id = result.iter().find(|f| f.name() == "id").unwrap();
278+
assert!(id.is_nullable());
279+
}
280+
281+
// === apply_schema_operations tests ===
282+
93283
#[test]
94284
fn add_nullable_column_succeeds() {
95285
let ops = vec![SchemaOperation::AddColumn {
@@ -164,4 +354,123 @@ mod tests {
164354
let names: Vec<&String> = result.schema.fields().map(|f| f.name()).collect();
165355
assert_eq!(names, vec!["id", "name", "email"]);
166356
}
357+
358+
// === apply_schema_operations: SetNullable tests ===
359+
360+
#[test]
361+
fn set_nullable_on_required_field() {
362+
let ops = vec![SchemaOperation::SetNullable {
363+
path: column_name!("id"),
364+
}];
365+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
366+
let id_field = result.schema.field("id").unwrap();
367+
assert!(id_field.is_nullable());
368+
}
369+
370+
#[test]
371+
fn set_nullable_already_nullable_is_noop() {
372+
let ops = vec![SchemaOperation::SetNullable {
373+
path: column_name!("name"),
374+
}];
375+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
376+
let name_field = result.schema.field("name").unwrap();
377+
assert!(name_field.is_nullable());
378+
}
379+
380+
#[test]
381+
fn set_nullable_case_insensitive() {
382+
let ops = vec![SchemaOperation::SetNullable {
383+
path: column_name!("ID"),
384+
}];
385+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
386+
let id_field = result.schema.field("id").unwrap();
387+
assert!(id_field.is_nullable());
388+
}
389+
390+
#[test]
391+
fn set_nullable_deeply_nested_field() {
392+
let schema = StructType::try_new(vec![
393+
StructField::not_null("id", DataType::INTEGER),
394+
StructField::nullable(
395+
"address",
396+
StructType::try_new(vec![StructField::nullable(
397+
"location",
398+
StructType::try_new(vec![StructField::not_null("zipcode", DataType::STRING)])
399+
.unwrap(),
400+
)])
401+
.unwrap(),
402+
),
403+
])
404+
.unwrap();
405+
let ops = vec![SchemaOperation::SetNullable {
406+
path: column_name!("address.location.zipcode"),
407+
}];
408+
let result = apply_schema_operations(schema, ops).unwrap();
409+
let addr = result.schema.field("address").unwrap();
410+
match addr.data_type() {
411+
DataType::Struct(s) => match s.field("location").unwrap().data_type() {
412+
DataType::Struct(loc) => {
413+
let zip = loc.field("zipcode").unwrap();
414+
assert!(zip.is_nullable());
415+
}
416+
other => panic!("Expected Struct, got: {other:?}"),
417+
},
418+
other => panic!("Expected Struct, got: {other:?}"),
419+
}
420+
}
421+
422+
#[test]
423+
fn chain_add_and_set_nullable() {
424+
let ops = vec![
425+
SchemaOperation::AddColumn {
426+
field: StructField::nullable("email", DataType::STRING),
427+
},
428+
SchemaOperation::SetNullable {
429+
path: column_name!("id"),
430+
},
431+
];
432+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
433+
assert_eq!(result.schema.fields().count(), 3);
434+
assert!(result.schema.field("email").is_some());
435+
assert!(result.schema.field("id").unwrap().is_nullable());
436+
}
437+
438+
#[test]
439+
fn set_nullable_nonexistent_column_fails() {
440+
let ops = vec![SchemaOperation::SetNullable {
441+
path: column_name!("nonexistent"),
442+
}];
443+
let err = apply_schema_operations(simple_schema(), ops).unwrap_err();
444+
assert!(err.to_string().contains("does not exist"));
445+
}
446+
447+
#[test]
448+
fn set_nullable_nested_field() {
449+
let schema = StructType::try_new(vec![
450+
StructField::not_null("id", DataType::INTEGER),
451+
StructField::nullable(
452+
"address",
453+
StructType::try_new(vec![StructField::not_null("city", DataType::STRING)]).unwrap(),
454+
),
455+
])
456+
.unwrap();
457+
let ops = vec![SchemaOperation::SetNullable {
458+
path: column_name!("address.city"),
459+
}];
460+
let result = apply_schema_operations(schema, ops).unwrap();
461+
let addr = result.schema.field("address").unwrap();
462+
match addr.data_type() {
463+
DataType::Struct(s) => assert!(s.field("city").unwrap().is_nullable()),
464+
other => panic!("Expected Struct, got: {other:?}"),
465+
}
466+
}
467+
468+
#[test]
469+
fn set_nullable_through_non_struct_fails() {
470+
let ops = vec![SchemaOperation::SetNullable {
471+
path: column_name!("name.inner"),
472+
}];
473+
let err = apply_schema_operations(simple_schema(), ops).unwrap_err();
474+
assert!(err.to_string().contains("not a struct"));
475+
}
167476
}

0 commit comments

Comments
 (0)