@@ -1281,6 +1281,19 @@ impl PageHeader {
12811281/////////////////////////////////////////////////
12821282// helper functions for writing file meta data
12831283
/// Reports whether the statistics-related fields of a column chunk may be
/// emitted when its `ColumnMetaData` is serialized.
///
/// Returns `true` when `column_chunk` holds no encrypted column metadata and
/// `false` otherwise.
#[cfg(feature = "encryption")]
fn should_write_column_stats(column_chunk: &ColumnChunkMetaData) -> bool {
    // Encrypted column metadata is only present when the column uses a key
    // other than the footer key, or when the footer is stored in plaintext.
    // Statistics are sensitive in both situations, so they must be left out.
    let has_encrypted_metadata = column_chunk.encrypted_column_metadata.is_some();
    !has_encrypted_metadata
}
1291+
1292+ #[ cfg( not( feature = "encryption" ) ) ]
1293+ fn should_write_column_stats ( _column_chunk : & ColumnChunkMetaData ) -> bool {
1294+ true
1295+ }
1296+
12841297// serialize the bits of the column chunk needed for a thrift ColumnMetaData
12851298// struct ColumnMetaData {
12861299// 1: required Type type
@@ -1331,48 +1344,51 @@ pub(super) fn serialize_column_meta_data<W: Write>(
13311344 if let Some ( dictionary_page_offset) = column_chunk. dictionary_page_offset {
13321345 last_field_id = dictionary_page_offset. write_thrift_field ( w, 11 , last_field_id) ?;
13331346 }
1334- // PageStatistics is the same as thrift Statistics, but writable
1335- let stats = page_stats_to_thrift ( column_chunk. statistics ( ) ) ;
1336- if let Some ( stats) = stats {
1337- last_field_id = stats. write_thrift_field ( w, 12 , last_field_id) ?;
1338- }
1339- if let Some ( page_encoding_stats) = column_chunk. page_encoding_stats ( ) {
1340- last_field_id = page_encoding_stats. write_thrift_field ( w, 13 , last_field_id) ?;
1341- }
1342- if let Some ( bloom_filter_offset) = column_chunk. bloom_filter_offset {
1343- last_field_id = bloom_filter_offset. write_thrift_field ( w, 14 , last_field_id) ?;
1344- }
1345- if let Some ( bloom_filter_length) = column_chunk. bloom_filter_length {
1346- last_field_id = bloom_filter_length. write_thrift_field ( w, 15 , last_field_id) ?;
1347- }
13481347
1349- // SizeStatistics
1350- let size_stats = if column_chunk. unencoded_byte_array_data_bytes . is_some ( )
1351- || column_chunk. repetition_level_histogram . is_some ( )
1352- || column_chunk. definition_level_histogram . is_some ( )
1353- {
1354- let repetition_level_histogram = column_chunk
1355- . repetition_level_histogram ( )
1356- . map ( |hist| hist. clone ( ) . into_inner ( ) ) ;
1357-
1358- let definition_level_histogram = column_chunk
1359- . definition_level_histogram ( )
1360- . map ( |hist| hist. clone ( ) . into_inner ( ) ) ;
1361-
1362- Some ( SizeStatistics {
1363- unencoded_byte_array_data_bytes : column_chunk. unencoded_byte_array_data_bytes ,
1364- repetition_level_histogram,
1365- definition_level_histogram,
1366- } )
1367- } else {
1368- None
1369- } ;
1370- if let Some ( size_stats) = size_stats {
1371- last_field_id = size_stats. write_thrift_field ( w, 16 , last_field_id) ?;
1372- }
1348+ if should_write_column_stats ( column_chunk) {
1349+ // PageStatistics is the same as thrift Statistics, but writable
1350+ let stats = page_stats_to_thrift ( column_chunk. statistics ( ) ) ;
1351+ if let Some ( stats) = stats {
1352+ last_field_id = stats. write_thrift_field ( w, 12 , last_field_id) ?;
1353+ }
1354+ if let Some ( page_encoding_stats) = column_chunk. page_encoding_stats ( ) {
1355+ last_field_id = page_encoding_stats. write_thrift_field ( w, 13 , last_field_id) ?;
1356+ }
1357+ if let Some ( bloom_filter_offset) = column_chunk. bloom_filter_offset {
1358+ last_field_id = bloom_filter_offset. write_thrift_field ( w, 14 , last_field_id) ?;
1359+ }
1360+ if let Some ( bloom_filter_length) = column_chunk. bloom_filter_length {
1361+ last_field_id = bloom_filter_length. write_thrift_field ( w, 15 , last_field_id) ?;
1362+ }
13731363
1374- if let Some ( geo_stats) = column_chunk. geo_statistics ( ) {
1375- geo_stats. write_thrift_field ( w, 17 , last_field_id) ?;
1364+ // SizeStatistics
1365+ let size_stats = if column_chunk. unencoded_byte_array_data_bytes . is_some ( )
1366+ || column_chunk. repetition_level_histogram . is_some ( )
1367+ || column_chunk. definition_level_histogram . is_some ( )
1368+ {
1369+ let repetition_level_histogram = column_chunk
1370+ . repetition_level_histogram ( )
1371+ . map ( |hist| hist. clone ( ) . into_inner ( ) ) ;
1372+
1373+ let definition_level_histogram = column_chunk
1374+ . definition_level_histogram ( )
1375+ . map ( |hist| hist. clone ( ) . into_inner ( ) ) ;
1376+
1377+ Some ( SizeStatistics {
1378+ unencoded_byte_array_data_bytes : column_chunk. unencoded_byte_array_data_bytes ,
1379+ repetition_level_histogram,
1380+ definition_level_histogram,
1381+ } )
1382+ } else {
1383+ None
1384+ } ;
1385+ if let Some ( size_stats) = size_stats {
1386+ last_field_id = size_stats. write_thrift_field ( w, 16 , last_field_id) ?;
1387+ }
1388+
1389+ if let Some ( geo_stats) = column_chunk. geo_statistics ( ) {
1390+ geo_stats. write_thrift_field ( w, 17 , last_field_id) ?;
1391+ }
13761392 }
13771393
13781394 w. write_struct_end ( )
@@ -1592,17 +1608,17 @@ impl WriteThrift for ColumnChunkMetaData {
15921608 . write_thrift_field ( writer, 2 , last_field_id) ?;
15931609
15941610 #[ cfg( feature = "encryption" ) ]
1595- {
1596- // only write the ColumnMetaData if we haven't already encrypted it
1597- if self . encrypted_column_metadata . is_none ( ) {
1598- writer. write_field_begin ( FieldType :: Struct , 3 , last_field_id) ?;
1599- serialize_column_meta_data ( self , writer) ?;
1600- last_field_id = 3 ;
1601- }
1602- }
1611+ let write_meta_data =
1612+ self . encrypted_column_metadata . is_none ( ) || self . plaintext_footer_mode ;
16031613 #[ cfg( not( feature = "encryption" ) ) ]
1604- {
1605- // always write the ColumnMetaData
1614+ let write_meta_data = true ;
1615+
1616+ // When the footer is encrypted and encrypted_column_metadata is present,
1617+ // skip writing the plaintext meta_data field to reduce footer size.
1618+ // When the footer is plaintext (plaintext_footer_mode=true), we still write
1619+ // meta_data for backward compatibility with readers that expect it, but with
1620+ // sensitive fields (statistics, bloom filter info, etc.) stripped out.
1621+ if write_meta_data {
16061622 writer. write_field_begin ( FieldType :: Struct , 3 , last_field_id) ?;
16071623 serialize_column_meta_data ( self , writer) ?;
16081624 last_field_id = 3 ;
0 commit comments