Skip to content

Commit 7608b2d

Browse files
committed
Allow unparser to override the alias name for the specific dialect (apache#16540)
* allow override col alias for specific dialect
* improve test case
* add generic dialect case
1 parent 33a32d4 commit 7608b2d

File tree

4 files changed

+107
-5
lines changed

4 files changed

+107
-5
lines changed

datafusion/sql/src/unparser/dialect.rs

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,13 @@ pub trait Dialect: Send + Sync {
197197
fn unnest_as_table_factor(&self) -> bool {
198198
false
199199
}
200+
201+
/// Allows the dialect to override column alias unparsing if the dialect has specific rules.
202+
/// Returns None if the default unparsing should be used, or Some(String) if there is
203+
/// a custom implementation for the alias.
204+
fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
205+
Ok(None)
206+
}
200207
}
201208

202209
/// `IntervalStyle` to use for unparsing
@@ -500,6 +507,49 @@ impl Dialect for SqliteDialect {
500507
}
501508
}
502509

510+
#[derive(Default)]
511+
pub struct BigQueryDialect {}
512+
513+
impl Dialect for BigQueryDialect {
514+
fn identifier_quote_style(&self, _: &str) -> Option<char> {
515+
Some('`')
516+
}
517+
518+
fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
519+
// Check if alias contains any special characters not supported by BigQuery col names
520+
// https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
521+
let special_chars: [char; 20] = [
522+
'!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
523+
'^', '`', '{', '}', '~',
524+
];
525+
526+
if alias.chars().any(|c| special_chars.contains(&c)) {
527+
let mut encoded_name = String::new();
528+
for c in alias.chars() {
529+
if special_chars.contains(&c) {
530+
encoded_name.push_str(&format!("_{}", c as u32));
531+
} else {
532+
encoded_name.push(c);
533+
}
534+
}
535+
Ok(Some(encoded_name))
536+
} else {
537+
Ok(Some(alias.to_string()))
538+
}
539+
}
540+
541+
fn unnest_as_table_factor(&self) -> bool {
542+
true
543+
}
544+
}
545+
546+
impl BigQueryDialect {
547+
#[must_use]
548+
pub fn new() -> Self {
549+
Self {}
550+
}
551+
}
552+
503553
pub struct CustomDialect {
504554
identifier_quote_style: Option<char>,
505555
supports_nulls_first_in_sort: bool,

datafusion/sql/src/unparser/expr.rs

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -710,21 +710,29 @@ impl Unparser<'_> {
710710
}
711711

712712
pub fn col_to_sql(&self, col: &Column) -> Result<ast::Expr> {
713+
// Replace the column name if the dialect has an override
714+
let col_name =
715+
if let Some(rewritten_name) = self.dialect.col_alias_overrides(&col.name)? {
716+
rewritten_name
717+
} else {
718+
col.name.to_string()
719+
};
720+
713721
if let Some(table_ref) = &col.relation {
714722
let mut id = if self.dialect.full_qualified_col() {
715723
table_ref.to_vec()
716724
} else {
717725
vec![table_ref.table().to_string()]
718726
};
719-
id.push(col.name.to_string());
727+
id.push(col_name);
720728
return Ok(ast::Expr::CompoundIdentifier(
721729
id.iter()
722730
.map(|i| self.new_ident_quoted_if_needs(i.to_string()))
723731
.collect(),
724732
));
725733
}
726734
Ok(ast::Expr::Identifier(
727-
self.new_ident_quoted_if_needs(col.name.to_string()),
735+
self.new_ident_quoted_if_needs(col_name),
728736
))
729737
}
730738

datafusion/sql/src/unparser/plan.rs

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1199,9 +1199,18 @@ impl Unparser<'_> {
11991199
Expr::Alias(Alias { expr, name, .. }) => {
    let inner = self.expr_to_sql(expr)?;

    // Let the dialect rewrite the alias. An override already arrives as an
    // owned `String`, so only the fallback path needs to allocate.
    let col_name = self
        .dialect
        .col_alias_overrides(name)?
        .unwrap_or_else(|| name.to_string());

    Ok(ast::SelectItem::ExprWithAlias {
        expr: inner,
        alias: self.new_ident_quoted_if_needs(col_name),
    })
}
12071216
_ => {

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 37 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,8 @@ use datafusion_functions_nested::map::map_udf;
3434
use datafusion_functions_window::rank::rank_udwf;
3535
use datafusion_sql::planner::{ContextProvider, PlannerContext, SqlToRel};
3636
use datafusion_sql::unparser::dialect::{
37-
CustomDialectBuilder, DefaultDialect as UnparserDefaultDialect, DefaultDialect,
38-
Dialect as UnparserDialect, MySqlDialect as UnparserMySqlDialect,
37+
BigQueryDialect, CustomDialectBuilder, DefaultDialect as UnparserDefaultDialect,
38+
DefaultDialect, Dialect as UnparserDialect, MySqlDialect as UnparserMySqlDialect,
3939
PostgreSqlDialect as UnparserPostgreSqlDialect, SqliteDialect,
4040
};
4141
use datafusion_sql::unparser::{expr_to_sql, plan_to_sql, Unparser};
@@ -923,6 +923,41 @@ fn roundtrip_statement_with_dialect_45() -> Result<(), DataFusionError> {
923923
Ok(())
924924
}
925925

926+
// Checks how aliases containing special characters are unparsed: the BigQuery
// dialect rewrites them, while the default dialect leaves them untouched.
#[test]
fn roundtrip_statement_with_dialect_special_char_alias() -> Result<(), DataFusionError> {
    // Parentheses in an alias: `(` -> `_40`, `)` -> `_41`.
    roundtrip_statement_with_dialect_helper!(
        sql: "select min(a) as \"min(a)\" from (select 1 as a)",
        parser_dialect: GenericDialect {},
        unparser_dialect: BigQueryDialect {},
        expected: @r#"SELECT min(`a`) AS `min_40a_41` FROM (SELECT 1 AS `a`)"#,
    );

    // Several rewritten aliases in one projection: `*` -> `_42`, `@` -> `_64`.
    roundtrip_statement_with_dialect_helper!(
        sql: "select a as \"a*\", b as \"b@\" from (select 1 as a , 2 as b)",
        parser_dialect: GenericDialect {},
        unparser_dialect: BigQueryDialect {},
        expected: @r#"SELECT `a` AS `a_42`, `b` AS `b_64` FROM (SELECT 1 AS `a`, 2 AS `b`)"#,
    );

    // An un-aliased column between two rewritten aliases keeps its name.
    roundtrip_statement_with_dialect_helper!(
        sql: "select a as \"a*\", b , c as \"c@\" from (select 1 as a , 2 as b, 3 as c)",
        parser_dialect: GenericDialect {},
        unparser_dialect: BigQueryDialect {},
        expected: @r#"SELECT `a` AS `a_42`, `b`, `c` AS `c_64` FROM (SELECT 1 AS `a`, 2 AS `b`, 3 AS `c`)"#,
    );

    // Outer references to a rewritten alias (projection and filter) use the
    // rewritten name too.
    roundtrip_statement_with_dialect_helper!(
        sql: "select * from (select a as \"a*\", b as \"b@\" from (select 1 as a , 2 as b)) where \"a*\" = 1",
        parser_dialect: GenericDialect {},
        unparser_dialect: BigQueryDialect {},
        expected: @r#"SELECT `a_42`, `b_64` FROM (SELECT `a` AS `a_42`, `b` AS `b_64` FROM (SELECT 1 AS `a`, 2 AS `b`)) WHERE (`a_42` = 1)"#,
    );

    // Same query through the default dialect: aliases are left unchanged.
    roundtrip_statement_with_dialect_helper!(
        sql: "select * from (select a as \"a*\", b as \"b@\" from (select 1 as a , 2 as b)) where \"a*\" = 1",
        parser_dialect: GenericDialect {},
        unparser_dialect: UnparserDefaultDialect {},
        expected: @r#"SELECT "a*", "b@" FROM (SELECT a AS "a*", b AS "b@" FROM (SELECT 1 AS a, 2 AS b)) WHERE ("a*" = 1)"#,
    );

    Ok(())
}
960+
926961
#[test]
927962
fn test_unnest_logical_plan() -> Result<()> {
928963
let query = "select unnest(struct_col), unnest(array_col), struct_col, array_col from unnest_table";

0 commit comments

Comments
 (0)