sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.COMMAND,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )
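A minimal usage sketch (not part of the module): the dialect above is registered under the name "duckdb", so it can be exercised through sqlglot's top-level API. The input query is an illustrative example.

import sqlglot

# EPOCH_MS(...) is parsed into exp.UnixToTime via the FUNCTIONS mapping above,
# then re-rendered by the target dialect's own generator.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="spark")[0])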
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
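One way to see this setting in action is to transpile a CONCAT call from a dialect where NULL args behave like empty strings. A minimal sketch (the table/column names are placeholders, and the exact output depends on the sqlglot version):

import sqlglot

# T-SQL's CONCAT treats NULL arguments as empty strings, so sqlglot may wrap
# the arguments to preserve those semantics when targeting DuckDB.
print(sqlglot.transpile("SELECT CONCAT(a, b) FROM t", read="tsql", write="duckdb")[0])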
Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
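A minimal round-trip sketch (identifiers are placeholders):

import sqlglot

# ORDER BY ALL parses and renders as-is in dialects that support it, e.g. DuckDB.
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])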
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
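For illustration, a fixed-size array cast can be round-tripped through the DuckDB dialect (x is a placeholder column; the parse shape may vary across sqlglot versions):

import sqlglot

# In DuckDB, INT[5] is a fixed-size array type, so the [5] stays part of the cast.
print(sqlglot.parse_one("SELECT x::INT[5]", read="duckdb").sql("duckdb"))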
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
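A sketch of what normalization means in practice for DuckDB, which treats unquoted identifiers case-insensitively (the identifier name is arbitrary, and the exact behavior is version-dependent):

from sqlglot import exp
from sqlglot.dialects.duckdb import DuckDB

# Unquoted identifiers are normalized (lowercased); quoted ones are preserved.
print(DuckDB().normalize_identifier(exp.to_identifier("FooBar")).name)
print(DuckDB().normalize_identifier(exp.to_identifier("FooBar", quoted=True)).name)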
289 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 290 if isinstance(path, exp.Literal): 291 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 292 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 293 # This check ensures we'll avoid trying to parse these as JSON paths, which can 294 # either result in a noisy warning or in an invalid representation of the path. 295 path_text = path.name 296 if path_text.startswith("/") or "[#" in path_text: 297 return path 298 299 return super().to_json_path(path)
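An illustrative round-trip showing a JSON pointer path being preserved rather than parsed as JSONPath (j and t are placeholders; output may vary by sqlglot version):

import sqlglot

# Paths starting with '/' (JSON pointer) or containing '[#' (back-of-list access)
# are returned verbatim by to_json_path instead of being parsed.
print(sqlglot.transpile("SELECT j -> '/a/b' FROM t", read="duckdb", write="duckdb")[0])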
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
301 class Tokenizer(tokens.Tokenizer): 302 HEREDOC_STRINGS = ["$"] 303 304 HEREDOC_TAG_IS_IDENTIFIER = True 305 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 306 307 KEYWORDS = { 308 **tokens.Tokenizer.KEYWORDS, 309 "//": TokenType.DIV, 310 "**": TokenType.DSTAR, 311 "^@": TokenType.CARET_AT, 312 "@>": TokenType.AT_GT, 313 "<@": TokenType.LT_AT, 314 "ATTACH": TokenType.COMMAND, 315 "BINARY": TokenType.VARBINARY, 316 "BITSTRING": TokenType.BIT, 317 "BPCHAR": TokenType.TEXT, 318 "CHAR": TokenType.TEXT, 319 "CHARACTER VARYING": TokenType.TEXT, 320 "DETACH": TokenType.COMMAND, 321 "EXCLUDE": TokenType.EXCEPT, 322 "LOGICAL": TokenType.BOOLEAN, 323 "ONLY": TokenType.ONLY, 324 "PIVOT_WIDER": TokenType.PIVOT, 325 "POSITIONAL": TokenType.POSITIONAL, 326 "SIGNED": TokenType.INT, 327 "STRING": TokenType.TEXT, 328 "SUMMARIZE": TokenType.SUMMARIZE, 329 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 330 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 331 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 332 "TIMESTAMP_US": TokenType.TIMESTAMP, 333 "UBIGINT": TokenType.UBIGINT, 334 "UINTEGER": TokenType.UINT, 335 "USMALLINT": TokenType.USMALLINT, 336 "UTINYINT": TokenType.UTINYINT, 337 "VARCHAR": TokenType.TEXT, 338 } 339 KEYWORDS.pop("/*+") 340 341 SINGLE_TOKENS = { 342 **tokens.Tokenizer.SINGLE_TOKENS, 343 "$": TokenType.PARAMETER, 344 }
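Given the token remappings above, operators like // and ** tokenize to DIV and DSTAR respectively. A hedged sketch of how that surfaces when transpiling (the Postgres rendering may differ across sqlglot versions):

import sqlglot

# '//' is integer division and '**' is exponentiation in DuckDB.
print(sqlglot.transpile("SELECT 7 // 2, 2 ** 10", read="duckdb", write="postgres")[0])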
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
346 class Parser(parser.Parser): 347 BITWISE = { 348 **parser.Parser.BITWISE, 349 TokenType.TILDA: exp.RegexpLike, 350 } 351 BITWISE.pop(TokenType.CARET) 352 353 RANGE_PARSERS = { 354 **parser.Parser.RANGE_PARSERS, 355 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 356 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 357 } 358 359 EXPONENT = { 360 **parser.Parser.EXPONENT, 361 TokenType.CARET: exp.Pow, 362 TokenType.DSTAR: exp.Pow, 363 } 364 365 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 366 367 FUNCTIONS = { 368 **parser.Parser.FUNCTIONS, 369 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 370 "ARRAY_SORT": exp.SortArray.from_arg_list, 371 "DATEDIFF": _build_date_diff, 372 "DATE_DIFF": _build_date_diff, 373 "DATE_TRUNC": date_trunc_to_time, 374 "DATETRUNC": date_trunc_to_time, 375 "DECODE": lambda args: exp.Decode( 376 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 377 ), 378 "ENCODE": lambda args: exp.Encode( 379 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 380 ), 381 "EPOCH": exp.TimeToUnix.from_arg_list, 382 "EPOCH_MS": lambda args: exp.UnixToTime( 383 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 384 ), 385 "JSON": exp.ParseJSON.from_arg_list, 386 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 387 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 388 "LIST_HAS": exp.ArrayContains.from_arg_list, 389 "LIST_REVERSE_SORT": _build_sort_array_desc, 390 "LIST_SORT": exp.SortArray.from_arg_list, 391 "LIST_VALUE": lambda args: exp.Array(expressions=args), 392 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 393 "MAKE_TIMESTAMP": _build_make_timestamp, 394 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 395 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 396 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 397 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 398 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 399 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 400 this=seq_get(args, 0), 401 expression=seq_get(args, 1), 402 replacement=seq_get(args, 2), 403 modifiers=seq_get(args, 3), 404 ), 405 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 406 "STRING_SPLIT": exp.Split.from_arg_list, 407 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 408 "STRING_TO_ARRAY": exp.Split.from_arg_list, 409 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 410 "STRUCT_PACK": exp.Struct.from_arg_list, 411 "STR_SPLIT": exp.Split.from_arg_list, 412 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 413 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 414 "UNNEST": exp.Explode.from_arg_list, 415 "XOR": binary_from_function(exp.BitwiseXor), 416 "GENERATE_SERIES": _build_generate_series(), 417 "RANGE": _build_generate_series(end_exclusive=True), 418 "EDITDIST3": exp.Levenshtein.from_arg_list, 419 } 420 421 FUNCTIONS.pop("DATE_SUB") 422 FUNCTIONS.pop("GLOB") 423 424 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 425 FUNCTION_PARSERS.pop("DECODE") 426 427 NO_PAREN_FUNCTION_PARSERS = { 428 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 429 "MAP": lambda self: self._parse_map(), 430 } 431 432 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 433 TokenType.SEMI, 434 TokenType.ANTI, 435 } 436 437 PLACEHOLDER_PARSERS = { 438 **parser.Parser.PLACEHOLDER_PARSERS, 439 TokenType.PARAMETER: lambda self: ( 440 self.expression(exp.Placeholder, this=self._prev.text) 441 if self._match(TokenType.NUMBER) or 
self._match_set(self.ID_VAR_TOKENS) 442 else None 443 ), 444 } 445 446 TYPE_CONVERTERS = { 447 # https://duckdb.org/docs/sql/data_types/numeric 448 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 449 # https://duckdb.org/docs/sql/data_types/text 450 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 451 } 452 453 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 454 # https://duckdb.org/docs/sql/samples.html 455 sample = super()._parse_table_sample(as_modifier=as_modifier) 456 if sample and not sample.args.get("method"): 457 if sample.args.get("size"): 458 sample.set("method", exp.var("RESERVOIR")) 459 else: 460 sample.set("method", exp.var("SYSTEM")) 461 462 return sample 463 464 def _parse_bracket( 465 self, this: t.Optional[exp.Expression] = None 466 ) -> t.Optional[exp.Expression]: 467 bracket = super()._parse_bracket(this) 468 if isinstance(bracket, exp.Bracket): 469 bracket.set("returns_list_for_maps", True) 470 471 return bracket 472 473 def _parse_map(self) -> exp.ToMap | exp.Map: 474 if self._match(TokenType.L_BRACE, advance=False): 475 return self.expression(exp.ToMap, this=self._parse_bracket()) 476 477 args = self._parse_wrapped_csv(self._parse_assignment) 478 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 479 480 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 481 return self._parse_field_def() 482 483 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 484 if len(aggregations) == 1: 485 return super()._pivot_column_names(aggregations) 486 return pivot_column_names(aggregations, dialect="duckdb")
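As a sketch of the MAP handling above: the brace syntax is routed through _parse_map, while the two-argument form builds exp.Map (the exact output shape may vary by version):

import sqlglot

# DuckDB's MAP {'a': 1} literal parses via the no-paren MAP parser.
print(sqlglot.parse_one("SELECT MAP {'a': 1}", read="duckdb").sql("duckdb"))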
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
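A minimal sketch of these knobs (the broken SQL string is arbitrary; ErrorLevel and ParseError are re-exported at the top level of sqlglot):

import sqlglot
from sqlglot import ErrorLevel

# With ErrorLevel.RAISE, parse problems surface as a ParseError.
try:
    sqlglot.parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.RAISE)
except sqlglot.ParseError as e:
    print(e)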
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
488 class Generator(generator.Generator): 489 PARAMETER_TOKEN = "$" 490 NAMED_PLACEHOLDER_TOKEN = "$" 491 JOIN_HINTS = False 492 TABLE_HINTS = False 493 QUERY_HINTS = False 494 LIMIT_FETCH = "LIMIT" 495 STRUCT_DELIMITER = ("(", ")") 496 RENAME_TABLE_WITH_DB = False 497 NVL2_SUPPORTED = False 498 SEMI_ANTI_JOIN_WITH_SIDE = False 499 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 500 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 501 LAST_DAY_SUPPORTS_DATE_PART = False 502 JSON_KEY_VALUE_PAIR_SEP = "," 503 IGNORE_NULLS_IN_FUNC = True 504 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 505 SUPPORTS_CREATE_TABLE_LIKE = False 506 MULTI_ARG_DISTINCT = False 507 CAN_IMPLEMENT_ARRAY_ANY = True 508 SUPPORTS_TO_NUMBER = False 509 COPY_HAS_INTO_KEYWORD = False 510 STAR_EXCEPT = "EXCLUDE" 511 PAD_FILL_PATTERN_IS_REQUIRED = True 512 ARRAY_CONCAT_IS_VAR_LEN = False 513 ARRAY_SIZE_DIM_REQUIRED = False 514 515 TRANSFORMS = { 516 **generator.Generator.TRANSFORMS, 517 exp.ApproxDistinct: approx_count_distinct_sql, 518 exp.Array: inline_array_unless_query, 519 exp.ArrayFilter: rename_func("LIST_FILTER"), 520 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 521 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 522 exp.ArraySort: _array_sort_sql, 523 exp.ArraySum: rename_func("LIST_SUM"), 524 exp.BitwiseXor: rename_func("XOR"), 525 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 526 exp.CurrentDate: lambda *_: "CURRENT_DATE", 527 exp.CurrentTime: lambda *_: "CURRENT_TIME", 528 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 529 exp.DayOfMonth: rename_func("DAYOFMONTH"), 530 exp.DayOfWeek: rename_func("DAYOFWEEK"), 531 exp.DayOfWeekIso: rename_func("ISODOW"), 532 exp.DayOfYear: rename_func("DAYOFYEAR"), 533 exp.DataType: _datatype_sql, 534 exp.Date: _date_sql, 535 exp.DateAdd: _date_delta_sql, 536 exp.DateFromParts: rename_func("MAKE_DATE"), 537 exp.DateSub: _date_delta_sql, 538 exp.DateDiff: _date_diff_sql, 539 exp.DateStrToDate: datestrtodate_sql, 540 exp.Datetime: no_datetime_sql, 541 exp.DatetimeSub: _date_delta_sql, 542 exp.DatetimeAdd: _date_delta_sql, 543 exp.DateToDi: lambda self, 544 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 545 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 546 exp.DiToDate: lambda self, 547 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 548 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 549 exp.GenerateDateArray: _generate_datetime_array_sql, 550 exp.GenerateTimestampArray: _generate_datetime_array_sql, 551 exp.Explode: rename_func("UNNEST"), 552 exp.IntDiv: lambda self, e: self.binary(e, "//"), 553 exp.IsInf: rename_func("ISINF"), 554 exp.IsNan: rename_func("ISNAN"), 555 exp.JSONBExists: rename_func("JSON_EXISTS"), 556 exp.JSONExtract: _arrow_json_extract_sql, 557 exp.JSONExtractArray: _json_extract_value_array_sql, 558 exp.JSONExtractScalar: _arrow_json_extract_sql, 559 exp.JSONFormat: _json_format_sql, 560 exp.JSONValueArray: _json_extract_value_array_sql, 561 exp.Lateral: explode_to_unnest_sql, 562 exp.LogicalOr: rename_func("BOOL_OR"), 563 exp.LogicalAnd: rename_func("BOOL_AND"), 564 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 565 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 566 exp.MonthsBetween: lambda self, e: self.func( 567 "DATEDIFF", 568 "'month'", 569 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 570 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, 
copy=True), 571 ), 572 exp.PercentileCont: rename_func("QUANTILE_CONT"), 573 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 574 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 575 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 576 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 577 exp.RegexpReplace: lambda self, e: self.func( 578 "REGEXP_REPLACE", 579 e.this, 580 e.expression, 581 e.args.get("replacement"), 582 e.args.get("modifiers"), 583 ), 584 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 585 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 586 exp.Return: lambda self, e: self.sql(e, "this"), 587 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 588 exp.Rand: rename_func("RANDOM"), 589 exp.SafeDivide: no_safe_divide_sql, 590 exp.SHA: rename_func("SHA1"), 591 exp.SHA2: sha256_sql, 592 exp.Split: rename_func("STR_SPLIT"), 593 exp.SortArray: _sort_array_sql, 594 exp.StrPosition: str_position_sql, 595 exp.StrToUnix: lambda self, e: self.func( 596 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 597 ), 598 exp.Struct: _struct_sql, 599 exp.Transform: rename_func("LIST_TRANSFORM"), 600 exp.TimeAdd: _date_delta_sql, 601 exp.Time: no_time_sql, 602 exp.TimeDiff: _timediff_sql, 603 exp.Timestamp: no_timestamp_sql, 604 exp.TimestampDiff: lambda self, e: self.func( 605 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 606 ), 607 exp.TimestampTrunc: timestamptrunc_sql(), 608 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 609 exp.TimeStrToTime: timestrtotime_sql, 610 exp.TimeStrToUnix: lambda self, e: self.func( 611 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 612 ), 613 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 614 exp.TimeToUnix: rename_func("EPOCH"), 615 exp.TsOrDiToDi: lambda self, 616 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 617 exp.TsOrDsAdd: _date_delta_sql, 618 exp.TsOrDsDiff: lambda self, e: self.func( 619 "DATE_DIFF", 620 f"'{e.args.get('unit') or 'DAY'}'", 621 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 622 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 623 ), 624 exp.UnixToStr: lambda self, e: self.func( 625 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 626 ), 627 exp.DatetimeTrunc: lambda self, e: self.func( 628 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 629 ), 630 exp.UnixToTime: _unix_to_time_sql, 631 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 632 exp.VariancePop: rename_func("VAR_POP"), 633 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 634 exp.Xor: bool_xor_sql, 635 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 636 rename_func("LEVENSHTEIN") 637 ), 638 } 639 640 SUPPORTED_JSON_PATH_PARTS = { 641 exp.JSONPathKey, 642 exp.JSONPathRoot, 643 exp.JSONPathSubscript, 644 exp.JSONPathWildcard, 645 } 646 647 TYPE_MAPPING = { 648 **generator.Generator.TYPE_MAPPING, 649 exp.DataType.Type.BINARY: "BLOB", 650 exp.DataType.Type.BPCHAR: "TEXT", 651 exp.DataType.Type.CHAR: "TEXT", 652 exp.DataType.Type.DATETIME: "TIMESTAMP", 653 exp.DataType.Type.FLOAT: "REAL", 654 exp.DataType.Type.NCHAR: "TEXT", 655 exp.DataType.Type.NVARCHAR: "TEXT", 656 exp.DataType.Type.UINT: "UINTEGER", 657 exp.DataType.Type.VARBINARY: "BLOB", 658 
exp.DataType.Type.ROWVERSION: "BLOB", 659 exp.DataType.Type.VARCHAR: "TEXT", 660 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 661 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 662 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 663 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 664 } 665 666 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 667 RESERVED_KEYWORDS = { 668 "array", 669 "analyse", 670 "union", 671 "all", 672 "when", 673 "in_p", 674 "default", 675 "create_p", 676 "window", 677 "asymmetric", 678 "to", 679 "else", 680 "localtime", 681 "from", 682 "end_p", 683 "select", 684 "current_date", 685 "foreign", 686 "with", 687 "grant", 688 "session_user", 689 "or", 690 "except", 691 "references", 692 "fetch", 693 "limit", 694 "group_p", 695 "leading", 696 "into", 697 "collate", 698 "offset", 699 "do", 700 "then", 701 "localtimestamp", 702 "check_p", 703 "lateral_p", 704 "current_role", 705 "where", 706 "asc_p", 707 "placing", 708 "desc_p", 709 "user", 710 "unique", 711 "initially", 712 "column", 713 "both", 714 "some", 715 "as", 716 "any", 717 "only", 718 "deferrable", 719 "null_p", 720 "current_time", 721 "true_p", 722 "table", 723 "case", 724 "trailing", 725 "variadic", 726 "for", 727 "on", 728 "distinct", 729 "false_p", 730 "not", 731 "constraint", 732 "current_timestamp", 733 "returning", 734 "primary", 735 "intersect", 736 "having", 737 "analyze", 738 "current_user", 739 "and", 740 "cast", 741 "symmetric", 742 "using", 743 "order", 744 "current_catalog", 745 } 746 747 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 748 749 # DuckDB doesn't generally support CREATE TABLE .. properties 750 # https://duckdb.org/docs/sql/statements/create_table.html 751 PROPERTIES_LOCATION = { 752 prop: exp.Properties.Location.UNSUPPORTED 753 for prop in generator.Generator.PROPERTIES_LOCATION 754 } 755 756 # There are a few exceptions (e.g. 
temporary tables) which are supported or 757 # can be transpiled to DuckDB, so we explicitly override them accordingly 758 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 759 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 760 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 761 762 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 763 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 764 765 def strtotime_sql(self, expression: exp.StrToTime) -> str: 766 if expression.args.get("safe"): 767 formatted_time = self.format_time(expression) 768 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 769 return str_to_time_sql(self, expression) 770 771 def strtodate_sql(self, expression: exp.StrToDate) -> str: 772 if expression.args.get("safe"): 773 formatted_time = self.format_time(expression) 774 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 775 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 776 777 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 778 arg = expression.this 779 if expression.args.get("safe"): 780 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 781 return self.func("JSON", arg) 782 783 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 784 nano = expression.args.get("nano") 785 if nano is not None: 786 expression.set( 787 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 788 ) 789 790 return rename_func("MAKE_TIME")(self, expression) 791 792 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 793 sec = expression.args["sec"] 794 795 milli = expression.args.get("milli") 796 if milli is not None: 797 sec += milli.pop() / exp.Literal.number(1000.0) 798 799 nano = expression.args.get("nano") 800 if nano is not None: 801 sec += nano.pop() / exp.Literal.number(1000000000.0) 802 803 if milli or nano: 804 expression.set("sec", sec) 805 806 return rename_func("MAKE_TIMESTAMP")(self, expression) 807 808 def tablesample_sql( 809 self, 810 expression: exp.TableSample, 811 tablesample_keyword: t.Optional[str] = None, 812 ) -> str: 813 if not isinstance(expression.parent, exp.Select): 814 # This sample clause only applies to a single source, not the entire resulting relation 815 tablesample_keyword = "TABLESAMPLE" 816 817 if expression.args.get("size"): 818 method = expression.args.get("method") 819 if method and method.name.upper() != "RESERVOIR": 820 self.unsupported( 821 f"Sampling method {method} is not supported with a discrete sample count, " 822 "defaulting to reservoir sampling" 823 ) 824 expression.set("method", exp.var("RESERVOIR")) 825 826 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 827 828 def interval_sql(self, expression: exp.Interval) -> str: 829 multiplier: t.Optional[int] = None 830 unit = expression.text("unit").lower() 831 832 if unit.startswith("week"): 833 multiplier = 7 834 if unit.startswith("quarter"): 835 multiplier = 90 836 837 if multiplier: 838 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 839 840 return super().interval_sql(expression) 841 842 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 843 if isinstance(expression.parent, exp.UserDefinedFunction): 844 return self.sql(expression, "this") 845 
return super().columndef_sql(expression, sep) 846 847 def join_sql(self, expression: exp.Join) -> str: 848 if ( 849 expression.side == "LEFT" 850 and not expression.args.get("on") 851 and isinstance(expression.this, exp.Unnest) 852 ): 853 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 854 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 855 return super().join_sql(expression.on(exp.true())) 856 857 return super().join_sql(expression) 858 859 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 860 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 861 if expression.args.get("is_end_exclusive"): 862 return rename_func("RANGE")(self, expression) 863 864 return self.function_fallback_sql(expression) 865 866 def bracket_sql(self, expression: exp.Bracket) -> str: 867 this = expression.this 868 if isinstance(this, exp.Array): 869 this.replace(exp.paren(this)) 870 871 bracket = super().bracket_sql(expression) 872 873 if not expression.args.get("returns_list_for_maps"): 874 if not this.type: 875 from sqlglot.optimizer.annotate_types import annotate_types 876 877 this = annotate_types(this) 878 879 if this.is_type(exp.DataType.Type.MAP): 880 bracket = f"({bracket})[1]" 881 882 return bracket 883 884 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 885 expression_sql = self.sql(expression, "expression") 886 887 func = expression.this 888 if isinstance(func, exp.PERCENTILES): 889 # Make the order key the first arg and slide the fraction to the right 890 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 891 order_col = expression.find(exp.Ordered) 892 if order_col: 893 func.set("expression", func.this) 894 func.set("this", order_col.this) 895 896 this = self.sql(expression, "this").rstrip(")") 897 898 return f"{this}{expression_sql})" 899 900 def length_sql(self, expression: exp.Length) -> str: 901 arg = expression.this 902 903 # Dialects like BQ and Snowflake also accept binary values as args, so 904 # DDB will attempt to infer the type or resort to case/when resolution 905 if not expression.args.get("binary") or arg.is_string: 906 return self.func("LENGTH", arg) 907 908 if not arg.type: 909 from sqlglot.optimizer.annotate_types import annotate_types 910 911 arg = annotate_types(arg) 912 913 if arg.is_type(*exp.DataType.TEXT_TYPES): 914 return self.func("LENGTH", arg) 915 916 # We need these casts to make duckdb's static type checker happy 917 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 918 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 919 920 case = ( 921 exp.case(self.func("TYPEOF", arg)) 922 .when( 923 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 924 ) # anonymous to break length_sql recursion 925 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 926 ) 927 928 return self.sql(case) 929 930 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 931 this = expression.this 932 key = expression.args.get("key") 933 key_sql = key.name if isinstance(key, exp.Expression) else "" 934 value_sql = self.sql(expression, "value") 935 936 kv_sql = f"{key_sql} := {value_sql}" 937 938 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 939 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 940 if isinstance(this, exp.Struct) and not this.expressions: 941 return self.func("STRUCT_PACK", kv_sql) 942 943 return self.func("STRUCT_INSERT", this, kv_sql) 944 945 def unnest_sql(self, expression: exp.Unnest) -> str: 946 explode_array = expression.args.get("explode_array") 947 if explode_array: 948 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 949 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 950 expression.expressions.append( 951 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 952 ) 953 954 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 955 alias = expression.args.get("alias") 956 if alias: 957 expression.set("alias", None) 958 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 959 960 unnest_sql = super().unnest_sql(expression) 961 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 962 return self.sql(select) 963 964 return super().unnest_sql(expression) 965 966 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 967 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 968 # DuckDB should render IGNORE NULLS only for the general-purpose 969 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 970 return super().ignorenulls_sql(expression) 971 972 return self.sql(expression, "this") 973 974 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 975 this = self.sql(expression, "this") 976 null_text = self.sql(expression, "null") 977 978 if null_text: 979 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 980 981 return self.func("ARRAY_TO_STRING", this, expression.expression) 982 983 @unsupported_args("position", "occurrence") 984 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 985 group = expression.args.get("group") 986 params = expression.args.get("parameters") 987 988 # Do not render group if there is no following argument, 989 # and it's the default value for this dialect 990 if ( 991 not params 992 and group 993 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 994 ): 995 group = None 996 return self.func( 997 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 998 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
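For instance, these options are forwarded through sqlglot.transpile down to this Generator. A quick sketch (identifiers are placeholders):

import sqlglot

# pretty=True formats the generated DuckDB SQL across multiple lines.
print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", read="duckdb", write="duckdb", pretty=True)[0])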
771 def strtodate_sql(self, expression: exp.StrToDate) -> str: 772 if expression.args.get("safe"): 773 formatted_time = self.format_time(expression) 774 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 775 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
783 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 784 nano = expression.args.get("nano") 785 if nano is not None: 786 expression.set( 787 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 788 ) 789 790 return rename_func("MAKE_TIME")(self, expression)
792 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 793 sec = expression.args["sec"] 794 795 milli = expression.args.get("milli") 796 if milli is not None: 797 sec += milli.pop() / exp.Literal.number(1000.0) 798 799 nano = expression.args.get("nano") 800 if nano is not None: 801 sec += nano.pop() / exp.Literal.number(1000000000.0) 802 803 if milli or nano: 804 expression.set("sec", sec) 805 806 return rename_func("MAKE_TIMESTAMP")(self, expression)
808 def tablesample_sql( 809 self, 810 expression: exp.TableSample, 811 tablesample_keyword: t.Optional[str] = None, 812 ) -> str: 813 if not isinstance(expression.parent, exp.Select): 814 # This sample clause only applies to a single source, not the entire resulting relation 815 tablesample_keyword = "TABLESAMPLE" 816 817 if expression.args.get("size"): 818 method = expression.args.get("method") 819 if method and method.name.upper() != "RESERVOIR": 820 self.unsupported( 821 f"Sampling method {method} is not supported with a discrete sample count, " 822 "defaulting to reservoir sampling" 823 ) 824 expression.set("method", exp.var("RESERVOIR")) 825 826 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
828 def interval_sql(self, expression: exp.Interval) -> str: 829 multiplier: t.Optional[int] = None 830 unit = expression.text("unit").lower() 831 832 if unit.startswith("week"): 833 multiplier = 7 834 if unit.startswith("quarter"): 835 multiplier = 90 836 837 if multiplier: 838 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 839 840 return super().interval_sql(expression)
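A sketch of the week/quarter rewrite above (d is a placeholder; exact parenthesization depends on the sqlglot version):

import sqlglot

# WEEK intervals are rewritten as day multiples per interval_sql above.
print(sqlglot.transpile("SELECT d + INTERVAL 2 WEEK", read="duckdb", write="duckdb")[0])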
847 def join_sql(self, expression: exp.Join) -> str: 848 if ( 849 expression.side == "LEFT" 850 and not expression.args.get("on") 851 and isinstance(expression.this, exp.Unnest) 852 ): 853 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 854 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 855 return super().join_sql(expression.on(exp.true())) 856 857 return super().join_sql(expression)
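An illustrative transpilation of the case handled above (t and xs are placeholders; output may vary by version):

import sqlglot

# BigQuery allows LEFT JOIN UNNEST(...) without ON; the DuckDB output gains ON TRUE.
print(sqlglot.transpile("SELECT * FROM t LEFT JOIN UNNEST(t.xs) AS x", read="bigquery", write="duckdb")[0])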
866 def bracket_sql(self, expression: exp.Bracket) -> str: 867 this = expression.this 868 if isinstance(this, exp.Array): 869 this.replace(exp.paren(this)) 870 871 bracket = super().bracket_sql(expression) 872 873 if not expression.args.get("returns_list_for_maps"): 874 if not this.type: 875 from sqlglot.optimizer.annotate_types import annotate_types 876 877 this = annotate_types(this) 878 879 if this.is_type(exp.DataType.Type.MAP): 880 bracket = f"({bracket})[1]" 881 882 return bracket
884 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 885 expression_sql = self.sql(expression, "expression") 886 887 func = expression.this 888 if isinstance(func, exp.PERCENTILES): 889 # Make the order key the first arg and slide the fraction to the right 890 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 891 order_col = expression.find(exp.Ordered) 892 if order_col: 893 func.set("expression", func.this) 894 func.set("this", order_col.this) 895 896 this = self.sql(expression, "this").rstrip(")") 897 898 return f"{this}{expression_sql})"
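A hedged sketch of the argument reordering for ordered-set aggregates (x and t are placeholders):

import sqlglot

# The fraction and order key swap places, so this is expected to render as
# something like QUANTILE_CONT(x, 0.5).
print(sqlglot.transpile("SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t", read="postgres", write="duckdb")[0])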
900 def length_sql(self, expression: exp.Length) -> str: 901 arg = expression.this 902 903 # Dialects like BQ and Snowflake also accept binary values as args, so 904 # DDB will attempt to infer the type or resort to case/when resolution 905 if not expression.args.get("binary") or arg.is_string: 906 return self.func("LENGTH", arg) 907 908 if not arg.type: 909 from sqlglot.optimizer.annotate_types import annotate_types 910 911 arg = annotate_types(arg) 912 913 if arg.is_type(*exp.DataType.TEXT_TYPES): 914 return self.func("LENGTH", arg) 915 916 # We need these casts to make duckdb's static type checker happy 917 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 918 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 919 920 case = ( 921 exp.case(self.func("TYPEOF", arg)) 922 .when( 923 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 924 ) # anonymous to break length_sql recursion 925 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 926 ) 927 928 return self.sql(case)
930 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 931 this = expression.this 932 key = expression.args.get("key") 933 key_sql = key.name if isinstance(key, exp.Expression) else "" 934 value_sql = self.sql(expression, "value") 935 936 kv_sql = f"{key_sql} := {value_sql}" 937 938 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 939 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 940 if isinstance(this, exp.Struct) and not this.expressions: 941 return self.func("STRUCT_PACK", kv_sql) 942 943 return self.func("STRUCT_INSERT", this, kv_sql)
945 def unnest_sql(self, expression: exp.Unnest) -> str: 946 explode_array = expression.args.get("explode_array") 947 if explode_array: 948 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 949 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 950 expression.expressions.append( 951 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 952 ) 953 954 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 955 alias = expression.args.get("alias") 956 if alias: 957 expression.set("alias", None) 958 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 959 960 unnest_sql = super().unnest_sql(expression) 961 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 962 return self.sql(select) 963 964 return super().unnest_sql(expression)
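An illustrative BigQuery-to-DuckDB run of the transform described in the comments (xs is a placeholder array column; the exact output depends on the sqlglot version):

import sqlglot

# FROM UNNEST(...) may become FROM (SELECT UNNEST(..., max_depth => 2)) AS x.
print(sqlglot.transpile("SELECT * FROM UNNEST(xs) AS x", read="bigquery", write="duckdb")[0])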
966 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 967 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 968 # DuckDB should render IGNORE NULLS only for the general-purpose 969 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 970 return super().ignorenulls_sql(expression) 971 972 return self.sql(expression, "this")
974 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 975 this = self.sql(expression, "this") 976 null_text = self.sql(expression, "null") 977 978 if null_text: 979 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 980 981 return self.func("ARRAY_TO_STRING", this, expression.expression)
983 @unsupported_args("position", "occurrence") 984 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 985 group = expression.args.get("group") 986 params = expression.args.get("parameters") 987 988 # Do not render group if there is no following argument, 989 # and it's the default value for this dialect 990 if ( 991 not params 992 and group 993 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 994 ): 995 group = None 996 return self.func( 997 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 998 )
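A round-trip sketch of the default-group elision (s is a placeholder; behavior may differ across versions):

import sqlglot

# The default group argument is not rendered when no parameters follow it.
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)')", read="duckdb", write="duckdb")[0])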
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql