sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.annotate_types import TypeAnnotator
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )
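# Illustrative example (not part of the module): the builders above turn Snowflake's
# overloaded TO_* functions into dialect-agnostic nodes, e.g. a single string argument
# becomes a plain cast. A minimal sketch, assuming sqlglot is installed; the exact
# output may vary across sqlglot versions:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT TO_TIMESTAMP('2020-01-01')", read="snowflake", write="duckdb")[0]
#     "SELECT CAST('2020-01-01' AS TIMESTAMP)"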
def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc
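# Illustrative example (not part of the module): DIV0 parses into an exp.If guard, so
# dialects without DIV0 can emit an equivalent CASE expression. Output shown is
# indicative and may vary across sqlglot versions:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")[0]
#     'SELECT CASE WHEN b = 0 AND NOT a IS NULL THEN 0 ELSE a / b END'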
def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )
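# Illustrative example (not part of the module): the rewrite above expresses a
# BigQuery-style GENERATE_DATE_ARRAY sequence through ARRAY_GENERATE_RANGE plus
# DATEADD. Sketch only; the generated SQL is omitted because its exact shape
# depends on the sqlglot version:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03'))",
#     ...     read="bigquery",
#     ...     write="snowflake",
#     ... )  # doctest: +SKIP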
def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll be
            # unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
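# Illustrative example (not part of the module): _regexpextract_sql back-fills the
# optional REGEXP_SUBSTR arguments from right to left, so a non-default group forces
# explicit position/occurrence/parameters defaults as well. Sketch only; output
# omitted because it depends on the sqlglot version:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT REGEXP_SUBSTR(s, 'a(b)', 1, 1, 'c', 1)", read="snowflake", write="snowflake"
#     ... )  # doctest: +SKIP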
def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                if (
                    isinstance(unnest_ancestor, exp.Unnest)
                    and ancestor_identifier
                    and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                ):
                    continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression
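# Illustrative example (not part of the module): _qualify_unnested_columns gives
# BigQuery-style unqualified UNNEST columns an explicit table reference so the
# Snowflake output is unambiguous. Sketch only; output omitted because it depends
# on the sqlglot version:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT x FROM UNNEST([1, 2]) AS x", read="bigquery", write="snowflake"
#     ... )  # doctest: +SKIP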
def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
    expression = self._annotate_by_args(expression, "this")
    if expression.is_type(exp.DataType.Type.NULL):
        # Snowflake treats REVERSE(NULL) as a VARCHAR
        self._set_type(expression, exp.DataType.Type.VARCHAR)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.INT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT],
            exp.Length,
        },
        exp.DataType.Type.VARCHAR: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
            exp.MD5,
            exp.AIAgg,
            exp.AIClassify,
            exp.AISummarizeAgg,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.SHA,
            exp.SHA2,
            exp.Space,
            exp.Uuid,
        },
        exp.DataType.Type.BINARY: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
            exp.MD5Digest,
            exp.SHA1Digest,
            exp.SHA2Digest,
        },
        exp.DataType.Type.BIGINT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT],
            exp.MD5NumberLower64,
            exp.MD5NumberUpper64,
        },
        exp.DataType.Type.ARRAY: {
            exp.Split,
        },
    }

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Substring,
            )
        },
        exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Reverse: _annotate_reverse,
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }
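    # Illustrative example (not part of the module): NormalizationStrategy.UPPERCASE
    # means unquoted identifiers resolve case-insensitively and normalize to upper
    # case, which sqlglot's optimizer helpers follow. Output shown is indicative:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
    #     >>> normalize_identifiers(sqlglot.parse_one("SELECT a FROM t"), dialect="snowflake").sql()
    #     'SELECT A FROM T'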
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.EXCEPT,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": build_timetostr_or_tochar,
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
        FUNCTIONS.pop("PREDICT")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")
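        # Illustrative example (not part of the module): FUNCTIONS entries rewrite
        # Snowflake spellings into canonical AST nodes at parse time, e.g. IFF becomes
        # exp.If, which other dialects can render natively. Output shown is indicative:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT IFF(x > 0, 1, 2)", read="snowflake", write="duckdb")[0]
        #     'SELECT CASE WHEN x > 0 THEN 1 ELSE 2 END'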
        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression
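        # Illustrative example (not part of the module): _negate_range rewrites
        # `x NOT IN (subquery)` as `x <> ALL (subquery)` to match Snowflake's NULL
        # semantics. Output shown is indicative:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)", read="snowflake")[0]
        #     'SELECT * FROM t WHERE x <> ALL (SELECT y FROM u)'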
        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table
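        # Illustrative example (not part of the module): _parse_id_var keeps
        # IDENTIFIER('name') as an anonymous IDENTIFIER call so it round-trips instead
        # of collapsing into a plain identifier. Sketch only; output omitted because
        # it depends on the sqlglot version:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM IDENTIFIER('my_table')", read="snowflake")  # doctest: +SKIP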
        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )
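        # Illustrative example (not part of the module): SHOW statements parse into a
        # structured exp.Show node rather than an opaque command, so scope arguments
        # survive a round trip. Output shown is indicative:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SHOW TERSE TABLES IN SCHEMA db.s", read="snowflake").sql(dialect="snowflake")
        #     'SHOW TERSE TABLES IN SCHEMA db.s'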
        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
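    # Illustrative example (not part of the module): the keyword remappings above mean
    # e.g. MINUS tokenizes as EXCEPT, so set operations transpile cleanly. Output
    # shown is indicative:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="duckdb")[0]
    #     'SELECT 1 EXCEPT SELECT 2'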
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
'this')}", 1356 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1357 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1358 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1359 exp.MakeInterval: no_make_interval_sql, 1360 exp.Max: max_or_greatest, 1361 exp.Min: min_or_least, 1362 exp.ParseJSON: lambda self, e: self.func( 1363 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1364 ), 1365 exp.JSONFormat: rename_func("TO_JSON"), 1366 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1367 exp.PercentileCont: transforms.preprocess( 1368 [transforms.add_within_group_for_percentiles] 1369 ), 1370 exp.PercentileDisc: transforms.preprocess( 1371 [transforms.add_within_group_for_percentiles] 1372 ), 1373 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1374 exp.RegexpExtract: _regexpextract_sql, 1375 exp.RegexpExtractAll: _regexpextract_sql, 1376 exp.RegexpILike: _regexpilike_sql, 1377 exp.Rand: rename_func("RANDOM"), 1378 exp.Select: transforms.preprocess( 1379 [ 1380 transforms.eliminate_window_clause, 1381 transforms.eliminate_distinct_on, 1382 transforms.explode_projection_to_unnest(), 1383 transforms.eliminate_semi_and_anti_joins, 1384 _transform_generate_date_array, 1385 _qualify_unnested_columns, 1386 _eliminate_dot_variant_lookup, 1387 ] 1388 ), 1389 exp.SHA: rename_func("SHA1"), 1390 exp.MD5Digest: rename_func("MD5_BINARY"), 1391 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1392 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1393 exp.LowerHex: rename_func("TO_CHAR"), 1394 exp.SortArray: rename_func("ARRAY_SORT"), 1395 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1396 exp.StartsWith: rename_func("STARTSWITH"), 1397 exp.EndsWith: rename_func("ENDSWITH"), 1398 exp.StrPosition: lambda self, e: strposition_sql( 1399 self, e, func_name="CHARINDEX", supports_position=True 1400 ), 1401 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1402 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1403 exp.Stuff: rename_func("INSERT"), 1404 exp.StPoint: rename_func("ST_MAKEPOINT"), 1405 exp.TimeAdd: date_delta_sql("TIMEADD"), 1406 exp.Timestamp: no_timestamp_sql, 1407 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1408 exp.TimestampDiff: lambda self, e: self.func( 1409 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1410 ), 1411 exp.TimestampTrunc: timestamptrunc_sql(), 1412 exp.TimeStrToTime: timestrtotime_sql, 1413 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1414 exp.ToArray: rename_func("TO_ARRAY"), 1415 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1416 exp.ToDouble: rename_func("TO_DOUBLE"), 1417 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1418 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1419 exp.TsOrDsToDate: lambda self, e: self.func( 1420 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1421 ), 1422 exp.TsOrDsToTime: lambda self, e: self.func( 1423 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1424 ), 1425 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1426 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1427 exp.Uuid: rename_func("UUID_STRING"), 1428 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1429 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1430 exp.Xor: rename_func("BOOLXOR"), 1431 exp.ByteLength: rename_func("OCTET_LENGTH"), 1432 } 1433 1434 SUPPORTED_JSON_PATH_PARTS = { 1435 exp.JSONPathKey, 1436 exp.JSONPathRoot, 1437 
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string.
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that
            # sets "requires_string") or if we can deduce that the value is a string, then
            # we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)
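        # Illustrative example (not part of the module): per trycast_sql above,
        # TRY_CAST only survives when the operand is provably a string; otherwise a
        # plain CAST is emitted. Output shown is indicative:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="duckdb", write="snowflake")[0]
        #     'SELECT CAST(x AS INT)'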
        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""

            order = expression.args.get("order")
            if order is not None:
                order_clause = " ORDER" if order else " NOORDER"
            else:
                order_clause = ""

            return f"AUTOINCREMENT{start}{increment}{order_clause}"
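        # Illustrative example (not part of the module): identity columns render with
        # Snowflake's AUTOINCREMENT shorthand. Sketch only; output omitted because the
        # exact shape depends on the sqlglot version:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile(
        #     ...     "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY)",
        #     ...     read="postgres",
        #     ...     write="snowflake",
        #     ... )  # doctest: +SKIP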
({self.expressions(expression, flat=True)})" 1635 1636 def struct_sql(self, expression: exp.Struct) -> str: 1637 if len(expression.expressions) == 1: 1638 arg = expression.expressions[0] 1639 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1640 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1641 return f"{{{self.sql(expression.expressions[0])}}}" 1642 1643 keys = [] 1644 values = [] 1645 1646 for i, e in enumerate(expression.expressions): 1647 if isinstance(e, exp.PropertyEQ): 1648 keys.append( 1649 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1650 ) 1651 values.append(e.expression) 1652 else: 1653 keys.append(exp.Literal.string(f"_{i}")) 1654 values.append(e) 1655 1656 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1657 1658 @unsupported_args("weight", "accuracy") 1659 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1660 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1661 1662 def alterset_sql(self, expression: exp.AlterSet) -> str: 1663 exprs = self.expressions(expression, flat=True) 1664 exprs = f" {exprs}" if exprs else "" 1665 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1666 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1667 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1668 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1669 tag = self.expressions(expression, key="tag", flat=True) 1670 tag = f" TAG {tag}" if tag else "" 1671 1672 return f"SET{exprs}{file_format}{copy_options}{tag}" 1673 1674 def strtotime_sql(self, expression: exp.StrToTime): 1675 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1676 return self.func( 1677 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1678 ) 1679 1680 def timestampsub_sql(self, expression: exp.TimestampSub): 1681 return self.sql( 1682 exp.TimestampAdd( 1683 this=expression.this, 1684 expression=expression.expression * -1, 1685 unit=expression.unit, 1686 ) 1687 ) 1688 1689 def jsonextract_sql(self, expression: exp.JSONExtract): 1690 this = expression.this 1691 1692 # JSON strings are valid coming from other dialects such as BQ so 1693 # for these cases we PARSE_JSON preemptively 1694 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1695 "requires_json" 1696 ): 1697 this = exp.ParseJSON(this=this) 1698 1699 return self.func( 1700 "GET_PATH", 1701 this, 1702 expression.expression, 1703 ) 1704 1705 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1706 this = expression.this 1707 if this.is_string: 1708 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1709 1710 return self.func("TO_CHAR", this, self.format_time(expression)) 1711 1712 def datesub_sql(self, expression: exp.DateSub) -> str: 1713 value = expression.expression 1714 if value: 1715 value.replace(value * (-1)) 1716 else: 1717 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1718 1719 return date_delta_sql("DATEADD")(self, expression) 1720 1721 def select_sql(self, expression: exp.Select) -> str: 1722 limit = expression.args.get("limit") 1723 offset = expression.args.get("offset") 1724 if offset and not limit: 1725 expression.limit(exp.Null(), copy=False) 1726 return super().select_sql(expression) 1727 1728 def createable_sql(self, expression: exp.Create, 
locations: t.DefaultDict) -> str: 1729 is_materialized = expression.find(exp.MaterializedProperty) 1730 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1731 1732 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1733 # For materialized views, COPY GRANTS is located *before* the columns list 1734 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1735 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1736 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1737 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1738 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1739 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1740 1741 this_name = self.sql(expression.this, "this") 1742 copy_grants = self.sql(copy_grants_property) 1743 this_schema = self.schema_columns_sql(expression.this) 1744 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1745 1746 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1747 1748 return super().createable_sql(expression, locations) 1749 1750 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1751 this = expression.this 1752 1753 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1754 # and add it later as part of the WITHIN GROUP clause 1755 order = this if isinstance(this, exp.Order) else None 1756 if order: 1757 expression.set("this", order.this.pop()) 1758 1759 expr_sql = super().arrayagg_sql(expression) 1760 1761 if order: 1762 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1763 1764 return expr_sql 1765 1766 def array_sql(self, expression: exp.Array) -> str: 1767 expressions = expression.expressions 1768 1769 first_expr = seq_get(expressions, 0) 1770 if isinstance(first_expr, exp.Select): 1771 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1772 if first_expr.text("kind").upper() == "STRUCT": 1773 object_construct_args = [] 1774 for expr in first_expr.expressions: 1775 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1776 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1777 name = expr.this if isinstance(expr, exp.Alias) else expr 1778 1779 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1780 1781 array_agg = exp.ArrayAgg( 1782 this=_build_object_construct(args=object_construct_args) 1783 ) 1784 1785 first_expr.set("kind", None) 1786 first_expr.set("expressions", [array_agg]) 1787 1788 return self.sql(first_expr.subquery()) 1789 1790 return inline_array_sql(self, expression) 1791 1792 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1793 zone = self.sql(expression, "this") 1794 if not zone: 1795 return super().currentdate_sql(expression) 1796 1797 expr = exp.Cast( 1798 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1799 to=exp.DataType(this=exp.DataType.Type.DATE), 1800 ) 1801 return self.sql(expr) 1802 1803 def dot_sql(self, expression: exp.Dot) -> str: 1804 this = expression.this 1805 1806 if not this.type: 1807 from sqlglot.optimizer.annotate_types import annotate_types 1808 1809 this = annotate_types(this, dialect=self.dialect) 1810 1811 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1812 # Generate colon notation for the top 
level STRUCT 1813 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1814 1815 return super().dot_sql(expression) 1816 1817 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1818 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}"
class Snowflake(Dialect):
(The full class source appears in the module listing above; its documented members follow.)
NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
Specifies the strategy according to which identifiers should be normalized. Snowflake normalizes unquoted identifiers to uppercase.
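A quick sketch of what that means in practice, using sqlglot's normalize_identifiers helper (real API; the output shown in the comment is the expected shape, not a guaranteed string):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are case-insensitive in Snowflake and normalize to
# uppercase, while quoted identifiers keep their exact casing.
expr = parse_one('SELECT a, "b" FROM t', read="snowflake")
print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))
# expected shape: SELECT A, "b" FROM T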
NULL_ORDERING = "nulls_are_large"
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
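The default is declared as a plain class attribute, so it can be inspected directly:

from sqlglot.dialects.snowflake import Snowflake

# "nulls_are_large" means NULLs compare as the largest values, so an
# ascending ORDER BY puts them last by default.
print(Snowflake.NULL_ORDERING)  # nulls_are_large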
PREFER_CTE_ALIAS_COLUMN = True
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery. For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
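A sketch of how to observe the rewrite programmatically; this assumes the alias resolution happens in sqlglot's qualify step (the entry point below is real API, but the exact output string may differ by version):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql(dialect="snowflake"))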
TIME_MAPPING = {"YYYY": "%Y", "MM": "%m", "DD": "%d", "HH24": "%H", "MI": "%M", "SS": "%S", ...}
Associates this dialect's time formats with their equivalent Python strftime formats.
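For example, transpiling a Snowflake format string to a strftime-based dialect goes through this mapping; a sketch, with the exact output depending on the target dialect and sqlglot version:

import sqlglot

# 'YYYY-MM-DD HH24:MI:SS' corresponds to '%Y-%m-%d %H:%M:%S' in strftime terms.
print(sqlglot.transpile(
    "SELECT TO_CHAR(col, 'YYYY-MM-DD HH24:MI:SS') FROM t",
    read="snowflake",
    write="duckdb",
)[0])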
610 def quote_identifier(self, expression: E, identify: bool = True) -> E: 611 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 612 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 613 if ( 614 isinstance(expression, exp.Identifier) 615 and isinstance(expression.parent, exp.Table) 616 and expression.name.lower() == "dual" 617 ): 618 return expression # type: ignore 619 620 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
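For illustration, a minimal sketch that calls quote_identifier directly (the identifier names are arbitrary):

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()
    table = exp.to_table("dual")  # an Identifier named "dual" whose parent is a Table
    # The Snowflake override leaves DUAL unquoted even with identify=True ...
    print(dialect.quote_identifier(table.this).sql(dialect="snowflake"))  # dual
    # ... while ordinary identifiers are quoted as usual
    print(dialect.quote_identifier(exp.to_identifier("col")).sql(dialect="snowflake"))  # "col"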
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
622 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 623 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 624 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
626 class Parser(parser.Parser): 627 IDENTIFY_PIVOT_STRINGS = True 628 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 629 COLON_IS_VARIANT_EXTRACT = True 630 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 631 632 ID_VAR_TOKENS = { 633 *parser.Parser.ID_VAR_TOKENS, 634 TokenType.EXCEPT, 635 TokenType.MATCH_CONDITION, 636 } 637 638 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 639 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 640 641 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 642 643 FUNCTIONS = { 644 **parser.Parser.FUNCTIONS, 645 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 646 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 647 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 648 this=seq_get(args, 1), expression=seq_get(args, 0) 649 ), 650 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 651 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 652 start=seq_get(args, 0), 653 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 654 step=seq_get(args, 2), 655 ), 656 "ARRAY_SORT": exp.SortArray.from_arg_list, 657 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 658 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 659 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 660 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 661 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 662 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 663 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 664 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 665 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 666 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 667 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 668 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 669 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 670 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 671 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 672 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 673 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 674 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 675 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 676 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 677 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 678 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 679 "DATE_TRUNC": _date_trunc_to_time, 680 "DATEADD": _build_date_time_add(exp.DateAdd), 681 "DATEDIFF": _build_datediff, 682 "DIV0": _build_if_from_div0, 683 "EDITDISTANCE": lambda args: exp.Levenshtein( 684 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 685 ), 686 "FLATTEN": exp.Explode.from_arg_list, 687 "GET": exp.GetExtract.from_arg_list, 688 "GET_PATH": lambda args, dialect: exp.JSONExtract( 689 this=seq_get(args, 0), 690 expression=dialect.to_json_path(seq_get(args, 1)), 691 requires_json=True, 692 ), 693 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 694 "IFF": exp.If.from_arg_list, 695 "MD5_HEX": exp.MD5.from_arg_list, 696 "MD5_BINARY": exp.MD5Digest.from_arg_list, 697 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 698 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 699 "LAST_DAY": lambda args: exp.LastDay( 700 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 701 ), 702 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 703 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 704 "NULLIFZERO": _build_if_from_nullifzero, 705 "OBJECT_CONSTRUCT": 
_build_object_construct, 706 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 707 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 708 "REGEXP_REPLACE": _build_regexp_replace, 709 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 710 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 711 "REPLACE": build_replace_with_optional_replacement, 712 "RLIKE": exp.RegexpLike.from_arg_list, 713 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 714 "SHA1_HEX": exp.SHA.from_arg_list, 715 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 716 "SHA2_HEX": exp.SHA2.from_arg_list, 717 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 718 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 719 "TIMEADD": _build_date_time_add(exp.TimeAdd), 720 "TIMEDIFF": _build_datediff, 721 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 722 "TIMESTAMPDIFF": _build_datediff, 723 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 724 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 725 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 726 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 727 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 728 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 729 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 730 "TRY_TO_TIMESTAMP": _build_datetime( 731 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 732 ), 733 "TO_CHAR": build_timetostr_or_tochar, 734 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 735 "TO_NUMBER": lambda args: exp.ToNumber( 736 this=seq_get(args, 0), 737 format=seq_get(args, 1), 738 precision=seq_get(args, 2), 739 scale=seq_get(args, 3), 740 ), 741 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 742 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 743 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 744 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 745 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 746 "TO_VARCHAR": build_timetostr_or_tochar, 747 "TO_JSON": exp.JSONFormat.from_arg_list, 748 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 749 "ZEROIFNULL": _build_if_from_zeroifnull, 750 } 751 FUNCTIONS.pop("PREDICT") 752 753 FUNCTION_PARSERS = { 754 **parser.Parser.FUNCTION_PARSERS, 755 "DATE_PART": lambda self: self._parse_date_part(), 756 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 757 "LISTAGG": lambda self: self._parse_string_agg(), 758 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 759 } 760 FUNCTION_PARSERS.pop("TRIM") 761 762 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 763 764 ALTER_PARSERS = { 765 **parser.Parser.ALTER_PARSERS, 766 "SESSION": lambda self: self._parse_alter_session(), 767 "UNSET": lambda self: self.expression( 768 exp.Set, 769 tag=self._match_text_seq("TAG"), 770 expressions=self._parse_csv(self._parse_id_var), 771 unset=True, 772 ), 773 } 774 775 STATEMENT_PARSERS = { 776 **parser.Parser.STATEMENT_PARSERS, 777 TokenType.GET: lambda self: self._parse_get(), 778 TokenType.PUT: lambda self: self._parse_put(), 779 TokenType.SHOW: lambda self: self._parse_show(), 780 } 781 782 PROPERTY_PARSERS = { 783 **parser.Parser.PROPERTY_PARSERS, 784 "CREDENTIALS": lambda self: self._parse_credentials_property(), 785 
"FILE_FORMAT": lambda self: self._parse_file_format_property(), 786 "LOCATION": lambda self: self._parse_location_property(), 787 "TAG": lambda self: self._parse_tag(), 788 "USING": lambda self: self._match_text_seq("TEMPLATE") 789 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 790 } 791 792 TYPE_CONVERTERS = { 793 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 794 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 795 } 796 797 SHOW_PARSERS = { 798 "DATABASES": _show_parser("DATABASES"), 799 "TERSE DATABASES": _show_parser("DATABASES"), 800 "SCHEMAS": _show_parser("SCHEMAS"), 801 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 802 "OBJECTS": _show_parser("OBJECTS"), 803 "TERSE OBJECTS": _show_parser("OBJECTS"), 804 "TABLES": _show_parser("TABLES"), 805 "TERSE TABLES": _show_parser("TABLES"), 806 "VIEWS": _show_parser("VIEWS"), 807 "TERSE VIEWS": _show_parser("VIEWS"), 808 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 809 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 810 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 811 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 812 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 813 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 814 "SEQUENCES": _show_parser("SEQUENCES"), 815 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 816 "STAGES": _show_parser("STAGES"), 817 "COLUMNS": _show_parser("COLUMNS"), 818 "USERS": _show_parser("USERS"), 819 "TERSE USERS": _show_parser("USERS"), 820 "FILE FORMATS": _show_parser("FILE FORMATS"), 821 "FUNCTIONS": _show_parser("FUNCTIONS"), 822 "PROCEDURES": _show_parser("PROCEDURES"), 823 "WAREHOUSES": _show_parser("WAREHOUSES"), 824 } 825 826 CONSTRAINT_PARSERS = { 827 **parser.Parser.CONSTRAINT_PARSERS, 828 "WITH": lambda self: self._parse_with_constraint(), 829 "MASKING": lambda self: self._parse_with_constraint(), 830 "PROJECTION": lambda self: self._parse_with_constraint(), 831 "TAG": lambda self: self._parse_with_constraint(), 832 } 833 834 STAGED_FILE_SINGLE_TOKENS = { 835 TokenType.DOT, 836 TokenType.MOD, 837 TokenType.SLASH, 838 } 839 840 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 841 842 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 843 844 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 845 846 LAMBDAS = { 847 **parser.Parser.LAMBDAS, 848 TokenType.ARROW: lambda self, expressions: self.expression( 849 exp.Lambda, 850 this=self._replace_lambda( 851 self._parse_assignment(), 852 expressions, 853 ), 854 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 855 ), 856 } 857 858 COLUMN_OPERATORS = { 859 **parser.Parser.COLUMN_OPERATORS, 860 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 861 exp.ModelAttribute, this=this, expression=attr 862 ), 863 } 864 865 def _parse_use(self) -> exp.Use: 866 if self._match_text_seq("SECONDARY", "ROLES"): 867 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 868 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 869 return self.expression( 870 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 871 ) 872 873 return super()._parse_use() 874 875 def _negate_range( 876 self, this: t.Optional[exp.Expression] = None 877 ) -> t.Optional[exp.Expression]: 878 if not this: 879 return this 880 881 query = this.args.get("query") 882 if isinstance(this, exp.In) and isinstance(query, exp.Query): 883 
# Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 884 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 885 # which can produce different results (most likely a Snowflake bug). 886 # 887 # https://docs.snowflake.com/en/sql-reference/functions/in 888 # Context: https://github.com/tobymao/sqlglot/issues/3890 889 return self.expression( 890 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 891 ) 892 893 return self.expression(exp.Not, this=this) 894 895 def _parse_tag(self) -> exp.Tags: 896 return self.expression( 897 exp.Tags, 898 expressions=self._parse_wrapped_csv(self._parse_property), 899 ) 900 901 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 902 if self._prev.token_type != TokenType.WITH: 903 self._retreat(self._index - 1) 904 905 if self._match_text_seq("MASKING", "POLICY"): 906 policy = self._parse_column() 907 return self.expression( 908 exp.MaskingPolicyColumnConstraint, 909 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 910 expressions=self._match(TokenType.USING) 911 and self._parse_wrapped_csv(self._parse_id_var), 912 ) 913 if self._match_text_seq("PROJECTION", "POLICY"): 914 policy = self._parse_column() 915 return self.expression( 916 exp.ProjectionPolicyColumnConstraint, 917 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 918 ) 919 if self._match(TokenType.TAG): 920 return self._parse_tag() 921 922 return None 923 924 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 925 if self._match(TokenType.TAG): 926 return self._parse_tag() 927 928 return super()._parse_with_property() 929 930 def _parse_create(self) -> exp.Create | exp.Command: 931 expression = super()._parse_create() 932 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 933 # Replace the Table node with the enclosed Identifier 934 expression.this.replace(expression.this.this) 935 936 return expression 937 938 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 939 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 940 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 941 this = self._parse_var() or self._parse_type() 942 943 if not this: 944 return None 945 946 self._match(TokenType.COMMA) 947 expression = self._parse_bitwise() 948 this = map_date_part(this) 949 name = this.name.upper() 950 951 if name.startswith("EPOCH"): 952 if name == "EPOCH_MILLISECOND": 953 scale = 10**3 954 elif name == "EPOCH_MICROSECOND": 955 scale = 10**6 956 elif name == "EPOCH_NANOSECOND": 957 scale = 10**9 958 else: 959 scale = None 960 961 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 962 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 963 964 if scale: 965 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 966 967 return to_unix 968 969 return self.expression(exp.Extract, this=this, expression=expression) 970 971 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 972 if is_map: 973 # Keys are strings in Snowflake's objects, see also: 974 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 975 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 976 return self._parse_slice(self._parse_string()) or self._parse_assignment() 977 978 return 
self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 979 980 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 981 lateral = super()._parse_lateral() 982 if not lateral: 983 return lateral 984 985 if isinstance(lateral.this, exp.Explode): 986 table_alias = lateral.args.get("alias") 987 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 988 if table_alias and not table_alias.args.get("columns"): 989 table_alias.set("columns", columns) 990 elif not table_alias: 991 exp.alias_(lateral, "_flattened", table=columns, copy=False) 992 993 return lateral 994 995 def _parse_table_parts( 996 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 997 ) -> exp.Table: 998 # https://docs.snowflake.com/en/user-guide/querying-stage 999 if self._match(TokenType.STRING, advance=False): 1000 table = self._parse_string() 1001 elif self._match_text_seq("@", advance=False): 1002 table = self._parse_location_path() 1003 else: 1004 table = None 1005 1006 if table: 1007 file_format = None 1008 pattern = None 1009 1010 wrapped = self._match(TokenType.L_PAREN) 1011 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1012 if self._match_text_seq("FILE_FORMAT", "=>"): 1013 file_format = self._parse_string() or super()._parse_table_parts( 1014 is_db_reference=is_db_reference 1015 ) 1016 elif self._match_text_seq("PATTERN", "=>"): 1017 pattern = self._parse_string() 1018 else: 1019 break 1020 1021 self._match(TokenType.COMMA) 1022 1023 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1024 else: 1025 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1026 1027 return table 1028 1029 def _parse_table( 1030 self, 1031 schema: bool = False, 1032 joins: bool = False, 1033 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1034 parse_bracket: bool = False, 1035 is_db_reference: bool = False, 1036 parse_partition: bool = False, 1037 consume_pipe: bool = False, 1038 ) -> t.Optional[exp.Expression]: 1039 table = super()._parse_table( 1040 schema=schema, 1041 joins=joins, 1042 alias_tokens=alias_tokens, 1043 parse_bracket=parse_bracket, 1044 is_db_reference=is_db_reference, 1045 parse_partition=parse_partition, 1046 ) 1047 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1048 table_from_rows = table.this 1049 for arg in exp.TableFromRows.arg_types: 1050 if arg != "this": 1051 table_from_rows.set(arg, table.args.get(arg)) 1052 1053 table = table_from_rows 1054 1055 return table 1056 1057 def _parse_id_var( 1058 self, 1059 any_token: bool = True, 1060 tokens: t.Optional[t.Collection[TokenType]] = None, 1061 ) -> t.Optional[exp.Expression]: 1062 if self._match_text_seq("IDENTIFIER", "("): 1063 identifier = ( 1064 super()._parse_id_var(any_token=any_token, tokens=tokens) 1065 or self._parse_string() 1066 ) 1067 self._match_r_paren() 1068 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1069 1070 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1071 1072 def _parse_show_snowflake(self, this: str) -> exp.Show: 1073 scope = None 1074 scope_kind = None 1075 1076 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1077 # which is syntactically valid but has no effect on the output 1078 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1079 1080 history = self._match_text_seq("HISTORY") 1081 1082 like = self._parse_string() if self._match(TokenType.LIKE) else None 1083 1084 if 
self._match(TokenType.IN): 1085 if self._match_text_seq("ACCOUNT"): 1086 scope_kind = "ACCOUNT" 1087 elif self._match_text_seq("CLASS"): 1088 scope_kind = "CLASS" 1089 scope = self._parse_table_parts() 1090 elif self._match_text_seq("APPLICATION"): 1091 scope_kind = "APPLICATION" 1092 if self._match_text_seq("PACKAGE"): 1093 scope_kind += " PACKAGE" 1094 scope = self._parse_table_parts() 1095 elif self._match_set(self.DB_CREATABLES): 1096 scope_kind = self._prev.text.upper() 1097 if self._curr: 1098 scope = self._parse_table_parts() 1099 elif self._curr: 1100 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1101 scope = self._parse_table_parts() 1102 1103 return self.expression( 1104 exp.Show, 1105 **{ 1106 "terse": terse, 1107 "this": this, 1108 "history": history, 1109 "like": like, 1110 "scope": scope, 1111 "scope_kind": scope_kind, 1112 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1113 "limit": self._parse_limit(), 1114 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1115 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1116 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1117 }, 1118 ) 1119 1120 def _parse_put(self) -> exp.Put | exp.Command: 1121 if self._curr.token_type != TokenType.STRING: 1122 return self._parse_as_command(self._prev) 1123 1124 return self.expression( 1125 exp.Put, 1126 this=self._parse_string(), 1127 target=self._parse_location_path(), 1128 properties=self._parse_properties(), 1129 ) 1130 1131 def _parse_get(self) -> t.Optional[exp.Expression]: 1132 start = self._prev 1133 1134 # If we detect GET( then we need to parse a function, not a statement 1135 if self._match(TokenType.L_PAREN): 1136 self._retreat(self._index - 2) 1137 return self._parse_expression() 1138 1139 target = self._parse_location_path() 1140 1141 # Parse as command if unquoted file path 1142 if self._curr.token_type == TokenType.URI_START: 1143 return self._parse_as_command(start) 1144 1145 return self.expression( 1146 exp.Get, 1147 this=self._parse_string(), 1148 target=target, 1149 properties=self._parse_properties(), 1150 ) 1151 1152 def _parse_location_property(self) -> exp.LocationProperty: 1153 self._match(TokenType.EQ) 1154 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1155 1156 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1157 # Parse either a subquery or a staged file 1158 return ( 1159 self._parse_select(table=True, parse_subquery_alias=False) 1160 if self._match(TokenType.L_PAREN, advance=False) 1161 else self._parse_table_parts() 1162 ) 1163 1164 def _parse_location_path(self) -> exp.Var: 1165 start = self._curr 1166 self._advance_any(ignore_reserved=True) 1167 1168 # We avoid consuming a comma token because external tables like @foo and @bar 1169 # can be joined in a query with a comma separator, as well as closing paren 1170 # in case of subqueries 1171 while self._is_connected() and not self._match_set( 1172 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1173 ): 1174 self._advance_any(ignore_reserved=True) 1175 1176 return exp.var(self._find_sql(start, self._prev)) 1177 1178 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1179 this = super()._parse_lambda_arg() 1180 1181 if not this: 1182 return this 1183 1184 typ = self._parse_types() 1185 1186 if typ: 1187 return self.expression(exp.Cast, this=this, to=typ) 1188 1189 return this 1190 1191 def _parse_foreign_key(self) -> exp.ForeignKey: 1192 # 
inline FK, the REFERENCES columns are implied 1193 if self._match(TokenType.REFERENCES, advance=False): 1194 return self.expression(exp.ForeignKey) 1195 1196 # out-of-line FK, explicitly names the columns 1197 return super()._parse_foreign_key() 1198 1199 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1200 self._match(TokenType.EQ) 1201 if self._match(TokenType.L_PAREN, advance=False): 1202 expressions = self._parse_wrapped_options() 1203 else: 1204 expressions = [self._parse_format_name()] 1205 1206 return self.expression( 1207 exp.FileFormatProperty, 1208 expressions=expressions, 1209 ) 1210 1211 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1212 return self.expression( 1213 exp.CredentialsProperty, 1214 expressions=self._parse_wrapped_options(), 1215 ) 1216 1217 def _parse_semantic_view(self) -> exp.SemanticView: 1218 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1219 1220 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1221 if self._match_text_seq("DIMENSIONS"): 1222 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1223 if self._match_text_seq("METRICS"): 1224 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1225 if self._match_text_seq("WHERE"): 1226 kwargs["where"] = self._parse_expression() 1227 1228 return self.expression(exp.SemanticView, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
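A short sketch of the FUNCTIONS mappings above, e.g. IFF being normalized to the dialect-agnostic exp.If node (the table and column names are illustrative):

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("SELECT IFF(x > 0, 1, 2) FROM t", read="snowflake")
    assert isinstance(ast.selects[0], exp.If)  # IFF parses into exp.If
    print(ast.sql(dialect="snowflake"))        # round-trips back to IFF(x > 0, 1, 2)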
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1230 class Tokenizer(tokens.Tokenizer): 1231 STRING_ESCAPES = ["\\", "'"] 1232 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1233 RAW_STRINGS = ["$$"] 1234 COMMENTS = ["--", "//", ("/*", "*/")] 1235 NESTED_COMMENTS = False 1236 1237 KEYWORDS = { 1238 **tokens.Tokenizer.KEYWORDS, 1239 "BYTEINT": TokenType.INT, 1240 "FILE://": TokenType.URI_START, 1241 "FILE FORMAT": TokenType.FILE_FORMAT, 1242 "GET": TokenType.GET, 1243 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1244 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1245 "MINUS": TokenType.EXCEPT, 1246 "NCHAR VARYING": TokenType.VARCHAR, 1247 "PUT": TokenType.PUT, 1248 "REMOVE": TokenType.COMMAND, 1249 "RM": TokenType.COMMAND, 1250 "SAMPLE": TokenType.TABLE_SAMPLE, 1251 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1252 "SQL_DOUBLE": TokenType.DOUBLE, 1253 "SQL_VARCHAR": TokenType.VARCHAR, 1254 "STAGE": TokenType.STAGE, 1255 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1256 "STREAMLIT": TokenType.STREAMLIT, 1257 "TAG": TokenType.TAG, 1258 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1259 "TOP": TokenType.TOP, 1260 "WAREHOUSE": TokenType.WAREHOUSE, 1261 } 1262 KEYWORDS.pop("/*+") 1263 1264 SINGLE_TOKENS = { 1265 **tokens.Tokenizer.SINGLE_TOKENS, 1266 "$": TokenType.PARAMETER, 1267 "!": TokenType.EXCLAMATION, 1268 } 1269 1270 VAR_SINGLE_TOKENS = {"$"} 1271 1272 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
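A minimal sketch of the tokenizer settings above, showing $$ ... $$ raw strings and // line comments (the input string is arbitrary):

    import sqlglot

    tokens = sqlglot.tokenize("SELECT $$hello$$ // a line comment", read="snowflake")
    # $$...$$ is lexed as a single raw string token; the // comment is attached
    # to the preceding token's comments rather than emitted as its own token
    print([(token.token_type, token.text) for token in tokens])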
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1274 class Generator(generator.Generator): 1275 PARAMETER_TOKEN = "$" 1276 MATCHED_BY_SOURCE = False 1277 SINGLE_STRING_INTERVAL = True 1278 JOIN_HINTS = False 1279 TABLE_HINTS = False 1280 QUERY_HINTS = False 1281 AGGREGATE_FILTER_SUPPORTED = False 1282 SUPPORTS_TABLE_COPY = False 1283 COLLATE_IS_FUNC = True 1284 LIMIT_ONLY_LITERALS = True 1285 JSON_KEY_VALUE_PAIR_SEP = "," 1286 INSERT_OVERWRITE = " OVERWRITE INTO" 1287 STRUCT_DELIMITER = ("(", ")") 1288 COPY_PARAMS_ARE_WRAPPED = False 1289 COPY_PARAMS_EQ_REQUIRED = True 1290 STAR_EXCEPT = "EXCLUDE" 1291 SUPPORTS_EXPLODING_PROJECTIONS = False 1292 ARRAY_CONCAT_IS_VAR_LEN = False 1293 SUPPORTS_CONVERT_TIMEZONE = True 1294 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1295 SUPPORTS_MEDIAN = True 1296 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1297 SUPPORTS_DECODE_CASE = True 1298 IS_BOOL_ALLOWED = False 1299 1300 TRANSFORMS = { 1301 **generator.Generator.TRANSFORMS, 1302 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1303 exp.ArgMax: rename_func("MAX_BY"), 1304 exp.ArgMin: rename_func("MIN_BY"), 1305 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1306 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1307 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1308 exp.AtTimeZone: lambda self, e: self.func( 1309 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1310 ), 1311 exp.BitwiseOr: rename_func("BITOR"), 1312 exp.BitwiseXor: rename_func("BITXOR"), 1313 exp.BitwiseAnd: rename_func("BITAND"), 1314 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1315 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1316 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1317 exp.BitwiseNot: rename_func("BITNOT"), 1318 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1319 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1320 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1321 exp.DateAdd: date_delta_sql("DATEADD"), 1322 exp.DateDiff: date_delta_sql("DATEDIFF"), 1323 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1324 exp.DatetimeDiff: timestampdiff_sql, 1325 exp.DateStrToDate: datestrtodate_sql, 1326 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1327 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1328 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1329 exp.DayOfYear: rename_func("DAYOFYEAR"), 1330 exp.Explode: rename_func("FLATTEN"), 1331 exp.Extract: lambda self, e: self.func( 1332 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1333 ), 1334 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1335 exp.FileFormatProperty: lambda self, 1336 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1337 exp.FromTimeZone: lambda self, e: self.func( 1338 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1339 ), 1340 exp.GenerateSeries: lambda self, e: self.func( 1341 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1342 ), 1343 exp.GetExtract: rename_func("GET"), 1344 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1345 exp.If: if_sql(name="IFF", false_value="NULL"), 1346 exp.JSONExtractArray: _json_extract_value_array_sql, 1347 exp.JSONExtractScalar: lambda self, e: self.func( 1348 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1349 ), 1350 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1351 exp.JSONPathRoot: lambda *_: "", 1352 exp.JSONValueArray: _json_extract_value_array_sql, 1353 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1354 
rename_func("EDITDISTANCE") 1355 ), 1356 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1357 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1358 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1359 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1360 exp.MakeInterval: no_make_interval_sql, 1361 exp.Max: max_or_greatest, 1362 exp.Min: min_or_least, 1363 exp.ParseJSON: lambda self, e: self.func( 1364 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1365 ), 1366 exp.JSONFormat: rename_func("TO_JSON"), 1367 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1368 exp.PercentileCont: transforms.preprocess( 1369 [transforms.add_within_group_for_percentiles] 1370 ), 1371 exp.PercentileDisc: transforms.preprocess( 1372 [transforms.add_within_group_for_percentiles] 1373 ), 1374 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1375 exp.RegexpExtract: _regexpextract_sql, 1376 exp.RegexpExtractAll: _regexpextract_sql, 1377 exp.RegexpILike: _regexpilike_sql, 1378 exp.Rand: rename_func("RANDOM"), 1379 exp.Select: transforms.preprocess( 1380 [ 1381 transforms.eliminate_window_clause, 1382 transforms.eliminate_distinct_on, 1383 transforms.explode_projection_to_unnest(), 1384 transforms.eliminate_semi_and_anti_joins, 1385 _transform_generate_date_array, 1386 _qualify_unnested_columns, 1387 _eliminate_dot_variant_lookup, 1388 ] 1389 ), 1390 exp.SHA: rename_func("SHA1"), 1391 exp.MD5Digest: rename_func("MD5_BINARY"), 1392 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1393 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1394 exp.LowerHex: rename_func("TO_CHAR"), 1395 exp.SortArray: rename_func("ARRAY_SORT"), 1396 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1397 exp.StartsWith: rename_func("STARTSWITH"), 1398 exp.EndsWith: rename_func("ENDSWITH"), 1399 exp.StrPosition: lambda self, e: strposition_sql( 1400 self, e, func_name="CHARINDEX", supports_position=True 1401 ), 1402 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1403 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1404 exp.Stuff: rename_func("INSERT"), 1405 exp.StPoint: rename_func("ST_MAKEPOINT"), 1406 exp.TimeAdd: date_delta_sql("TIMEADD"), 1407 exp.Timestamp: no_timestamp_sql, 1408 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1409 exp.TimestampDiff: lambda self, e: self.func( 1410 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1411 ), 1412 exp.TimestampTrunc: timestamptrunc_sql(), 1413 exp.TimeStrToTime: timestrtotime_sql, 1414 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1415 exp.ToArray: rename_func("TO_ARRAY"), 1416 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1417 exp.ToDouble: rename_func("TO_DOUBLE"), 1418 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1419 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1420 exp.TsOrDsToDate: lambda self, e: self.func( 1421 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1422 ), 1423 exp.TsOrDsToTime: lambda self, e: self.func( 1424 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1425 ), 1426 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1427 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1428 exp.Uuid: rename_func("UUID_STRING"), 1429 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1430 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1431 exp.Xor: rename_func("BOOLXOR"), 1432 exp.ByteLength: rename_func("OCTET_LENGTH"), 1433 
} 1434 1435 SUPPORTED_JSON_PATH_PARTS = { 1436 exp.JSONPathKey, 1437 exp.JSONPathRoot, 1438 exp.JSONPathSubscript, 1439 } 1440 1441 TYPE_MAPPING = { 1442 **generator.Generator.TYPE_MAPPING, 1443 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1444 exp.DataType.Type.NESTED: "OBJECT", 1445 exp.DataType.Type.STRUCT: "OBJECT", 1446 exp.DataType.Type.TEXT: "VARCHAR", 1447 } 1448 1449 TOKEN_MAPPING = { 1450 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1451 } 1452 1453 PROPERTIES_LOCATION = { 1454 **generator.Generator.PROPERTIES_LOCATION, 1455 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1456 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1457 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1458 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1459 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1460 } 1461 1462 UNSUPPORTED_VALUES_EXPRESSIONS = { 1463 exp.Map, 1464 exp.StarMap, 1465 exp.Struct, 1466 exp.VarMap, 1467 } 1468 1469 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1470 1471 def with_properties(self, properties: exp.Properties) -> str: 1472 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1473 1474 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1475 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1476 values_as_table = False 1477 1478 return super().values_sql(expression, values_as_table=values_as_table) 1479 1480 def datatype_sql(self, expression: exp.DataType) -> str: 1481 expressions = expression.expressions 1482 if ( 1483 expressions 1484 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1485 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1486 ): 1487 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1488 return "OBJECT" 1489 1490 return super().datatype_sql(expression) 1491 1492 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1493 return self.func( 1494 "TO_NUMBER", 1495 expression.this, 1496 expression.args.get("format"), 1497 expression.args.get("precision"), 1498 expression.args.get("scale"), 1499 ) 1500 1501 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1502 milli = expression.args.get("milli") 1503 if milli is not None: 1504 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1505 expression.set("nano", milli_to_nano) 1506 1507 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1508 1509 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1510 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1511 return self.func("TO_GEOGRAPHY", expression.this) 1512 if expression.is_type(exp.DataType.Type.GEOMETRY): 1513 return self.func("TO_GEOMETRY", expression.this) 1514 1515 return super().cast_sql(expression, safe_prefix=safe_prefix) 1516 1517 def trycast_sql(self, expression: exp.TryCast) -> str: 1518 value = expression.this 1519 1520 if value.type is None: 1521 from sqlglot.optimizer.annotate_types import annotate_types 1522 1523 value = annotate_types(value, dialect=self.dialect) 1524 1525 # Snowflake requires that TRY_CAST's value be a string 1526 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1527 # if we can deduce that the value is a string, then we can generate TRY_CAST 1528 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1529 return super().trycast_sql(expression) 1530 1531 return 
self.cast_sql(expression) 1532 1533 def log_sql(self, expression: exp.Log) -> str: 1534 if not expression.expression: 1535 return self.func("LN", expression.this) 1536 1537 return super().log_sql(expression) 1538 1539 def unnest_sql(self, expression: exp.Unnest) -> str: 1540 unnest_alias = expression.args.get("alias") 1541 offset = expression.args.get("offset") 1542 1543 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1544 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1545 1546 columns = [ 1547 exp.to_identifier("seq"), 1548 exp.to_identifier("key"), 1549 exp.to_identifier("path"), 1550 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1551 value, 1552 exp.to_identifier("this"), 1553 ] 1554 1555 if unnest_alias: 1556 unnest_alias.set("columns", columns) 1557 else: 1558 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1559 1560 table_input = self.sql(expression.expressions[0]) 1561 if not table_input.startswith("INPUT =>"): 1562 table_input = f"INPUT => {table_input}" 1563 1564 expression_parent = expression.parent 1565 1566 explode = ( 1567 f"FLATTEN({table_input})" 1568 if isinstance(expression_parent, exp.Lateral) 1569 else f"TABLE(FLATTEN({table_input}))" 1570 ) 1571 alias = self.sql(unnest_alias) 1572 alias = f" AS {alias}" if alias else "" 1573 value = ( 1574 "" 1575 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1576 else f"{value} FROM " 1577 ) 1578 1579 return f"{value}{explode}{alias}" 1580 1581 def show_sql(self, expression: exp.Show) -> str: 1582 terse = "TERSE " if expression.args.get("terse") else "" 1583 history = " HISTORY" if expression.args.get("history") else "" 1584 like = self.sql(expression, "like") 1585 like = f" LIKE {like}" if like else "" 1586 1587 scope = self.sql(expression, "scope") 1588 scope = f" {scope}" if scope else "" 1589 1590 scope_kind = self.sql(expression, "scope_kind") 1591 if scope_kind: 1592 scope_kind = f" IN {scope_kind}" 1593 1594 starts_with = self.sql(expression, "starts_with") 1595 if starts_with: 1596 starts_with = f" STARTS WITH {starts_with}" 1597 1598 limit = self.sql(expression, "limit") 1599 1600 from_ = self.sql(expression, "from") 1601 if from_: 1602 from_ = f" FROM {from_}" 1603 1604 privileges = self.expressions(expression, key="privileges", flat=True) 1605 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1606 1607 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1608 1609 def describe_sql(self, expression: exp.Describe) -> str: 1610 # Default to table if kind is unknown 1611 kind_value = expression.args.get("kind") or "TABLE" 1612 kind = f" {kind_value}" if kind_value else "" 1613 this = f" {self.sql(expression, 'this')}" 1614 expressions = self.expressions(expression, flat=True) 1615 expressions = f" {expressions}" if expressions else "" 1616 return f"DESCRIBE{kind}{this}{expressions}" 1617 1618 def generatedasidentitycolumnconstraint_sql( 1619 self, expression: exp.GeneratedAsIdentityColumnConstraint 1620 ) -> str: 1621 start = expression.args.get("start") 1622 start = f" START {start}" if start else "" 1623 increment = expression.args.get("increment") 1624 increment = f" INCREMENT {increment}" if increment else "" 1625 1626 order = expression.args.get("order") 1627 if order is not None: 1628 order_clause = " ORDER" if order else " NOORDER" 1629 else: 1630 order_clause = "" 1631 1632 return f"AUTOINCREMENT{start}{increment}{order_clause}" 
1633 1634 def cluster_sql(self, expression: exp.Cluster) -> str: 1635 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1636 1637 def struct_sql(self, expression: exp.Struct) -> str: 1638 if len(expression.expressions) == 1: 1639 arg = expression.expressions[0] 1640 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1641 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1642 return f"{{{self.sql(expression.expressions[0])}}}" 1643 1644 keys = [] 1645 values = [] 1646 1647 for i, e in enumerate(expression.expressions): 1648 if isinstance(e, exp.PropertyEQ): 1649 keys.append( 1650 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1651 ) 1652 values.append(e.expression) 1653 else: 1654 keys.append(exp.Literal.string(f"_{i}")) 1655 values.append(e) 1656 1657 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1658 1659 @unsupported_args("weight", "accuracy") 1660 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1661 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1662 1663 def alterset_sql(self, expression: exp.AlterSet) -> str: 1664 exprs = self.expressions(expression, flat=True) 1665 exprs = f" {exprs}" if exprs else "" 1666 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1667 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1668 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1669 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1670 tag = self.expressions(expression, key="tag", flat=True) 1671 tag = f" TAG {tag}" if tag else "" 1672 1673 return f"SET{exprs}{file_format}{copy_options}{tag}" 1674 1675 def strtotime_sql(self, expression: exp.StrToTime): 1676 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1677 return self.func( 1678 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1679 ) 1680 1681 def timestampsub_sql(self, expression: exp.TimestampSub): 1682 return self.sql( 1683 exp.TimestampAdd( 1684 this=expression.this, 1685 expression=expression.expression * -1, 1686 unit=expression.unit, 1687 ) 1688 ) 1689 1690 def jsonextract_sql(self, expression: exp.JSONExtract): 1691 this = expression.this 1692 1693 # JSON strings are valid coming from other dialects such as BQ so 1694 # for these cases we PARSE_JSON preemptively 1695 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1696 "requires_json" 1697 ): 1698 this = exp.ParseJSON(this=this) 1699 1700 return self.func( 1701 "GET_PATH", 1702 this, 1703 expression.expression, 1704 ) 1705 1706 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1707 this = expression.this 1708 if this.is_string: 1709 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1710 1711 return self.func("TO_CHAR", this, self.format_time(expression)) 1712 1713 def datesub_sql(self, expression: exp.DateSub) -> str: 1714 value = expression.expression 1715 if value: 1716 value.replace(value * (-1)) 1717 else: 1718 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1719 1720 return date_delta_sql("DATEADD")(self, expression) 1721 1722 def select_sql(self, expression: exp.Select) -> str: 1723 limit = expression.args.get("limit") 1724 offset = expression.args.get("offset") 1725 if offset and not limit: 1726 expression.limit(exp.Null(), copy=False) 1727 return 
super().select_sql(expression) 1728 1729 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1730 is_materialized = expression.find(exp.MaterializedProperty) 1731 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1732 1733 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1734 # For materialized views, COPY GRANTS is located *before* the columns list 1735 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1736 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1737 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1738 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1739 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1740 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1741 1742 this_name = self.sql(expression.this, "this") 1743 copy_grants = self.sql(copy_grants_property) 1744 this_schema = self.schema_columns_sql(expression.this) 1745 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1746 1747 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1748 1749 return super().createable_sql(expression, locations) 1750 1751 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1752 this = expression.this 1753 1754 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1755 # and add it later as part of the WITHIN GROUP clause 1756 order = this if isinstance(this, exp.Order) else None 1757 if order: 1758 expression.set("this", order.this.pop()) 1759 1760 expr_sql = super().arrayagg_sql(expression) 1761 1762 if order: 1763 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1764 1765 return expr_sql 1766 1767 def array_sql(self, expression: exp.Array) -> str: 1768 expressions = expression.expressions 1769 1770 first_expr = seq_get(expressions, 0) 1771 if isinstance(first_expr, exp.Select): 1772 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1773 if first_expr.text("kind").upper() == "STRUCT": 1774 object_construct_args = [] 1775 for expr in first_expr.expressions: 1776 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1777 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1778 name = expr.this if isinstance(expr, exp.Alias) else expr 1779 1780 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1781 1782 array_agg = exp.ArrayAgg( 1783 this=_build_object_construct(args=object_construct_args) 1784 ) 1785 1786 first_expr.set("kind", None) 1787 first_expr.set("expressions", [array_agg]) 1788 1789 return self.sql(first_expr.subquery()) 1790 1791 return inline_array_sql(self, expression) 1792 1793 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1794 zone = self.sql(expression, "this") 1795 if not zone: 1796 return super().currentdate_sql(expression) 1797 1798 expr = exp.Cast( 1799 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1800 to=exp.DataType(this=exp.DataType.Type.DATE), 1801 ) 1802 return self.sql(expr) 1803 1804 def dot_sql(self, expression: exp.Dot) -> str: 1805 this = expression.this 1806 1807 if not this.type: 1808 from sqlglot.optimizer.annotate_types import annotate_types 1809 1810 this = annotate_types(this, dialect=self.dialect) 1811 1812 if not isinstance(this, exp.Dot) 
and this.is_type(exp.DataType.Type.STRUCT): 1813 # Generate colon notation for the top level STRUCT 1814 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1815 1816 return super().dot_sql(expression) 1817 1818 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1819 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
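For example, a hedged sketch of two of the generator settings above in action (STAR_EXCEPT = "EXCLUDE" and the exp.If -> IFF transform); the table and column names are illustrative:

    import sqlglot

    # BigQuery's SELECT * EXCEPT (...) is rendered with Snowflake's EXCLUDE
    print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0])

    # exp.If is rendered with Snowflake's IFF
    print(sqlglot.transpile("SELECT IF(x > 0, 1, 2)", read="bigquery", write="snowflake")[0])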
1480 def datatype_sql(self, expression: exp.DataType) -> str: 1481 expressions = expression.expressions 1482 if ( 1483 expressions 1484 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1485 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1486 ): 1487 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1488 return "OBJECT" 1489 1490 return super().datatype_sql(expression)
1501 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1502 milli = expression.args.get("milli") 1503 if milli is not None: 1504 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1505 expression.set("nano", milli_to_nano) 1506 1507 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
1509 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1510 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1511 return self.func("TO_GEOGRAPHY", expression.this) 1512 if expression.is_type(exp.DataType.Type.GEOMETRY): 1513 return self.func("TO_GEOMETRY", expression.this) 1514 1515 return super().cast_sql(expression, safe_prefix=safe_prefix)
1517 def trycast_sql(self, expression: exp.TryCast) -> str: 1518 value = expression.this 1519 1520 if value.type is None: 1521 from sqlglot.optimizer.annotate_types import annotate_types 1522 1523 value = annotate_types(value, dialect=self.dialect) 1524 1525 # Snowflake requires that TRY_CAST's value be a string 1526 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1527 # if we can deduce that the value is a string, then we can generate TRY_CAST 1528 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1529 return super().trycast_sql(expression) 1530 1531 return self.cast_sql(expression)
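A sketch of the resulting behavior (assuming type annotation cannot prove a bare column is text; the names are illustrative):

    import sqlglot

    # A string literal is provably text, so TRY_CAST survives
    print(sqlglot.transpile("SELECT TRY_CAST('2020-01-01' AS DATE)", read="duckdb", write="snowflake")[0])

    # An untyped column cannot be proven to be text, so a plain CAST is emitted
    print(sqlglot.transpile("SELECT TRY_CAST(x AS DATE) FROM t", read="duckdb", write="snowflake")[0])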
1539 def unnest_sql(self, expression: exp.Unnest) -> str: 1540 unnest_alias = expression.args.get("alias") 1541 offset = expression.args.get("offset") 1542 1543 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1544 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1545 1546 columns = [ 1547 exp.to_identifier("seq"), 1548 exp.to_identifier("key"), 1549 exp.to_identifier("path"), 1550 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1551 value, 1552 exp.to_identifier("this"), 1553 ] 1554 1555 if unnest_alias: 1556 unnest_alias.set("columns", columns) 1557 else: 1558 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1559 1560 table_input = self.sql(expression.expressions[0]) 1561 if not table_input.startswith("INPUT =>"): 1562 table_input = f"INPUT => {table_input}" 1563 1564 expression_parent = expression.parent 1565 1566 explode = ( 1567 f"FLATTEN({table_input})" 1568 if isinstance(expression_parent, exp.Lateral) 1569 else f"TABLE(FLATTEN({table_input}))" 1570 ) 1571 alias = self.sql(unnest_alias) 1572 alias = f" AS {alias}" if alias else "" 1573 value = ( 1574 "" 1575 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1576 else f"{value} FROM " 1577 ) 1578 1579 return f"{value}{explode}{alias}"
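For instance, a hedged sketch of the FLATTEN rewrite (the exact output shape follows the method above; the alias x is illustrative):

    import sqlglot

    # UNNEST in a FROM clause becomes TABLE(FLATTEN(INPUT => ...)) with the
    # canonical (seq, key, path, index, value, this) alias columns
    print(sqlglot.transpile("SELECT x FROM UNNEST([1, 2, 3]) AS x", read="bigquery", write="snowflake")[0])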
1581 def show_sql(self, expression: exp.Show) -> str: 1582 terse = "TERSE " if expression.args.get("terse") else "" 1583 history = " HISTORY" if expression.args.get("history") else "" 1584 like = self.sql(expression, "like") 1585 like = f" LIKE {like}" if like else "" 1586 1587 scope = self.sql(expression, "scope") 1588 scope = f" {scope}" if scope else "" 1589 1590 scope_kind = self.sql(expression, "scope_kind") 1591 if scope_kind: 1592 scope_kind = f" IN {scope_kind}" 1593 1594 starts_with = self.sql(expression, "starts_with") 1595 if starts_with: 1596 starts_with = f" STARTS WITH {starts_with}" 1597 1598 limit = self.sql(expression, "limit") 1599 1600 from_ = self.sql(expression, "from") 1601 if from_: 1602 from_ = f" FROM {from_}" 1603 1604 privileges = self.expressions(expression, key="privileges", flat=True) 1605 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1606 1607 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
1609 def describe_sql(self, expression: exp.Describe) -> str: 1610 # Default to table if kind is unknown 1611 kind_value = expression.args.get("kind") or "TABLE" 1612 kind = f" {kind_value}" if kind_value else "" 1613 this = f" {self.sql(expression, 'this')}" 1614 expressions = self.expressions(expression, flat=True) 1615 expressions = f" {expressions}" if expressions else "" 1616 return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""

    order = expression.args.get("order")
    if order is not None:
        order_clause = " ORDER" if order else " NOORDER"
    else:
        order_clause = ""

    return f"AUTOINCREMENT{start}{increment}{order_clause}"
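A hedged transpilation sketch (expected output is approximate):

    import sqlglot

    # Identity columns from other dialects map onto Snowflake's AUTOINCREMENT syntax.
    sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
    # expected (roughly): CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)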
def struct_sql(self, expression: exp.Struct) -> str:
    if len(expression.expressions) == 1:
        arg = expression.expressions[0]
        if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
            # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
            return f"{{{self.sql(arg)}}}"

    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
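A minimal sketch (expected output is approximate):

    import sqlglot

    # Named fields keep their names as keys; unnamed fields get positional
    # keys of the form '_<index>'.
    print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2)", read="bigquery", write="snowflake")[0])
    # expected (roughly): SELECT OBJECT_CONSTRUCT('a', 1, '_1', 2)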
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
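A hedged round-trip sketch for the tag branch (department is a hypothetical tag name):

    import sqlglot

    sql = "ALTER TABLE t SET TAG department = 'engineering'"
    print(sqlglot.parse_one(sql, read="snowflake").sql(dialect="snowflake"))
    # expected to round-trip roughly as written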
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    this = expression.this

    # JSON strings are valid coming from other dialects such as BigQuery, so
    # for these cases we PARSE_JSON preemptively
    if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
        "requires_json"
    ):
        this = exp.ParseJSON(this=this)

    return self.func("GET_PATH", this, expression.expression)
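A minimal sketch, assuming a DuckDB-style JSON arrow extraction as input (col is a hypothetical column; expected output is approximate):

    import sqlglot

    # The operand is wrapped in PARSE_JSON so GET_PATH receives a VARIANT.
    print(sqlglot.transpile("SELECT col -> '$.a'", read="duckdb", write="snowflake")[0])
    # expected (roughly): SELECT GET_PATH(PARSE_JSON(col), 'a')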
def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
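A hedged sketch of the negation rewrite (expected output is approximate):

    import sqlglot

    # The subtracted amount is multiplied by -1 and pushed through DATEADD.
    print(sqlglot.transpile("SELECT DATE_SUB(d, INTERVAL 3 DAY)", read="mysql", write="snowflake")[0])
    # expected (roughly): SELECT DATEADD(DAY, 3 * -1, d)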
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    is_materialized = expression.find(exp.MaterializedProperty)
    copy_grants_property = expression.find(exp.CopyGrantsProperty)

    if expression.kind == "VIEW" and is_materialized and copy_grants_property:
        # For materialized views, COPY GRANTS is located *before* the columns list,
        # in contrast to normal views where it is located *after* the columns list.
        # CopyGrantsProperty defaults to POST_SCHEMA, so it must be emitted POST_NAME
        # when a materialized view is detected.
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
        post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
        post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

        this_name = self.sql(expression.this, "this")
        copy_grants = self.sql(copy_grants_property)
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

    return super().createable_sql(expression, locations)
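A hedged round-trip sketch (v and c1 are hypothetical names):

    import sqlglot

    # For a materialized view, COPY GRANTS should be emitted before the column list.
    sql = "CREATE MATERIALIZED VIEW v COPY GRANTS (c1) AS SELECT 1 AS c1"
    print(sqlglot.parse_one(sql, read="snowflake").sql(dialect="snowflake"))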
def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    this = expression.this

    # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
    # and add it later as part of the WITHIN GROUP clause
    order = this if isinstance(this, exp.Order) else None
    if order:
        expression.set("this", order.this.pop())

    expr_sql = super().arrayagg_sql(expression)

    if order:
        expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

    return expr_sql
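A minimal sketch (expected output is approximate):

    import sqlglot

    # The ORDER BY is lifted out of the aggregate into WITHIN GROUP.
    print(sqlglot.transpile("SELECT ARRAY_AGG(x ORDER BY y)", read="duckdb", write="snowflake")[0])
    # expected (roughly): SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY y)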
def array_sql(self, expression: exp.Array) -> str:
    expressions = expression.expressions

    first_expr = seq_get(expressions, 0)
    if isinstance(first_expr, exp.Select):
        # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
        if first_expr.text("kind").upper() == "STRUCT":
            object_construct_args = []
            for expr in first_expr.expressions:
                # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                name = expr.this if isinstance(expr, exp.Alias) else expr

                object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

            array_agg = exp.ArrayAgg(this=_build_object_construct(args=object_construct_args))

            first_expr.set("kind", None)
            first_expr.set("expressions", [array_agg])

            return self.sql(first_expr.subquery())

    return inline_array_sql(self, expression)
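A hedged sketch of the SELECT AS STRUCT rewrite (expected output is approximate):

    import sqlglot

    sql = "SELECT ARRAY(SELECT AS STRUCT x AS a FROM t)"
    print(sqlglot.transpile(sql, read="bigquery", write="snowflake")[0])
    # expected (roughly): SELECT (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('a', x)) FROM t)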
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    zone = self.sql(expression, "this")
    if not zone:
        return super().currentdate_sql(expression)

    expr = exp.Cast(
        this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
        to=exp.DataType(this=exp.DataType.Type.DATE),
    )
    return self.sql(expr)
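A minimal sketch (expected output is approximate):

    import sqlglot

    # Snowflake's CURRENT_DATE takes no timezone, so the zone is applied via
    # CONVERT_TIMEZONE over CURRENT_TIMESTAMP and the result cast to DATE.
    print(sqlglot.transpile("SELECT CURRENT_DATE('America/New_York')", read="bigquery", write="snowflake")[0])
    # expected (roughly):
    # SELECT CAST(CONVERT_TIMEZONE('America/New_York', CURRENT_TIMESTAMP()) AS DATE)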
def dot_sql(self, expression: exp.Dot) -> str:
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this, dialect=self.dialect)

    if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
        # Generate colon notation for the top-level STRUCT
        return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

    return super().dot_sql(expression)
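A hedged, hand-built sketch of the colon-notation branch (payload and f are hypothetical names; the expected output is approximate):

    from sqlglot import exp

    # A Dot whose operand is known to be a STRUCT renders with colon notation;
    # Snowflake maps the STRUCT type itself to OBJECT.
    node = exp.Dot(
        this=exp.cast(exp.column("payload"), "STRUCT<f INT>"),
        expression=exp.to_identifier("f"),
    )
    print(node.sql(dialect="snowflake"))
    # expected (roughly): CAST(payload AS OBJECT):f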
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql