sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.annotate_types import TypeAnnotator
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder
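
# Illustrative sketch (not part of the upstream module): the builder above routes
# Snowflake's overloaded TO_* calls to different AST nodes depending on the
# arguments. Assuming the standard top-level sqlglot API:
#
#   import sqlglot
#   sqlglot.parse_one("TO_TIME('01:02:03')", read="snowflake")          # exp.Cast to TIME
#   sqlglot.parse_one("TO_TIMESTAMP(1712345678, 3)", read="snowflake")  # exp.UnixToTime(scale=3)
#   sqlglot.parse_one(
#       "TO_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')", read="snowflake"
#   )                                                                   # exp.StrToTime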


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
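
# Illustrative sketch (not in the upstream source): the three builders above turn
# Snowflake conveniences into portable conditionals, roughly:
#
#   DIV0(a, b)    -> IF(b = 0 AND NOT a IS NULL, 0, a / b)
#   ZEROIFNULL(x) -> IF(x IS NULL, 0, x)
#   NULLIFZERO(x) -> IF(x = 0, NULL, x)
#
# which lets other dialects render them without Snowflake-specific functions.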


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH), the transformed Snowflake
            # query is the following (it'll be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
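
# Illustrative sketch (not in the upstream source): conceptually, a BigQuery-style
#
#   SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL 1 DAY))
#
# becomes a Snowflake-friendly integer sequence that is added back onto the start
# date, roughly:
#
#   SELECT DATEADD(DAY, value, CAST(start AS DATE))
#   FROM TABLE(FLATTEN(ARRAY_GENERATE_RANGE(0, DATEDIFF(DAY, start, end) + 1)))
#
# The exact generated SQL depends on aliases and the surrounding query shape.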


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _build_like(expr_type: t.Type[E]) -> t.Callable[[t.List], E | exp.Escape]:
    def _builder(args: t.List) -> E | exp.Escape:
        like_expr = expr_type(this=args[0], expression=args[1])
        escape = seq_get(args, 2)
        return exp.Escape(this=like_expr, expression=escape) if escape else like_expr

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )
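
# Illustrative sketch (not in the upstream source): Snowflake's REGEXP_SUBSTR
# arguments are positional, so once a non-default `group` is present all of the
# preceding defaults must be spelled out. An exp.RegexpExtract with only group=1
# set would render roughly as:
#
#   REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1)
#   --  position ----------------^  ^   ^
#   --  occurrence -----------------+   |
#   --  parameters ('c' = case-sensitive) for the default flags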


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                if (
                    isinstance(unnest_ancestor, exp.Unnest)
                    and ancestor_identifier
                    and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                ):
                    continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression


def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression
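
# Illustrative sketch (not in the upstream source): for a BigQuery-style query
# whose UNNEST alias only fills `columns`, dot access on the alias is rewritten
# to a VARIANT bracket lookup, conceptually:
#
#   SELECT u.item FROM t, UNNEST(t.arr) AS u  ->  ... u['item'] AS item ...
#
# so the VARIANT `value` column produced by Snowflake's FLATTEN can be indexed.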


def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
    expression = self._annotate_by_args(expression, "this")
    if expression.is_type(exp.DataType.Type.NULL):
        # Snowflake treats REVERSE(NULL) as a VARCHAR
        self._set_type(expression, exp.DataType.Type.VARCHAR)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.INT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT],
            exp.Ascii,
            exp.ByteLength,
            exp.Length,
            exp.RtrimmedLength,
            exp.BitLength,
            exp.Levenshtein,
            exp.JarowinklerSimilarity,
            exp.StrPosition,
        },
        exp.DataType.Type.VARCHAR: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
            exp.Base64DecodeString,
            exp.Base64Encode,
            exp.DecompressString,
            exp.MD5,
            exp.AIAgg,
            exp.AIClassify,
            exp.AISummarizeAgg,
            exp.Chr,
            exp.Collate,
            exp.Collation,
            exp.HexDecodeString,
            exp.HexEncode,
            exp.Initcap,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.SHA,
            exp.SHA2,
            exp.Space,
            exp.Uuid,
        },
        exp.DataType.Type.BINARY: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
            exp.Base64DecodeBinary,
            exp.Compress,
            exp.DecompressBinary,
            exp.MD5Digest,
            exp.SHA1Digest,
            exp.SHA2Digest,
            exp.Unhex,
        },
        exp.DataType.Type.BIGINT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT],
            exp.MD5NumberLower64,
            exp.MD5NumberUpper64,
        },
        exp.DataType.Type.ARRAY: {
            exp.Split,
        },
        exp.DataType.Type.OBJECT: {
            exp.ParseUrl,
            exp.ParseIp,
        },
    }

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Pad,
                exp.Right,
                exp.Stuff,
                exp.Substring,
            )
        },
        exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Reverse: _annotate_reverse,
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
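
    # Illustrative sketch (not in the upstream source): the override above keeps
    # DUAL usable even when identifiers are force-quoted, roughly (assuming the
    # standard top-level API):
    #
    #   import sqlglot
    #   sqlglot.transpile(
    #       "SELECT 1 FROM dual", read="snowflake", write="snowflake", identify=True
    #   )[0]
    #   # -> 'SELECT 1 FROM dual'  (quoting DUAL would change its meaning)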

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.EXCEPT,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "PARSE_URL": lambda args: exp.ParseUrl(
                this=seq_get(args, 0), permissive=seq_get(args, 1)
            ),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": build_timetostr_or_tochar,
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
            "LIKE": _build_like(exp.Like),
            "ILIKE": _build_like(exp.ILike),
        }
        FUNCTIONS.pop("PREDICT")
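
        # Illustrative sketch (not in the upstream source): entries in FUNCTIONS
        # normalize Snowflake call conventions into canonical AST nodes at parse
        # time, e.g.:
        #
        #   ARRAY_CONTAINS(needle, arr)        -> exp.ArrayContains(this=arr, expression=needle)
        #   ARRAY_GENERATE_RANGE(lo, hi, step) -> exp.GenerateSeries(start=lo, end=hi - 1, step=step)
        #   SQUARE(x)                          -> exp.Pow(this=x, expression=2)
        #
        # so downstream generators for other dialects only have to handle one shape.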

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)
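
        # Illustrative sketch (not in the upstream source): the EPOCH* branch above
        # means that, conceptually,
        #
        #   DATE_PART(EPOCH_MILLISECOND, col)
        #
        # parses as exp.TimeToUnix(CAST(col AS TIMESTAMP)) * 1000 rather than a
        # plain exp.Extract, so epoch extraction can transpile to dialects that
        # lack these date parts.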

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as a command if the file path is unquoted
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as a closing paren
            # in the case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)
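
    # Illustrative sketch (not in the upstream source): _parse_table_parts and
    # _parse_location_path above let staged-file queries round-trip, e.g.:
    #
    #   SELECT t.$1 FROM @mystage/data (FILE_FORMAT => 'my_csv', PATTERN => '.*[.]csv') AS t
    #
    # where the stage path is captured as an exp.Var, and FILE_FORMAT/PATTERN land
    # on the resulting exp.Table as its `format` and `pattern` args.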
    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
                for field_type in expressions:
                    # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                    if isinstance(field_type, exp.DataType):
                        return "OBJECT"
                    if (
                        isinstance(field_type, exp.ColumnDef)
                        and field_type.this
                        and field_type.this.is_string
                    ):
                        # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides
                        # converting 'foo' into an identifier, we also need to quote it because these
                        # keys are case-sensitive. For example:
                        #
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                        # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                        field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string.
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets
            # "requires_string") or if we can deduce that the value is a string, then we can
            # generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"
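
        # Illustrative sketch (not in the upstream source): unnest_sql above renders
        # a BigQuery-style UNNEST in FROM position roughly as
        #
        #   ... FROM TABLE(FLATTEN(INPUT => <expr>)) AS _u(seq, key, path, index, value, this)
        #
        # mirroring the column set that Snowflake's FLATTEN table function produces.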
str: 1632 terse = "TERSE " if expression.args.get("terse") else "" 1633 history = " HISTORY" if expression.args.get("history") else "" 1634 like = self.sql(expression, "like") 1635 like = f" LIKE {like}" if like else "" 1636 1637 scope = self.sql(expression, "scope") 1638 scope = f" {scope}" if scope else "" 1639 1640 scope_kind = self.sql(expression, "scope_kind") 1641 if scope_kind: 1642 scope_kind = f" IN {scope_kind}" 1643 1644 starts_with = self.sql(expression, "starts_with") 1645 if starts_with: 1646 starts_with = f" STARTS WITH {starts_with}" 1647 1648 limit = self.sql(expression, "limit") 1649 1650 from_ = self.sql(expression, "from") 1651 if from_: 1652 from_ = f" FROM {from_}" 1653 1654 privileges = self.expressions(expression, key="privileges", flat=True) 1655 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1656 1657 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1658 1659 def describe_sql(self, expression: exp.Describe) -> str: 1660 # Default to table if kind is unknown 1661 kind_value = expression.args.get("kind") or "TABLE" 1662 kind = f" {kind_value}" if kind_value else "" 1663 this = f" {self.sql(expression, 'this')}" 1664 expressions = self.expressions(expression, flat=True) 1665 expressions = f" {expressions}" if expressions else "" 1666 return f"DESCRIBE{kind}{this}{expressions}" 1667 1668 def generatedasidentitycolumnconstraint_sql( 1669 self, expression: exp.GeneratedAsIdentityColumnConstraint 1670 ) -> str: 1671 start = expression.args.get("start") 1672 start = f" START {start}" if start else "" 1673 increment = expression.args.get("increment") 1674 increment = f" INCREMENT {increment}" if increment else "" 1675 1676 order = expression.args.get("order") 1677 if order is not None: 1678 order_clause = " ORDER" if order else " NOORDER" 1679 else: 1680 order_clause = "" 1681 1682 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1683 1684 def cluster_sql(self, expression: exp.Cluster) -> str: 1685 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1686 1687 def struct_sql(self, expression: exp.Struct) -> str: 1688 if len(expression.expressions) == 1: 1689 arg = expression.expressions[0] 1690 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1691 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1692 return f"{{{self.sql(expression.expressions[0])}}}" 1693 1694 keys = [] 1695 values = [] 1696 1697 for i, e in enumerate(expression.expressions): 1698 if isinstance(e, exp.PropertyEQ): 1699 keys.append( 1700 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1701 ) 1702 values.append(e.expression) 1703 else: 1704 keys.append(exp.Literal.string(f"_{i}")) 1705 values.append(e) 1706 1707 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1708 1709 @unsupported_args("weight", "accuracy") 1710 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1711 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1712 1713 def alterset_sql(self, expression: exp.AlterSet) -> str: 1714 exprs = self.expressions(expression, flat=True) 1715 exprs = f" {exprs}" if exprs else "" 1716 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1717 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1718 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1719 
copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1720 tag = self.expressions(expression, key="tag", flat=True) 1721 tag = f" TAG {tag}" if tag else "" 1722 1723 return f"SET{exprs}{file_format}{copy_options}{tag}" 1724 1725 def strtotime_sql(self, expression: exp.StrToTime): 1726 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1727 return self.func( 1728 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1729 ) 1730 1731 def timestampsub_sql(self, expression: exp.TimestampSub): 1732 return self.sql( 1733 exp.TimestampAdd( 1734 this=expression.this, 1735 expression=expression.expression * -1, 1736 unit=expression.unit, 1737 ) 1738 ) 1739 1740 def jsonextract_sql(self, expression: exp.JSONExtract): 1741 this = expression.this 1742 1743 # JSON strings are valid coming from other dialects such as BQ so 1744 # for these cases we PARSE_JSON preemptively 1745 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1746 "requires_json" 1747 ): 1748 this = exp.ParseJSON(this=this) 1749 1750 return self.func( 1751 "GET_PATH", 1752 this, 1753 expression.expression, 1754 ) 1755 1756 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1757 this = expression.this 1758 if this.is_string: 1759 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1760 1761 return self.func("TO_CHAR", this, self.format_time(expression)) 1762 1763 def datesub_sql(self, expression: exp.DateSub) -> str: 1764 value = expression.expression 1765 if value: 1766 value.replace(value * (-1)) 1767 else: 1768 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1769 1770 return date_delta_sql("DATEADD")(self, expression) 1771 1772 def select_sql(self, expression: exp.Select) -> str: 1773 limit = expression.args.get("limit") 1774 offset = expression.args.get("offset") 1775 if offset and not limit: 1776 expression.limit(exp.Null(), copy=False) 1777 return super().select_sql(expression) 1778 1779 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1780 is_materialized = expression.find(exp.MaterializedProperty) 1781 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1782 1783 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1784 # For materialized views, COPY GRANTS is located *before* the columns list 1785 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1786 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1787 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1788 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1789 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1790 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1791 1792 this_name = self.sql(expression.this, "this") 1793 copy_grants = self.sql(copy_grants_property) 1794 this_schema = self.schema_columns_sql(expression.this) 1795 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1796 1797 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1798 1799 return super().createable_sql(expression, locations) 1800 1801 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1802 this = expression.this 1803 1804 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1805 # and add it later as part of the WITHIN GROUP 
clause 1806 order = this if isinstance(this, exp.Order) else None 1807 if order: 1808 expression.set("this", order.this.pop()) 1809 1810 expr_sql = super().arrayagg_sql(expression) 1811 1812 if order: 1813 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1814 1815 return expr_sql 1816 1817 def array_sql(self, expression: exp.Array) -> str: 1818 expressions = expression.expressions 1819 1820 first_expr = seq_get(expressions, 0) 1821 if isinstance(first_expr, exp.Select): 1822 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1823 if first_expr.text("kind").upper() == "STRUCT": 1824 object_construct_args = [] 1825 for expr in first_expr.expressions: 1826 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1827 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1828 name = expr.this if isinstance(expr, exp.Alias) else expr 1829 1830 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1831 1832 array_agg = exp.ArrayAgg( 1833 this=_build_object_construct(args=object_construct_args) 1834 ) 1835 1836 first_expr.set("kind", None) 1837 first_expr.set("expressions", [array_agg]) 1838 1839 return self.sql(first_expr.subquery()) 1840 1841 return inline_array_sql(self, expression) 1842 1843 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1844 zone = self.sql(expression, "this") 1845 if not zone: 1846 return super().currentdate_sql(expression) 1847 1848 expr = exp.Cast( 1849 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1850 to=exp.DataType(this=exp.DataType.Type.DATE), 1851 ) 1852 return self.sql(expr) 1853 1854 def dot_sql(self, expression: exp.Dot) -> str: 1855 this = expression.this 1856 1857 if not this.type: 1858 from sqlglot.optimizer.annotate_types import annotate_types 1859 1860 this = annotate_types(this, dialect=self.dialect) 1861 1862 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1863 # Generate colon notation for the top level STRUCT 1864 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1865 1866 return super().dot_sql(expression) 1867 1868 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1869 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1870 1871 def format_sql(self, expression: exp.Format) -> str: 1872 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1873 return self.func("TO_CHAR", expression.expressions[0]) 1874 1875 return self.function_fallback_sql(expression)
517class Snowflake(Dialect): 518 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 519 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 520 NULL_ORDERING = "nulls_are_large" 521 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 522 SUPPORTS_USER_DEFINED_TYPES = False 523 SUPPORTS_SEMI_ANTI_JOIN = False 524 PREFER_CTE_ALIAS_COLUMN = True 525 TABLESAMPLE_SIZE_IS_PERCENT = True 526 COPY_PARAMS_ARE_CSV = False 527 ARRAY_AGG_INCLUDES_NULLS = None 528 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 529 TRY_CAST_REQUIRES_STRING = True 530 531 TYPE_TO_EXPRESSIONS = { 532 **Dialect.TYPE_TO_EXPRESSIONS, 533 exp.DataType.Type.INT: { 534 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT], 535 exp.Ascii, 536 exp.ByteLength, 537 exp.Length, 538 exp.RtrimmedLength, 539 exp.BitLength, 540 exp.Levenshtein, 541 exp.JarowinklerSimilarity, 542 exp.StrPosition, 543 }, 544 exp.DataType.Type.VARCHAR: { 545 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR], 546 exp.Base64DecodeString, 547 exp.Base64Encode, 548 exp.DecompressString, 549 exp.MD5, 550 exp.AIAgg, 551 exp.AIClassify, 552 exp.AISummarizeAgg, 553 exp.Chr, 554 exp.Collate, 555 exp.Collation, 556 exp.HexDecodeString, 557 exp.HexEncode, 558 exp.Initcap, 559 exp.RegexpExtract, 560 exp.RegexpReplace, 561 exp.Repeat, 562 exp.Replace, 563 exp.SHA, 564 exp.SHA2, 565 exp.Space, 566 exp.Uuid, 567 }, 568 exp.DataType.Type.BINARY: { 569 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY], 570 exp.Base64DecodeBinary, 571 exp.Compress, 572 exp.DecompressBinary, 573 exp.MD5Digest, 574 exp.SHA1Digest, 575 exp.SHA2Digest, 576 exp.Unhex, 577 }, 578 exp.DataType.Type.BIGINT: { 579 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT], 580 exp.MD5NumberLower64, 581 exp.MD5NumberUpper64, 582 }, 583 exp.DataType.Type.ARRAY: { 584 exp.Split, 585 }, 586 exp.DataType.Type.OBJECT: { 587 exp.ParseUrl, 588 exp.ParseIp, 589 }, 590 } 591 592 ANNOTATORS = { 593 **Dialect.ANNOTATORS, 594 **{ 595 expr_type: annotate_with_type_lambda(data_type) 596 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 597 for expr_type in expressions 598 }, 599 **{ 600 expr_type: lambda self, e: self._annotate_by_args(e, "this") 601 for expr_type in ( 602 exp.Left, 603 exp.Pad, 604 exp.Right, 605 exp.Stuff, 606 exp.Substring, 607 ) 608 }, 609 exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"), 610 exp.Reverse: _annotate_reverse, 611 } 612 613 TIME_MAPPING = { 614 "YYYY": "%Y", 615 "yyyy": "%Y", 616 "YY": "%y", 617 "yy": "%y", 618 "MMMM": "%B", 619 "mmmm": "%B", 620 "MON": "%b", 621 "mon": "%b", 622 "MM": "%m", 623 "mm": "%m", 624 "DD": "%d", 625 "dd": "%-d", 626 "DY": "%a", 627 "dy": "%w", 628 "HH24": "%H", 629 "hh24": "%H", 630 "HH12": "%I", 631 "hh12": "%I", 632 "MI": "%M", 633 "mi": "%M", 634 "SS": "%S", 635 "ss": "%S", 636 "FF6": "%f", 637 "ff6": "%f", 638 } 639 640 DATE_PART_MAPPING = { 641 **Dialect.DATE_PART_MAPPING, 642 "ISOWEEK": "WEEKISO", 643 } 644 645 def quote_identifier(self, expression: E, identify: bool = True) -> E: 646 # This disables quoting DUAL in SELECT ... 
FROM DUAL, because Snowflake treats an 647 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 648 if ( 649 isinstance(expression, exp.Identifier) 650 and isinstance(expression.parent, exp.Table) 651 and expression.name.lower() == "dual" 652 ): 653 return expression # type: ignore 654 655 return super().quote_identifier(expression, identify=identify) 656 657 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 658 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 659 SINGLE_TOKENS.pop("$") 660 661 class Parser(parser.Parser): 662 IDENTIFY_PIVOT_STRINGS = True 663 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 664 COLON_IS_VARIANT_EXTRACT = True 665 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 666 667 ID_VAR_TOKENS = { 668 *parser.Parser.ID_VAR_TOKENS, 669 TokenType.EXCEPT, 670 TokenType.MATCH_CONDITION, 671 } 672 673 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 674 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 675 676 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 677 678 FUNCTIONS = { 679 **parser.Parser.FUNCTIONS, 680 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 681 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 682 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 683 this=seq_get(args, 1), expression=seq_get(args, 0) 684 ), 685 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 686 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 687 start=seq_get(args, 0), 688 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 689 step=seq_get(args, 2), 690 ), 691 "ARRAY_SORT": exp.SortArray.from_arg_list, 692 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 693 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 694 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 695 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 696 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 697 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 698 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 699 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 700 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 701 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 702 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 703 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 704 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 705 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 706 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 707 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 708 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 709 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 710 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 711 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 712 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 713 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 714 "DATE_TRUNC": _date_trunc_to_time, 715 "DATEADD": _build_date_time_add(exp.DateAdd), 716 "DATEDIFF": _build_datediff, 717 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 718 "DIV0": _build_if_from_div0, 719 "EDITDISTANCE": lambda args: exp.Levenshtein( 720 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 721 ), 722 "FLATTEN": exp.Explode.from_arg_list, 723 "GET": exp.GetExtract.from_arg_list, 724 "GET_PATH": lambda args, dialect: exp.JSONExtract( 725 this=seq_get(args, 0), 726 expression=dialect.to_json_path(seq_get(args, 1)), 727 requires_json=True, 728 ), 729 "HEX_DECODE_BINARY":
exp.Unhex.from_arg_list, 730 "IFF": exp.If.from_arg_list, 731 "MD5_HEX": exp.MD5.from_arg_list, 732 "MD5_BINARY": exp.MD5Digest.from_arg_list, 733 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 734 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 735 "LAST_DAY": lambda args: exp.LastDay( 736 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 737 ), 738 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 739 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 740 "NULLIFZERO": _build_if_from_nullifzero, 741 "OBJECT_CONSTRUCT": _build_object_construct, 742 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 743 "PARSE_URL": lambda args: exp.ParseUrl( 744 this=seq_get(args, 0), permissive=seq_get(args, 1) 745 ), 746 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 747 "REGEXP_REPLACE": _build_regexp_replace, 748 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 749 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 750 "REPLACE": build_replace_with_optional_replacement, 751 "RLIKE": exp.RegexpLike.from_arg_list, 752 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 753 "SHA1_HEX": exp.SHA.from_arg_list, 754 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 755 "SHA2_HEX": exp.SHA2.from_arg_list, 756 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 757 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 758 "TIMEADD": _build_date_time_add(exp.TimeAdd), 759 "TIMEDIFF": _build_datediff, 760 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 761 "TIMESTAMPDIFF": _build_datediff, 762 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 763 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 764 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 765 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 766 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 767 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 768 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 769 "TRY_TO_TIMESTAMP": _build_datetime( 770 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 771 ), 772 "TO_CHAR": build_timetostr_or_tochar, 773 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 774 "TO_NUMBER": lambda args: exp.ToNumber( 775 this=seq_get(args, 0), 776 format=seq_get(args, 1), 777 precision=seq_get(args, 2), 778 scale=seq_get(args, 3), 779 ), 780 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 781 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 782 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 783 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 784 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 785 "TO_VARCHAR": build_timetostr_or_tochar, 786 "TO_JSON": exp.JSONFormat.from_arg_list, 787 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 788 "ZEROIFNULL": _build_if_from_zeroifnull, 789 "LIKE": _build_like(exp.Like), 790 "ILIKE": _build_like(exp.ILike), 791 } 792 FUNCTIONS.pop("PREDICT") 793 794 FUNCTION_PARSERS = { 795 **parser.Parser.FUNCTION_PARSERS, 796 "DATE_PART": lambda self: self._parse_date_part(), 797 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 798 "LISTAGG": lambda self: self._parse_string_agg(), 799 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 800 } 
801 FUNCTION_PARSERS.pop("TRIM") 802 803 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 804 805 ALTER_PARSERS = { 806 **parser.Parser.ALTER_PARSERS, 807 "SESSION": lambda self: self._parse_alter_session(), 808 "UNSET": lambda self: self.expression( 809 exp.Set, 810 tag=self._match_text_seq("TAG"), 811 expressions=self._parse_csv(self._parse_id_var), 812 unset=True, 813 ), 814 } 815 816 STATEMENT_PARSERS = { 817 **parser.Parser.STATEMENT_PARSERS, 818 TokenType.GET: lambda self: self._parse_get(), 819 TokenType.PUT: lambda self: self._parse_put(), 820 TokenType.SHOW: lambda self: self._parse_show(), 821 } 822 823 PROPERTY_PARSERS = { 824 **parser.Parser.PROPERTY_PARSERS, 825 "CREDENTIALS": lambda self: self._parse_credentials_property(), 826 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 827 "LOCATION": lambda self: self._parse_location_property(), 828 "TAG": lambda self: self._parse_tag(), 829 "USING": lambda self: self._match_text_seq("TEMPLATE") 830 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 831 } 832 833 TYPE_CONVERTERS = { 834 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 835 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 836 } 837 838 SHOW_PARSERS = { 839 "DATABASES": _show_parser("DATABASES"), 840 "TERSE DATABASES": _show_parser("DATABASES"), 841 "SCHEMAS": _show_parser("SCHEMAS"), 842 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 843 "OBJECTS": _show_parser("OBJECTS"), 844 "TERSE OBJECTS": _show_parser("OBJECTS"), 845 "TABLES": _show_parser("TABLES"), 846 "TERSE TABLES": _show_parser("TABLES"), 847 "VIEWS": _show_parser("VIEWS"), 848 "TERSE VIEWS": _show_parser("VIEWS"), 849 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 850 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 851 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 852 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 853 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 854 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 855 "SEQUENCES": _show_parser("SEQUENCES"), 856 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 857 "STAGES": _show_parser("STAGES"), 858 "COLUMNS": _show_parser("COLUMNS"), 859 "USERS": _show_parser("USERS"), 860 "TERSE USERS": _show_parser("USERS"), 861 "FILE FORMATS": _show_parser("FILE FORMATS"), 862 "FUNCTIONS": _show_parser("FUNCTIONS"), 863 "PROCEDURES": _show_parser("PROCEDURES"), 864 "WAREHOUSES": _show_parser("WAREHOUSES"), 865 } 866 867 CONSTRAINT_PARSERS = { 868 **parser.Parser.CONSTRAINT_PARSERS, 869 "WITH": lambda self: self._parse_with_constraint(), 870 "MASKING": lambda self: self._parse_with_constraint(), 871 "PROJECTION": lambda self: self._parse_with_constraint(), 872 "TAG": lambda self: self._parse_with_constraint(), 873 } 874 875 STAGED_FILE_SINGLE_TOKENS = { 876 TokenType.DOT, 877 TokenType.MOD, 878 TokenType.SLASH, 879 } 880 881 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 882 883 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 884 885 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 886 887 LAMBDAS = { 888 **parser.Parser.LAMBDAS, 889 TokenType.ARROW: lambda self, expressions: self.expression( 890 exp.Lambda, 891 this=self._replace_lambda( 892 self._parse_assignment(), 893 expressions, 894 ), 895 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 896 ), 897 } 898 899 COLUMN_OPERATORS = { 900 **parser.Parser.COLUMN_OPERATORS, 901 TokenType.EXCLAMATION: 
lambda self, this, attr: self.expression( 902 exp.ModelAttribute, this=this, expression=attr 903 ), 904 } 905 906 def _parse_use(self) -> exp.Use: 907 if self._match_text_seq("SECONDARY", "ROLES"): 908 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 909 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 910 return self.expression( 911 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 912 ) 913 914 return super()._parse_use() 915 916 def _negate_range( 917 self, this: t.Optional[exp.Expression] = None 918 ) -> t.Optional[exp.Expression]: 919 if not this: 920 return this 921 922 query = this.args.get("query") 923 if isinstance(this, exp.In) and isinstance(query, exp.Query): 924 # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so 925 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 926 # which can produce different results (most likely a Snowflake bug). 927 # 928 # https://docs.snowflake.com/en/sql-reference/functions/in 929 # Context: https://github.com/tobymao/sqlglot/issues/3890 930 return self.expression( 931 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 932 ) 933 934 return self.expression(exp.Not, this=this) 935 936 def _parse_tag(self) -> exp.Tags: 937 return self.expression( 938 exp.Tags, 939 expressions=self._parse_wrapped_csv(self._parse_property), 940 ) 941 942 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 943 if self._prev.token_type != TokenType.WITH: 944 self._retreat(self._index - 1) 945 946 if self._match_text_seq("MASKING", "POLICY"): 947 policy = self._parse_column() 948 return self.expression( 949 exp.MaskingPolicyColumnConstraint, 950 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 951 expressions=self._match(TokenType.USING) 952 and self._parse_wrapped_csv(self._parse_id_var), 953 ) 954 if self._match_text_seq("PROJECTION", "POLICY"): 955 policy = self._parse_column() 956 return self.expression( 957 exp.ProjectionPolicyColumnConstraint, 958 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 959 ) 960 if self._match(TokenType.TAG): 961 return self._parse_tag() 962 963 return None 964 965 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 966 if self._match(TokenType.TAG): 967 return self._parse_tag() 968 969 return super()._parse_with_property() 970 971 def _parse_create(self) -> exp.Create | exp.Command: 972 expression = super()._parse_create() 973 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 974 # Replace the Table node with the enclosed Identifier 975 expression.this.replace(expression.this.this) 976 977 return expression 978 979 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 980 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 981 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 982 this = self._parse_var() or self._parse_type() 983 984 if not this: 985 return None 986 987 self._match(TokenType.COMMA) 988 expression = self._parse_bitwise() 989 this = map_date_part(this) 990 name = this.name.upper() 991 992 if name.startswith("EPOCH"): 993 if name == "EPOCH_MILLISECOND": 994 scale = 10**3 995 elif name == "EPOCH_MICROSECOND": 996 scale = 10**6 997 elif name == "EPOCH_NANOSECOND": 998 scale = 10**9 999 else: 1000 scale = None 1001 1002 ts = self.expression(exp.Cast, this=expression,
to=exp.DataType.build("TIMESTAMP")) 1003 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 1004 1005 if scale: 1006 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 1007 1008 return to_unix 1009 1010 return self.expression(exp.Extract, this=this, expression=expression) 1011 1012 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1013 if is_map: 1014 # Keys are strings in Snowflake's objects, see also: 1015 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1016 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1017 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1018 1019 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1020 1021 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1022 lateral = super()._parse_lateral() 1023 if not lateral: 1024 return lateral 1025 1026 if isinstance(lateral.this, exp.Explode): 1027 table_alias = lateral.args.get("alias") 1028 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1029 if table_alias and not table_alias.args.get("columns"): 1030 table_alias.set("columns", columns) 1031 elif not table_alias: 1032 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1033 1034 return lateral 1035 1036 def _parse_table_parts( 1037 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1038 ) -> exp.Table: 1039 # https://docs.snowflake.com/en/user-guide/querying-stage 1040 if self._match(TokenType.STRING, advance=False): 1041 table = self._parse_string() 1042 elif self._match_text_seq("@", advance=False): 1043 table = self._parse_location_path() 1044 else: 1045 table = None 1046 1047 if table: 1048 file_format = None 1049 pattern = None 1050 1051 wrapped = self._match(TokenType.L_PAREN) 1052 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1053 if self._match_text_seq("FILE_FORMAT", "=>"): 1054 file_format = self._parse_string() or super()._parse_table_parts( 1055 is_db_reference=is_db_reference 1056 ) 1057 elif self._match_text_seq("PATTERN", "=>"): 1058 pattern = self._parse_string() 1059 else: 1060 break 1061 1062 self._match(TokenType.COMMA) 1063 1064 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1065 else: 1066 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1067 1068 return table 1069 1070 def _parse_table( 1071 self, 1072 schema: bool = False, 1073 joins: bool = False, 1074 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1075 parse_bracket: bool = False, 1076 is_db_reference: bool = False, 1077 parse_partition: bool = False, 1078 consume_pipe: bool = False, 1079 ) -> t.Optional[exp.Expression]: 1080 table = super()._parse_table( 1081 schema=schema, 1082 joins=joins, 1083 alias_tokens=alias_tokens, 1084 parse_bracket=parse_bracket, 1085 is_db_reference=is_db_reference, 1086 parse_partition=parse_partition, 1087 ) 1088 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1089 table_from_rows = table.this 1090 for arg in exp.TableFromRows.arg_types: 1091 if arg != "this": 1092 table_from_rows.set(arg, table.args.get(arg)) 1093 1094 table = table_from_rows 1095 1096 return table 1097 1098 def _parse_id_var( 1099 self, 1100 any_token: bool = True, 1101 tokens: t.Optional[t.Collection[TokenType]] = None, 1102 ) -> t.Optional[exp.Expression]: 1103 if self._match_text_seq("IDENTIFIER", "("): 1104 
identifier = ( 1105 super()._parse_id_var(any_token=any_token, tokens=tokens) 1106 or self._parse_string() 1107 ) 1108 self._match_r_paren() 1109 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1110 1111 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1112 1113 def _parse_show_snowflake(self, this: str) -> exp.Show: 1114 scope = None 1115 scope_kind = None 1116 1117 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1118 # which is syntactically valid but has no effect on the output 1119 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1120 1121 history = self._match_text_seq("HISTORY") 1122 1123 like = self._parse_string() if self._match(TokenType.LIKE) else None 1124 1125 if self._match(TokenType.IN): 1126 if self._match_text_seq("ACCOUNT"): 1127 scope_kind = "ACCOUNT" 1128 elif self._match_text_seq("CLASS"): 1129 scope_kind = "CLASS" 1130 scope = self._parse_table_parts() 1131 elif self._match_text_seq("APPLICATION"): 1132 scope_kind = "APPLICATION" 1133 if self._match_text_seq("PACKAGE"): 1134 scope_kind += " PACKAGE" 1135 scope = self._parse_table_parts() 1136 elif self._match_set(self.DB_CREATABLES): 1137 scope_kind = self._prev.text.upper() 1138 if self._curr: 1139 scope = self._parse_table_parts() 1140 elif self._curr: 1141 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1142 scope = self._parse_table_parts() 1143 1144 return self.expression( 1145 exp.Show, 1146 **{ 1147 "terse": terse, 1148 "this": this, 1149 "history": history, 1150 "like": like, 1151 "scope": scope, 1152 "scope_kind": scope_kind, 1153 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1154 "limit": self._parse_limit(), 1155 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1156 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1157 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1158 }, 1159 ) 1160 1161 def _parse_put(self) -> exp.Put | exp.Command: 1162 if self._curr.token_type != TokenType.STRING: 1163 return self._parse_as_command(self._prev) 1164 1165 return self.expression( 1166 exp.Put, 1167 this=self._parse_string(), 1168 target=self._parse_location_path(), 1169 properties=self._parse_properties(), 1170 ) 1171 1172 def _parse_get(self) -> t.Optional[exp.Expression]: 1173 start = self._prev 1174 1175 # If we detect GET( then we need to parse a function, not a statement 1176 if self._match(TokenType.L_PAREN): 1177 self._retreat(self._index - 2) 1178 return self._parse_expression() 1179 1180 target = self._parse_location_path() 1181 1182 # Parse as command if unquoted file path 1183 if self._curr.token_type == TokenType.URI_START: 1184 return self._parse_as_command(start) 1185 1186 return self.expression( 1187 exp.Get, 1188 this=self._parse_string(), 1189 target=target, 1190 properties=self._parse_properties(), 1191 ) 1192 1193 def _parse_location_property(self) -> exp.LocationProperty: 1194 self._match(TokenType.EQ) 1195 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1196 1197 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1198 # Parse either a subquery or a staged file 1199 return ( 1200 self._parse_select(table=True, parse_subquery_alias=False) 1201 if self._match(TokenType.L_PAREN, advance=False) 1202 else self._parse_table_parts() 1203 ) 1204 1205 def _parse_location_path(self) -> exp.Var: 1206 start = self._curr 1207 self._advance_any(ignore_reserved=True) 1208 1209 # We avoid
consuming a comma token because external tables like @foo and @bar 1210 # can be joined in a query with a comma separator, as well as closing paren 1211 # in case of subqueries 1212 while self._is_connected() and not self._match_set( 1213 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1214 ): 1215 self._advance_any(ignore_reserved=True) 1216 1217 return exp.var(self._find_sql(start, self._prev)) 1218 1219 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1220 this = super()._parse_lambda_arg() 1221 1222 if not this: 1223 return this 1224 1225 typ = self._parse_types() 1226 1227 if typ: 1228 return self.expression(exp.Cast, this=this, to=typ) 1229 1230 return this 1231 1232 def _parse_foreign_key(self) -> exp.ForeignKey: 1233 # inlineFK, the REFERENCES columns are implied 1234 if self._match(TokenType.REFERENCES, advance=False): 1235 return self.expression(exp.ForeignKey) 1236 1237 # outoflineFK, explicitly names the columns 1238 return super()._parse_foreign_key() 1239 1240 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1241 self._match(TokenType.EQ) 1242 if self._match(TokenType.L_PAREN, advance=False): 1243 expressions = self._parse_wrapped_options() 1244 else: 1245 expressions = [self._parse_format_name()] 1246 1247 return self.expression( 1248 exp.FileFormatProperty, 1249 expressions=expressions, 1250 ) 1251 1252 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1253 return self.expression( 1254 exp.CredentialsProperty, 1255 expressions=self._parse_wrapped_options(), 1256 ) 1257 1258 def _parse_semantic_view(self) -> exp.SemanticView: 1259 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1260 1261 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1262 if self._match_text_seq("DIMENSIONS"): 1263 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1264 if self._match_text_seq("METRICS"): 1265 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1266 if self._match_text_seq("WHERE"): 1267 kwargs["where"] = self._parse_expression() 1268 1269 return self.expression(exp.SemanticView, **kwargs) 1270 1271 class Tokenizer(tokens.Tokenizer): 1272 STRING_ESCAPES = ["\\", "'"] 1273 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1274 RAW_STRINGS = ["$$"] 1275 COMMENTS = ["--", "//", ("/*", "*/")] 1276 NESTED_COMMENTS = False 1277 1278 KEYWORDS = { 1279 **tokens.Tokenizer.KEYWORDS, 1280 "BYTEINT": TokenType.INT, 1281 "FILE://": TokenType.URI_START, 1282 "FILE FORMAT": TokenType.FILE_FORMAT, 1283 "GET": TokenType.GET, 1284 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1285 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1286 "MINUS": TokenType.EXCEPT, 1287 "NCHAR VARYING": TokenType.VARCHAR, 1288 "PUT": TokenType.PUT, 1289 "REMOVE": TokenType.COMMAND, 1290 "RM": TokenType.COMMAND, 1291 "SAMPLE": TokenType.TABLE_SAMPLE, 1292 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1293 "SQL_DOUBLE": TokenType.DOUBLE, 1294 "SQL_VARCHAR": TokenType.VARCHAR, 1295 "STAGE": TokenType.STAGE, 1296 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1297 "STREAMLIT": TokenType.STREAMLIT, 1298 "TAG": TokenType.TAG, 1299 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1300 "TOP": TokenType.TOP, 1301 "WAREHOUSE": TokenType.WAREHOUSE, 1302 } 1303 KEYWORDS.pop("/*+") 1304 1305 SINGLE_TOKENS = { 1306 **tokens.Tokenizer.SINGLE_TOKENS, 1307 "$": TokenType.PARAMETER, 1308 "!": TokenType.EXCLAMATION, 1309 } 1310 1311 VAR_SINGLE_TOKENS = {"$"} 1312 1313 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1314 1315 class 
Generator(generator.Generator): 1316 PARAMETER_TOKEN = "$" 1317 MATCHED_BY_SOURCE = False 1318 SINGLE_STRING_INTERVAL = True 1319 JOIN_HINTS = False 1320 TABLE_HINTS = False 1321 QUERY_HINTS = False 1322 AGGREGATE_FILTER_SUPPORTED = False 1323 SUPPORTS_TABLE_COPY = False 1324 COLLATE_IS_FUNC = True 1325 LIMIT_ONLY_LITERALS = True 1326 JSON_KEY_VALUE_PAIR_SEP = "," 1327 INSERT_OVERWRITE = " OVERWRITE INTO" 1328 STRUCT_DELIMITER = ("(", ")") 1329 COPY_PARAMS_ARE_WRAPPED = False 1330 COPY_PARAMS_EQ_REQUIRED = True 1331 STAR_EXCEPT = "EXCLUDE" 1332 SUPPORTS_EXPLODING_PROJECTIONS = False 1333 ARRAY_CONCAT_IS_VAR_LEN = False 1334 SUPPORTS_CONVERT_TIMEZONE = True 1335 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1336 SUPPORTS_MEDIAN = True 1337 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1338 SUPPORTS_DECODE_CASE = True 1339 IS_BOOL_ALLOWED = False 1340 1341 TRANSFORMS = { 1342 **generator.Generator.TRANSFORMS, 1343 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1344 exp.ArgMax: rename_func("MAX_BY"), 1345 exp.ArgMin: rename_func("MIN_BY"), 1346 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1347 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1348 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1349 exp.AtTimeZone: lambda self, e: self.func( 1350 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1351 ), 1352 exp.BitwiseOr: rename_func("BITOR"), 1353 exp.BitwiseXor: rename_func("BITXOR"), 1354 exp.BitwiseAnd: rename_func("BITAND"), 1355 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1356 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1357 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1358 exp.BitwiseNot: rename_func("BITNOT"), 1359 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1360 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1361 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1362 exp.DateAdd: date_delta_sql("DATEADD"), 1363 exp.DateDiff: date_delta_sql("DATEDIFF"), 1364 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1365 exp.DatetimeDiff: timestampdiff_sql, 1366 exp.DateStrToDate: datestrtodate_sql, 1367 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1368 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1369 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1370 exp.DayOfYear: rename_func("DAYOFYEAR"), 1371 exp.Explode: rename_func("FLATTEN"), 1372 exp.Extract: lambda self, e: self.func( 1373 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1374 ), 1375 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1376 exp.FileFormatProperty: lambda self, 1377 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1378 exp.FromTimeZone: lambda self, e: self.func( 1379 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1380 ), 1381 exp.GenerateSeries: lambda self, e: self.func( 1382 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1383 ), 1384 exp.GetExtract: rename_func("GET"), 1385 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1386 exp.If: if_sql(name="IFF", false_value="NULL"), 1387 exp.JSONExtractArray: _json_extract_value_array_sql, 1388 exp.JSONExtractScalar: lambda self, e: self.func( 1389 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1390 ), 1391 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1392 exp.JSONPathRoot: lambda *_: "", 1393 exp.JSONValueArray: _json_extract_value_array_sql, 1394 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1395 
rename_func("EDITDISTANCE") 1396 ), 1397 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1398 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1399 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1400 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1401 exp.MakeInterval: no_make_interval_sql, 1402 exp.Max: max_or_greatest, 1403 exp.Min: min_or_least, 1404 exp.ParseJSON: lambda self, e: self.func( 1405 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1406 ), 1407 exp.JSONFormat: rename_func("TO_JSON"), 1408 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1409 exp.PercentileCont: transforms.preprocess( 1410 [transforms.add_within_group_for_percentiles] 1411 ), 1412 exp.PercentileDisc: transforms.preprocess( 1413 [transforms.add_within_group_for_percentiles] 1414 ), 1415 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1416 exp.RegexpExtract: _regexpextract_sql, 1417 exp.RegexpExtractAll: _regexpextract_sql, 1418 exp.RegexpILike: _regexpilike_sql, 1419 exp.Rand: rename_func("RANDOM"), 1420 exp.Select: transforms.preprocess( 1421 [ 1422 transforms.eliminate_window_clause, 1423 transforms.eliminate_distinct_on, 1424 transforms.explode_projection_to_unnest(), 1425 transforms.eliminate_semi_and_anti_joins, 1426 _transform_generate_date_array, 1427 _qualify_unnested_columns, 1428 _eliminate_dot_variant_lookup, 1429 ] 1430 ), 1431 exp.SHA: rename_func("SHA1"), 1432 exp.MD5Digest: rename_func("MD5_BINARY"), 1433 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1434 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1435 exp.LowerHex: rename_func("TO_CHAR"), 1436 exp.SortArray: rename_func("ARRAY_SORT"), 1437 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1438 exp.StartsWith: rename_func("STARTSWITH"), 1439 exp.EndsWith: rename_func("ENDSWITH"), 1440 exp.StrPosition: lambda self, e: strposition_sql( 1441 self, e, func_name="CHARINDEX", supports_position=True 1442 ), 1443 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1444 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1445 exp.Stuff: rename_func("INSERT"), 1446 exp.StPoint: rename_func("ST_MAKEPOINT"), 1447 exp.TimeAdd: date_delta_sql("TIMEADD"), 1448 exp.Timestamp: no_timestamp_sql, 1449 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1450 exp.TimestampDiff: lambda self, e: self.func( 1451 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1452 ), 1453 exp.TimestampTrunc: timestamptrunc_sql(), 1454 exp.TimeStrToTime: timestrtotime_sql, 1455 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1456 exp.ToArray: rename_func("TO_ARRAY"), 1457 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1458 exp.ToDouble: rename_func("TO_DOUBLE"), 1459 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1460 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1461 exp.TsOrDsToDate: lambda self, e: self.func( 1462 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1463 ), 1464 exp.TsOrDsToTime: lambda self, e: self.func( 1465 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1466 ), 1467 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1468 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1469 exp.Uuid: rename_func("UUID_STRING"), 1470 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1471 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1472 exp.Xor: rename_func("BOOLXOR"), 1473 exp.ByteLength: rename_func("OCTET_LENGTH"), 1474 
} 1475 1476 SUPPORTED_JSON_PATH_PARTS = { 1477 exp.JSONPathKey, 1478 exp.JSONPathRoot, 1479 exp.JSONPathSubscript, 1480 } 1481 1482 TYPE_MAPPING = { 1483 **generator.Generator.TYPE_MAPPING, 1484 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1485 exp.DataType.Type.NESTED: "OBJECT", 1486 exp.DataType.Type.STRUCT: "OBJECT", 1487 exp.DataType.Type.TEXT: "VARCHAR", 1488 } 1489 1490 TOKEN_MAPPING = { 1491 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1492 } 1493 1494 PROPERTIES_LOCATION = { 1495 **generator.Generator.PROPERTIES_LOCATION, 1496 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1497 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1498 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1499 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1500 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1501 } 1502 1503 UNSUPPORTED_VALUES_EXPRESSIONS = { 1504 exp.Map, 1505 exp.StarMap, 1506 exp.Struct, 1507 exp.VarMap, 1508 } 1509 1510 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1511 1512 def with_properties(self, properties: exp.Properties) -> str: 1513 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1514 1515 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1516 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1517 values_as_table = False 1518 1519 return super().values_sql(expression, values_as_table=values_as_table) 1520 1521 def datatype_sql(self, expression: exp.DataType) -> str: 1522 expressions = expression.expressions 1523 if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES): 1524 for field_type in expressions: 1525 # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ] 1526 if isinstance(field_type, exp.DataType): 1527 return "OBJECT" 1528 if ( 1529 isinstance(field_type, exp.ColumnDef) 1530 and field_type.this 1531 and field_type.this.is_string 1532 ): 1533 # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides 1534 # converting 'foo' into an identifier, we also need to quote it because these 1535 # keys are case-sensitive.
For example: 1536 # 1537 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct 1538 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL 1539 field_type.this.replace(exp.to_identifier(field_type.name, quoted=True)) 1540 1541 return super().datatype_sql(expression) 1542 1543 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1544 return self.func( 1545 "TO_NUMBER", 1546 expression.this, 1547 expression.args.get("format"), 1548 expression.args.get("precision"), 1549 expression.args.get("scale"), 1550 ) 1551 1552 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1553 milli = expression.args.get("milli") 1554 if milli is not None: 1555 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1556 expression.set("nano", milli_to_nano) 1557 1558 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1559 1560 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1561 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1562 return self.func("TO_GEOGRAPHY", expression.this) 1563 if expression.is_type(exp.DataType.Type.GEOMETRY): 1564 return self.func("TO_GEOMETRY", expression.this) 1565 1566 return super().cast_sql(expression, safe_prefix=safe_prefix) 1567 1568 def trycast_sql(self, expression: exp.TryCast) -> str: 1569 value = expression.this 1570 1571 if value.type is None: 1572 from sqlglot.optimizer.annotate_types import annotate_types 1573 1574 value = annotate_types(value, dialect=self.dialect) 1575 1576 # Snowflake requires that TRY_CAST's value be a string 1577 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1578 # if we can deduce that the value is a string, then we can generate TRY_CAST 1579 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1580 return super().trycast_sql(expression) 1581 1582 return self.cast_sql(expression) 1583 1584 def log_sql(self, expression: exp.Log) -> str: 1585 if not expression.expression: 1586 return self.func("LN", expression.this) 1587 1588 return super().log_sql(expression) 1589 1590 def unnest_sql(self, expression: exp.Unnest) -> str: 1591 unnest_alias = expression.args.get("alias") 1592 offset = expression.args.get("offset") 1593 1594 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1595 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1596 1597 columns = [ 1598 exp.to_identifier("seq"), 1599 exp.to_identifier("key"), 1600 exp.to_identifier("path"), 1601 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1602 value, 1603 exp.to_identifier("this"), 1604 ] 1605 1606 if unnest_alias: 1607 unnest_alias.set("columns", columns) 1608 else: 1609 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1610 1611 table_input = self.sql(expression.expressions[0]) 1612 if not table_input.startswith("INPUT =>"): 1613 table_input = f"INPUT => {table_input}" 1614 1615 expression_parent = expression.parent 1616 1617 explode = ( 1618 f"FLATTEN({table_input})" 1619 if isinstance(expression_parent, exp.Lateral) 1620 else f"TABLE(FLATTEN({table_input}))" 1621 ) 1622 alias = self.sql(unnest_alias) 1623 alias = f" AS {alias}" if alias else "" 1624 value = ( 1625 "" 1626 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1627 else f"{value} FROM " 1628 ) 1629 1630 return f"{value}{explode}{alias}" 1631 1632 def show_sql(self, expression: exp.Show) -> 
str: 1633 terse = "TERSE " if expression.args.get("terse") else "" 1634 history = " HISTORY" if expression.args.get("history") else "" 1635 like = self.sql(expression, "like") 1636 like = f" LIKE {like}" if like else "" 1637 1638 scope = self.sql(expression, "scope") 1639 scope = f" {scope}" if scope else "" 1640 1641 scope_kind = self.sql(expression, "scope_kind") 1642 if scope_kind: 1643 scope_kind = f" IN {scope_kind}" 1644 1645 starts_with = self.sql(expression, "starts_with") 1646 if starts_with: 1647 starts_with = f" STARTS WITH {starts_with}" 1648 1649 limit = self.sql(expression, "limit") 1650 1651 from_ = self.sql(expression, "from") 1652 if from_: 1653 from_ = f" FROM {from_}" 1654 1655 privileges = self.expressions(expression, key="privileges", flat=True) 1656 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1657 1658 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1659 1660 def describe_sql(self, expression: exp.Describe) -> str: 1661 # Default to table if kind is unknown 1662 kind_value = expression.args.get("kind") or "TABLE" 1663 kind = f" {kind_value}" if kind_value else "" 1664 this = f" {self.sql(expression, 'this')}" 1665 expressions = self.expressions(expression, flat=True) 1666 expressions = f" {expressions}" if expressions else "" 1667 return f"DESCRIBE{kind}{this}{expressions}" 1668 1669 def generatedasidentitycolumnconstraint_sql( 1670 self, expression: exp.GeneratedAsIdentityColumnConstraint 1671 ) -> str: 1672 start = expression.args.get("start") 1673 start = f" START {start}" if start else "" 1674 increment = expression.args.get("increment") 1675 increment = f" INCREMENT {increment}" if increment else "" 1676 1677 order = expression.args.get("order") 1678 if order is not None: 1679 order_clause = " ORDER" if order else " NOORDER" 1680 else: 1681 order_clause = "" 1682 1683 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1684 1685 def cluster_sql(self, expression: exp.Cluster) -> str: 1686 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1687 1688 def struct_sql(self, expression: exp.Struct) -> str: 1689 if len(expression.expressions) == 1: 1690 arg = expression.expressions[0] 1691 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1692 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1693 return f"{{{self.sql(expression.expressions[0])}}}" 1694 1695 keys = [] 1696 values = [] 1697 1698 for i, e in enumerate(expression.expressions): 1699 if isinstance(e, exp.PropertyEQ): 1700 keys.append( 1701 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1702 ) 1703 values.append(e.expression) 1704 else: 1705 keys.append(exp.Literal.string(f"_{i}")) 1706 values.append(e) 1707 1708 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1709 1710 @unsupported_args("weight", "accuracy") 1711 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1712 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1713 1714 def alterset_sql(self, expression: exp.AlterSet) -> str: 1715 exprs = self.expressions(expression, flat=True) 1716 exprs = f" {exprs}" if exprs else "" 1717 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1718 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1719 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1720 
copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1721 tag = self.expressions(expression, key="tag", flat=True) 1722 tag = f" TAG {tag}" if tag else "" 1723 1724 return f"SET{exprs}{file_format}{copy_options}{tag}" 1725 1726 def strtotime_sql(self, expression: exp.StrToTime): 1727 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1728 return self.func( 1729 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1730 ) 1731 1732 def timestampsub_sql(self, expression: exp.TimestampSub): 1733 return self.sql( 1734 exp.TimestampAdd( 1735 this=expression.this, 1736 expression=expression.expression * -1, 1737 unit=expression.unit, 1738 ) 1739 ) 1740 1741 def jsonextract_sql(self, expression: exp.JSONExtract): 1742 this = expression.this 1743 1744 # JSON strings are valid coming from other dialects such as BQ so 1745 # for these cases we PARSE_JSON preemptively 1746 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1747 "requires_json" 1748 ): 1749 this = exp.ParseJSON(this=this) 1750 1751 return self.func( 1752 "GET_PATH", 1753 this, 1754 expression.expression, 1755 ) 1756 1757 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1758 this = expression.this 1759 if this.is_string: 1760 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1761 1762 return self.func("TO_CHAR", this, self.format_time(expression)) 1763 1764 def datesub_sql(self, expression: exp.DateSub) -> str: 1765 value = expression.expression 1766 if value: 1767 value.replace(value * (-1)) 1768 else: 1769 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1770 1771 return date_delta_sql("DATEADD")(self, expression) 1772 1773 def select_sql(self, expression: exp.Select) -> str: 1774 limit = expression.args.get("limit") 1775 offset = expression.args.get("offset") 1776 if offset and not limit: 1777 expression.limit(exp.Null(), copy=False) 1778 return super().select_sql(expression) 1779 1780 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1781 is_materialized = expression.find(exp.MaterializedProperty) 1782 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1783 1784 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1785 # For materialized views, COPY GRANTS is located *before* the columns list 1786 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1787 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1788 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1789 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1790 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1791 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1792 1793 this_name = self.sql(expression.this, "this") 1794 copy_grants = self.sql(copy_grants_property) 1795 this_schema = self.schema_columns_sql(expression.this) 1796 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1797 1798 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1799 1800 return super().createable_sql(expression, locations) 1801 1802 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1803 this = expression.this 1804 1805 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1806 # and add it later as part of the WITHIN GROUP 
clause 1807 order = this if isinstance(this, exp.Order) else None 1808 if order: 1809 expression.set("this", order.this.pop()) 1810 1811 expr_sql = super().arrayagg_sql(expression) 1812 1813 if order: 1814 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1815 1816 return expr_sql 1817 1818 def array_sql(self, expression: exp.Array) -> str: 1819 expressions = expression.expressions 1820 1821 first_expr = seq_get(expressions, 0) 1822 if isinstance(first_expr, exp.Select): 1823 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1824 if first_expr.text("kind").upper() == "STRUCT": 1825 object_construct_args = [] 1826 for expr in first_expr.expressions: 1827 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1828 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1829 name = expr.this if isinstance(expr, exp.Alias) else expr 1830 1831 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1832 1833 array_agg = exp.ArrayAgg( 1834 this=_build_object_construct(args=object_construct_args) 1835 ) 1836 1837 first_expr.set("kind", None) 1838 first_expr.set("expressions", [array_agg]) 1839 1840 return self.sql(first_expr.subquery()) 1841 1842 return inline_array_sql(self, expression) 1843 1844 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1845 zone = self.sql(expression, "this") 1846 if not zone: 1847 return super().currentdate_sql(expression) 1848 1849 expr = exp.Cast( 1850 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1851 to=exp.DataType(this=exp.DataType.Type.DATE), 1852 ) 1853 return self.sql(expr) 1854 1855 def dot_sql(self, expression: exp.Dot) -> str: 1856 this = expression.this 1857 1858 if not this.type: 1859 from sqlglot.optimizer.annotate_types import annotate_types 1860 1861 this = annotate_types(this, dialect=self.dialect) 1862 1863 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1864 # Generate colon notation for the top level STRUCT 1865 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1866 1867 return super().dot_sql(expression) 1868 1869 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1870 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1871 1872 def format_sql(self, expression: exp.Format) -> str: 1873 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1874 return self.func("TO_CHAR", expression.expressions[0]) 1875 1876 return self.function_fallback_sql(expression)
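A hedged sketch of two generator behaviors shown above: select_sql pads a bare OFFSET with LIMIT NULL, and datesub_sql rewrites DateSub as DATEADD with a negated count. Table and column names are illustrative, and outputs are indicative; they may vary by sqlglot version:

    import sqlglot

    # OFFSET without LIMIT gets a LIMIT NULL injected for Snowflake
    print(sqlglot.transpile("SELECT x FROM t OFFSET 10", write="snowflake")[0])
    # e.g. SELECT x FROM t LIMIT NULL OFFSET 10

    # DateSub is emitted as DATEADD with the count negated
    print(sqlglot.transpile("SELECT DATE_SUB(d, INTERVAL 1 DAY)", read="mysql", write="snowflake")[0])
    # e.g. SELECT DATEADD(DAY, 1 * -1, d)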
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

    WITH y(c) AS (
      SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
      SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
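A hedged sketch of this rewrite (in sqlglot the alias propagation happens during qualification in the optimizer, and qualify also qualifies and quotes identifiers by default, so the exact output differs cosmetically):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    expression = sqlglot.parse_one(sql, read="snowflake")

    # The CTE column alias `c` overrides the projection alias inside the CTE
    print(qualify(expression, dialect="snowflake").sql("snowflake"))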
Associates this dialect's time formats with their equivalent Python strftime formats.
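For instance, Snowflake's YYYY-MM-DD style formats are translated through this mapping when transpiling to a strftime-based dialect. A hedged example with illustrative names; the output is indicative:

    import sqlglot

    print(sqlglot.transpile("SELECT TO_CHAR(d, 'YYYY-MM-DD')", read="snowflake", write="duckdb")[0])
    # e.g. SELECT STRFTIME(d, '%Y-%m-%d'), modulo dialect-specific casts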
645 def quote_identifier(self, expression: E, identify: bool = True) -> E: 646 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 647 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 648 if ( 649 isinstance(expression, exp.Identifier) 650 and isinstance(expression.parent, exp.Table) 651 and expression.name.lower() == "dual" 652 ): 653 return expression # type: ignore 654 655 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
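A minimal illustration of the DUAL carve-out, calling the method directly (identifier names are arbitrary; output indicative):

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()

    # An Identifier named "dual" inside a Table node is returned untouched
    dual = exp.to_table("dual").this
    print(dialect.quote_identifier(dual).sql("snowflake"))  # dual

    # Ordinary identifiers are quoted when identify=True (the default)
    print(dialect.quote_identifier(exp.to_identifier("col")).sql("snowflake"))  # "col"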
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
657 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 658 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 659 SINGLE_TOKENS.pop("$")
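Snowflake JSON paths have no leading $ root, which is why $ is removed from the single-character tokens here. A hedged example of the effect when transpiling to a $-rooted dialect (names illustrative, output indicative):

    import sqlglot

    print(sqlglot.transpile("SELECT GET_PATH(j, 'a.b')", read="snowflake", write="duckdb")[0])
    # e.g. SELECT j -> '$.a.b'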
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
661 class Parser(parser.Parser): 662 IDENTIFY_PIVOT_STRINGS = True 663 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 664 COLON_IS_VARIANT_EXTRACT = True 665 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 666 667 ID_VAR_TOKENS = { 668 *parser.Parser.ID_VAR_TOKENS, 669 TokenType.EXCEPT, 670 TokenType.MATCH_CONDITION, 671 } 672 673 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 674 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 675 676 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 677 678 FUNCTIONS = { 679 **parser.Parser.FUNCTIONS, 680 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 681 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 682 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 683 this=seq_get(args, 1), expression=seq_get(args, 0) 684 ), 685 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 686 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 687 start=seq_get(args, 0), 688 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 689 step=seq_get(args, 2), 690 ), 691 "ARRAY_SORT": exp.SortArray.from_arg_list, 692 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 693 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 694 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 695 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 696 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 697 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 698 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 699 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 700 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 701 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 702 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 703 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 704 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 705 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 706 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 707 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 708 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 709 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 710 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 711 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 712 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 713 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 714 "DATE_TRUNC": _date_trunc_to_time, 715 "DATEADD": _build_date_time_add(exp.DateAdd), 716 "DATEDIFF": _build_datediff, 717 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 718 "DIV0": _build_if_from_div0, 719 "EDITDISTANCE": lambda args: exp.Levenshtein( 720 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 721 ), 722 "FLATTEN": exp.Explode.from_arg_list, 723 "GET": exp.GetExtract.from_arg_list, 724 "GET_PATH": lambda args, dialect: exp.JSONExtract( 725 this=seq_get(args, 0), 726 expression=dialect.to_json_path(seq_get(args, 1)), 727 requires_json=True, 728 ), 729 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 730 "IFF": exp.If.from_arg_list, 731 "MD5_HEX": exp.MD5.from_arg_list, 732 "MD5_BINARY": exp.MD5Digest.from_arg_list, 733 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 734 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 735 "LAST_DAY": lambda args: exp.LastDay( 736 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 737 ), 738 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 739 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 740 "NULLIFZERO": 
_build_if_from_nullifzero, 741 "OBJECT_CONSTRUCT": _build_object_construct, 742 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 743 "PARSE_URL": lambda args: exp.ParseUrl( 744 this=seq_get(args, 0), permissive=seq_get(args, 1) 745 ), 746 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 747 "REGEXP_REPLACE": _build_regexp_replace, 748 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 749 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 750 "REPLACE": build_replace_with_optional_replacement, 751 "RLIKE": exp.RegexpLike.from_arg_list, 752 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 753 "SHA1_HEX": exp.SHA.from_arg_list, 754 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 755 "SHA2_HEX": exp.SHA2.from_arg_list, 756 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 757 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 758 "TIMEADD": _build_date_time_add(exp.TimeAdd), 759 "TIMEDIFF": _build_datediff, 760 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 761 "TIMESTAMPDIFF": _build_datediff, 762 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 763 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 764 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 765 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 766 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 767 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 768 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 769 "TRY_TO_TIMESTAMP": _build_datetime( 770 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 771 ), 772 "TO_CHAR": build_timetostr_or_tochar, 773 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 774 "TO_NUMBER": lambda args: exp.ToNumber( 775 this=seq_get(args, 0), 776 format=seq_get(args, 1), 777 precision=seq_get(args, 2), 778 scale=seq_get(args, 3), 779 ), 780 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 781 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 782 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 783 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 784 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 785 "TO_VARCHAR": build_timetostr_or_tochar, 786 "TO_JSON": exp.JSONFormat.from_arg_list, 787 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 788 "ZEROIFNULL": _build_if_from_zeroifnull, 789 "LIKE": _build_like(exp.Like), 790 "ILIKE": _build_like(exp.ILike), 791 } 792 FUNCTIONS.pop("PREDICT") 793 794 FUNCTION_PARSERS = { 795 **parser.Parser.FUNCTION_PARSERS, 796 "DATE_PART": lambda self: self._parse_date_part(), 797 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 798 "LISTAGG": lambda self: self._parse_string_agg(), 799 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 800 } 801 FUNCTION_PARSERS.pop("TRIM") 802 803 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 804 805 ALTER_PARSERS = { 806 **parser.Parser.ALTER_PARSERS, 807 "SESSION": lambda self: self._parse_alter_session(), 808 "UNSET": lambda self: self.expression( 809 exp.Set, 810 tag=self._match_text_seq("TAG"), 811 expressions=self._parse_csv(self._parse_id_var), 812 unset=True, 813 ), 814 } 815 816 STATEMENT_PARSERS = { 817 **parser.Parser.STATEMENT_PARSERS, 818 TokenType.GET: lambda self: self._parse_get(), 819 TokenType.PUT: lambda self: 
self._parse_put(), 820 TokenType.SHOW: lambda self: self._parse_show(), 821 } 822 823 PROPERTY_PARSERS = { 824 **parser.Parser.PROPERTY_PARSERS, 825 "CREDENTIALS": lambda self: self._parse_credentials_property(), 826 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 827 "LOCATION": lambda self: self._parse_location_property(), 828 "TAG": lambda self: self._parse_tag(), 829 "USING": lambda self: self._match_text_seq("TEMPLATE") 830 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 831 } 832 833 TYPE_CONVERTERS = { 834 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 835 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 836 } 837 838 SHOW_PARSERS = { 839 "DATABASES": _show_parser("DATABASES"), 840 "TERSE DATABASES": _show_parser("DATABASES"), 841 "SCHEMAS": _show_parser("SCHEMAS"), 842 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 843 "OBJECTS": _show_parser("OBJECTS"), 844 "TERSE OBJECTS": _show_parser("OBJECTS"), 845 "TABLES": _show_parser("TABLES"), 846 "TERSE TABLES": _show_parser("TABLES"), 847 "VIEWS": _show_parser("VIEWS"), 848 "TERSE VIEWS": _show_parser("VIEWS"), 849 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 850 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 851 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 852 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 853 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 854 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 855 "SEQUENCES": _show_parser("SEQUENCES"), 856 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 857 "STAGES": _show_parser("STAGES"), 858 "COLUMNS": _show_parser("COLUMNS"), 859 "USERS": _show_parser("USERS"), 860 "TERSE USERS": _show_parser("USERS"), 861 "FILE FORMATS": _show_parser("FILE FORMATS"), 862 "FUNCTIONS": _show_parser("FUNCTIONS"), 863 "PROCEDURES": _show_parser("PROCEDURES"), 864 "WAREHOUSES": _show_parser("WAREHOUSES"), 865 } 866 867 CONSTRAINT_PARSERS = { 868 **parser.Parser.CONSTRAINT_PARSERS, 869 "WITH": lambda self: self._parse_with_constraint(), 870 "MASKING": lambda self: self._parse_with_constraint(), 871 "PROJECTION": lambda self: self._parse_with_constraint(), 872 "TAG": lambda self: self._parse_with_constraint(), 873 } 874 875 STAGED_FILE_SINGLE_TOKENS = { 876 TokenType.DOT, 877 TokenType.MOD, 878 TokenType.SLASH, 879 } 880 881 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 882 883 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 884 885 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 886 887 LAMBDAS = { 888 **parser.Parser.LAMBDAS, 889 TokenType.ARROW: lambda self, expressions: self.expression( 890 exp.Lambda, 891 this=self._replace_lambda( 892 self._parse_assignment(), 893 expressions, 894 ), 895 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 896 ), 897 } 898 899 COLUMN_OPERATORS = { 900 **parser.Parser.COLUMN_OPERATORS, 901 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 902 exp.ModelAttribute, this=this, expression=attr 903 ), 904 } 905 906 def _parse_use(self) -> exp.Use: 907 if self._match_text_seq("SECONDARY", "ROLES"): 908 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 909 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 910 return self.expression( 911 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 912 ) 913 914 return super()._parse_use() 915 916 def _negate_range( 917 self, 
this: t.Optional[exp.Expression] = None 918 ) -> t.Optional[exp.Expression]: 919 if not this: 920 return this 921 922 query = this.args.get("query") 923 if isinstance(this, exp.In) and isinstance(query, exp.Query): 924 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 925 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 926 # which can produce different results (most likely a SnowFlake bug). 927 # 928 # https://docs.snowflake.com/en/sql-reference/functions/in 929 # Context: https://github.com/tobymao/sqlglot/issues/3890 930 return self.expression( 931 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 932 ) 933 934 return self.expression(exp.Not, this=this) 935 936 def _parse_tag(self) -> exp.Tags: 937 return self.expression( 938 exp.Tags, 939 expressions=self._parse_wrapped_csv(self._parse_property), 940 ) 941 942 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 943 if self._prev.token_type != TokenType.WITH: 944 self._retreat(self._index - 1) 945 946 if self._match_text_seq("MASKING", "POLICY"): 947 policy = self._parse_column() 948 return self.expression( 949 exp.MaskingPolicyColumnConstraint, 950 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 951 expressions=self._match(TokenType.USING) 952 and self._parse_wrapped_csv(self._parse_id_var), 953 ) 954 if self._match_text_seq("PROJECTION", "POLICY"): 955 policy = self._parse_column() 956 return self.expression( 957 exp.ProjectionPolicyColumnConstraint, 958 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 959 ) 960 if self._match(TokenType.TAG): 961 return self._parse_tag() 962 963 return None 964 965 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 966 if self._match(TokenType.TAG): 967 return self._parse_tag() 968 969 return super()._parse_with_property() 970 971 def _parse_create(self) -> exp.Create | exp.Command: 972 expression = super()._parse_create() 973 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 974 # Replace the Table node with the enclosed Identifier 975 expression.this.replace(expression.this.this) 976 977 return expression 978 979 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 980 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 981 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 982 this = self._parse_var() or self._parse_type() 983 984 if not this: 985 return None 986 987 self._match(TokenType.COMMA) 988 expression = self._parse_bitwise() 989 this = map_date_part(this) 990 name = this.name.upper() 991 992 if name.startswith("EPOCH"): 993 if name == "EPOCH_MILLISECOND": 994 scale = 10**3 995 elif name == "EPOCH_MICROSECOND": 996 scale = 10**6 997 elif name == "EPOCH_NANOSECOND": 998 scale = 10**9 999 else: 1000 scale = None 1001 1002 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 1003 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 1004 1005 if scale: 1006 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 1007 1008 return to_unix 1009 1010 return self.expression(exp.Extract, this=this, expression=expression) 1011 1012 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 1013 if is_map: 1014 # Keys are strings in Snowflake's objects, see also: 1015 # - 
https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1016 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1017 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1018 1019 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1020 1021 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1022 lateral = super()._parse_lateral() 1023 if not lateral: 1024 return lateral 1025 1026 if isinstance(lateral.this, exp.Explode): 1027 table_alias = lateral.args.get("alias") 1028 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1029 if table_alias and not table_alias.args.get("columns"): 1030 table_alias.set("columns", columns) 1031 elif not table_alias: 1032 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1033 1034 return lateral 1035 1036 def _parse_table_parts( 1037 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1038 ) -> exp.Table: 1039 # https://docs.snowflake.com/en/user-guide/querying-stage 1040 if self._match(TokenType.STRING, advance=False): 1041 table = self._parse_string() 1042 elif self._match_text_seq("@", advance=False): 1043 table = self._parse_location_path() 1044 else: 1045 table = None 1046 1047 if table: 1048 file_format = None 1049 pattern = None 1050 1051 wrapped = self._match(TokenType.L_PAREN) 1052 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1053 if self._match_text_seq("FILE_FORMAT", "=>"): 1054 file_format = self._parse_string() or super()._parse_table_parts( 1055 is_db_reference=is_db_reference 1056 ) 1057 elif self._match_text_seq("PATTERN", "=>"): 1058 pattern = self._parse_string() 1059 else: 1060 break 1061 1062 self._match(TokenType.COMMA) 1063 1064 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1065 else: 1066 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1067 1068 return table 1069 1070 def _parse_table( 1071 self, 1072 schema: bool = False, 1073 joins: bool = False, 1074 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1075 parse_bracket: bool = False, 1076 is_db_reference: bool = False, 1077 parse_partition: bool = False, 1078 consume_pipe: bool = False, 1079 ) -> t.Optional[exp.Expression]: 1080 table = super()._parse_table( 1081 schema=schema, 1082 joins=joins, 1083 alias_tokens=alias_tokens, 1084 parse_bracket=parse_bracket, 1085 is_db_reference=is_db_reference, 1086 parse_partition=parse_partition, 1087 ) 1088 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1089 table_from_rows = table.this 1090 for arg in exp.TableFromRows.arg_types: 1091 if arg != "this": 1092 table_from_rows.set(arg, table.args.get(arg)) 1093 1094 table = table_from_rows 1095 1096 return table 1097 1098 def _parse_id_var( 1099 self, 1100 any_token: bool = True, 1101 tokens: t.Optional[t.Collection[TokenType]] = None, 1102 ) -> t.Optional[exp.Expression]: 1103 if self._match_text_seq("IDENTIFIER", "("): 1104 identifier = ( 1105 super()._parse_id_var(any_token=any_token, tokens=tokens) 1106 or self._parse_string() 1107 ) 1108 self._match_r_paren() 1109 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1110 1111 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1112 1113 def _parse_show_snowflake(self, this: str) -> exp.Show: 1114 scope = None 1115 scope_kind = None 1116 1117 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1118 # 
which is syntactically valid but has no effect on the output 1119 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1120 1121 history = self._match_text_seq("HISTORY") 1122 1123 like = self._parse_string() if self._match(TokenType.LIKE) else None 1124 1125 if self._match(TokenType.IN): 1126 if self._match_text_seq("ACCOUNT"): 1127 scope_kind = "ACCOUNT" 1128 elif self._match_text_seq("CLASS"): 1129 scope_kind = "CLASS" 1130 scope = self._parse_table_parts() 1131 elif self._match_text_seq("APPLICATION"): 1132 scope_kind = "APPLICATION" 1133 if self._match_text_seq("PACKAGE"): 1134 scope_kind += " PACKAGE" 1135 scope = self._parse_table_parts() 1136 elif self._match_set(self.DB_CREATABLES): 1137 scope_kind = self._prev.text.upper() 1138 if self._curr: 1139 scope = self._parse_table_parts() 1140 elif self._curr: 1141 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1142 scope = self._parse_table_parts() 1143 1144 return self.expression( 1145 exp.Show, 1146 **{ 1147 "terse": terse, 1148 "this": this, 1149 "history": history, 1150 "like": like, 1151 "scope": scope, 1152 "scope_kind": scope_kind, 1153 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1154 "limit": self._parse_limit(), 1155 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1156 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1157 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1158 }, 1159 ) 1160 1161 def _parse_put(self) -> exp.Put | exp.Command: 1162 if self._curr.token_type != TokenType.STRING: 1163 return self._parse_as_command(self._prev) 1164 1165 return self.expression( 1166 exp.Put, 1167 this=self._parse_string(), 1168 target=self._parse_location_path(), 1169 properties=self._parse_properties(), 1170 ) 1171 1172 def _parse_get(self) -> t.Optional[exp.Expression]: 1173 start = self._prev 1174 1175 # If we detect GET( then we need to parse a function, not a statement 1176 if self._match(TokenType.L_PAREN): 1177 self._retreat(self._index - 2) 1178 return self._parse_expression() 1179 1180 target = self._parse_location_path() 1181 1182 # Parse as command if unquoted file path 1183 if self._curr.token_type == TokenType.URI_START: 1184 return self._parse_as_command(start) 1185 1186 return self.expression( 1187 exp.Get, 1188 this=self._parse_string(), 1189 target=target, 1190 properties=self._parse_properties(), 1191 ) 1192 1193 def _parse_location_property(self) -> exp.LocationProperty: 1194 self._match(TokenType.EQ) 1195 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1196 1197 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1198 # Parse either a subquery or a staged file 1199 return ( 1200 self._parse_select(table=True, parse_subquery_alias=False) 1201 if self._match(TokenType.L_PAREN, advance=False) 1202 else self._parse_table_parts() 1203 ) 1204 1205 def _parse_location_path(self) -> exp.Var: 1206 start = self._curr 1207 self._advance_any(ignore_reserved=True) 1208 1209 # We avoid consuming a comma token because external tables like @foo and @bar 1210 # can be joined in a query with a comma separator, as well as closing paren 1211 # in case of subqueries 1212 while self._is_connected() and not self._match_set( 1213 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1214 ): 1215 self._advance_any(ignore_reserved=True) 1216 1217 return exp.var(self._find_sql(start, self._prev)) 1218 1219 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1220 
this = super()._parse_lambda_arg() 1221 1222 if not this: 1223 return this 1224 1225 typ = self._parse_types() 1226 1227 if typ: 1228 return self.expression(exp.Cast, this=this, to=typ) 1229 1230 return this 1231 1232 def _parse_foreign_key(self) -> exp.ForeignKey: 1233 # inlineFK, the REFERENCES columns are implied 1234 if self._match(TokenType.REFERENCES, advance=False): 1235 return self.expression(exp.ForeignKey) 1236 1237 # outoflineFK, explicitly names the columns 1238 return super()._parse_foreign_key() 1239 1240 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1241 self._match(TokenType.EQ) 1242 if self._match(TokenType.L_PAREN, advance=False): 1243 expressions = self._parse_wrapped_options() 1244 else: 1245 expressions = [self._parse_format_name()] 1246 1247 return self.expression( 1248 exp.FileFormatProperty, 1249 expressions=expressions, 1250 ) 1251 1252 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1253 return self.expression( 1254 exp.CredentialsProperty, 1255 expressions=self._parse_wrapped_options(), 1256 ) 1257 1258 def _parse_semantic_view(self) -> exp.SemanticView: 1259 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1260 1261 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1262 if self._match_text_seq("DIMENSIONS"): 1263 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1264 if self._match_text_seq("METRICS"): 1265 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1266 if self._match_text_seq("WHERE"): 1267 kwargs["where"] = self._parse_expression() 1268 1269 return self.expression(exp.SemanticView, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
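A hedged usage sketch of the parser, showing one of the FUNCTIONS mappings above: IFF is parsed into the generic exp.If node and round-trips back to IFF (names illustrative):

    import sqlglot

    node = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg')", read="snowflake")
    print(type(node.selects[0]).__name__)  # If
    print(node.sql("snowflake"))           # SELECT IFF(x > 0, 'pos', 'neg')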
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1271 class Tokenizer(tokens.Tokenizer): 1272 STRING_ESCAPES = ["\\", "'"] 1273 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1274 RAW_STRINGS = ["$$"] 1275 COMMENTS = ["--", "//", ("/*", "*/")] 1276 NESTED_COMMENTS = False 1277 1278 KEYWORDS = { 1279 **tokens.Tokenizer.KEYWORDS, 1280 "BYTEINT": TokenType.INT, 1281 "FILE://": TokenType.URI_START, 1282 "FILE FORMAT": TokenType.FILE_FORMAT, 1283 "GET": TokenType.GET, 1284 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1285 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1286 "MINUS": TokenType.EXCEPT, 1287 "NCHAR VARYING": TokenType.VARCHAR, 1288 "PUT": TokenType.PUT, 1289 "REMOVE": TokenType.COMMAND, 1290 "RM": TokenType.COMMAND, 1291 "SAMPLE": TokenType.TABLE_SAMPLE, 1292 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1293 "SQL_DOUBLE": TokenType.DOUBLE, 1294 "SQL_VARCHAR": TokenType.VARCHAR, 1295 "STAGE": TokenType.STAGE, 1296 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1297 "STREAMLIT": TokenType.STREAMLIT, 1298 "TAG": TokenType.TAG, 1299 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1300 "TOP": TokenType.TOP, 1301 "WAREHOUSE": TokenType.WAREHOUSE, 1302 } 1303 KEYWORDS.pop("/*+") 1304 1305 SINGLE_TOKENS = { 1306 **tokens.Tokenizer.SINGLE_TOKENS, 1307 "$": TokenType.PARAMETER, 1308 "!": TokenType.EXCLAMATION, 1309 } 1310 1311 VAR_SINGLE_TOKENS = {"$"} 1312 1313 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
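A hedged example of two tokenizer entries above: MINUS tokenizes as EXCEPT, and // starts a line comment (outputs indicative; sqlglot preserves comments as block comments):

    import sqlglot

    print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="duckdb")[0])
    # e.g. SELECT 1 EXCEPT SELECT 2

    print(sqlglot.transpile("SELECT 1 // a comment", read="snowflake")[0])
    # e.g. SELECT 1 /* a comment */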
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1315 class Generator(generator.Generator): 1316 PARAMETER_TOKEN = "$" 1317 MATCHED_BY_SOURCE = False 1318 SINGLE_STRING_INTERVAL = True 1319 JOIN_HINTS = False 1320 TABLE_HINTS = False 1321 QUERY_HINTS = False 1322 AGGREGATE_FILTER_SUPPORTED = False 1323 SUPPORTS_TABLE_COPY = False 1324 COLLATE_IS_FUNC = True 1325 LIMIT_ONLY_LITERALS = True 1326 JSON_KEY_VALUE_PAIR_SEP = "," 1327 INSERT_OVERWRITE = " OVERWRITE INTO" 1328 STRUCT_DELIMITER = ("(", ")") 1329 COPY_PARAMS_ARE_WRAPPED = False 1330 COPY_PARAMS_EQ_REQUIRED = True 1331 STAR_EXCEPT = "EXCLUDE" 1332 SUPPORTS_EXPLODING_PROJECTIONS = False 1333 ARRAY_CONCAT_IS_VAR_LEN = False 1334 SUPPORTS_CONVERT_TIMEZONE = True 1335 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1336 SUPPORTS_MEDIAN = True 1337 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1338 SUPPORTS_DECODE_CASE = True 1339 IS_BOOL_ALLOWED = False 1340 1341 TRANSFORMS = { 1342 **generator.Generator.TRANSFORMS, 1343 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1344 exp.ArgMax: rename_func("MAX_BY"), 1345 exp.ArgMin: rename_func("MIN_BY"), 1346 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1347 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1348 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1349 exp.AtTimeZone: lambda self, e: self.func( 1350 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1351 ), 1352 exp.BitwiseOr: rename_func("BITOR"), 1353 exp.BitwiseXor: rename_func("BITXOR"), 1354 exp.BitwiseAnd: rename_func("BITAND"), 1355 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1356 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1357 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1358 exp.BitwiseNot: rename_func("BITNOT"), 1359 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1360 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1361 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1362 exp.DateAdd: date_delta_sql("DATEADD"), 1363 exp.DateDiff: date_delta_sql("DATEDIFF"), 1364 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1365 exp.DatetimeDiff: timestampdiff_sql, 1366 exp.DateStrToDate: datestrtodate_sql, 1367 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1368 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1369 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1370 exp.DayOfYear: rename_func("DAYOFYEAR"), 1371 exp.Explode: rename_func("FLATTEN"), 1372 exp.Extract: lambda self, e: self.func( 1373 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1374 ), 1375 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1376 exp.FileFormatProperty: lambda self, 1377 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1378 exp.FromTimeZone: lambda self, e: self.func( 1379 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1380 ), 1381 exp.GenerateSeries: lambda self, e: self.func( 1382 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1383 ), 1384 exp.GetExtract: rename_func("GET"), 1385 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1386 exp.If: if_sql(name="IFF", false_value="NULL"), 1387 exp.JSONExtractArray: _json_extract_value_array_sql, 1388 exp.JSONExtractScalar: lambda self, e: self.func( 1389 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1390 ), 1391 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1392 exp.JSONPathRoot: lambda *_: "", 1393 exp.JSONValueArray: _json_extract_value_array_sql, 1394 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1395 
rename_func("EDITDISTANCE") 1396 ), 1397 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1398 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1399 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1400 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1401 exp.MakeInterval: no_make_interval_sql, 1402 exp.Max: max_or_greatest, 1403 exp.Min: min_or_least, 1404 exp.ParseJSON: lambda self, e: self.func( 1405 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1406 ), 1407 exp.JSONFormat: rename_func("TO_JSON"), 1408 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1409 exp.PercentileCont: transforms.preprocess( 1410 [transforms.add_within_group_for_percentiles] 1411 ), 1412 exp.PercentileDisc: transforms.preprocess( 1413 [transforms.add_within_group_for_percentiles] 1414 ), 1415 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1416 exp.RegexpExtract: _regexpextract_sql, 1417 exp.RegexpExtractAll: _regexpextract_sql, 1418 exp.RegexpILike: _regexpilike_sql, 1419 exp.Rand: rename_func("RANDOM"), 1420 exp.Select: transforms.preprocess( 1421 [ 1422 transforms.eliminate_window_clause, 1423 transforms.eliminate_distinct_on, 1424 transforms.explode_projection_to_unnest(), 1425 transforms.eliminate_semi_and_anti_joins, 1426 _transform_generate_date_array, 1427 _qualify_unnested_columns, 1428 _eliminate_dot_variant_lookup, 1429 ] 1430 ), 1431 exp.SHA: rename_func("SHA1"), 1432 exp.MD5Digest: rename_func("MD5_BINARY"), 1433 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1434 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1435 exp.LowerHex: rename_func("TO_CHAR"), 1436 exp.SortArray: rename_func("ARRAY_SORT"), 1437 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1438 exp.StartsWith: rename_func("STARTSWITH"), 1439 exp.EndsWith: rename_func("ENDSWITH"), 1440 exp.StrPosition: lambda self, e: strposition_sql( 1441 self, e, func_name="CHARINDEX", supports_position=True 1442 ), 1443 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1444 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1445 exp.Stuff: rename_func("INSERT"), 1446 exp.StPoint: rename_func("ST_MAKEPOINT"), 1447 exp.TimeAdd: date_delta_sql("TIMEADD"), 1448 exp.Timestamp: no_timestamp_sql, 1449 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1450 exp.TimestampDiff: lambda self, e: self.func( 1451 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1452 ), 1453 exp.TimestampTrunc: timestamptrunc_sql(), 1454 exp.TimeStrToTime: timestrtotime_sql, 1455 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1456 exp.ToArray: rename_func("TO_ARRAY"), 1457 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1458 exp.ToDouble: rename_func("TO_DOUBLE"), 1459 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1460 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1461 exp.TsOrDsToDate: lambda self, e: self.func( 1462 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1463 ), 1464 exp.TsOrDsToTime: lambda self, e: self.func( 1465 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1466 ), 1467 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1468 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1469 exp.Uuid: rename_func("UUID_STRING"), 1470 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1471 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1472 exp.Xor: rename_func("BOOLXOR"), 1473 exp.ByteLength: rename_func("OCTET_LENGTH"), 1474 
} 1475 1476 SUPPORTED_JSON_PATH_PARTS = { 1477 exp.JSONPathKey, 1478 exp.JSONPathRoot, 1479 exp.JSONPathSubscript, 1480 } 1481 1482 TYPE_MAPPING = { 1483 **generator.Generator.TYPE_MAPPING, 1484 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1485 exp.DataType.Type.NESTED: "OBJECT", 1486 exp.DataType.Type.STRUCT: "OBJECT", 1487 exp.DataType.Type.TEXT: "VARCHAR", 1488 } 1489 1490 TOKEN_MAPPING = { 1491 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1492 } 1493 1494 PROPERTIES_LOCATION = { 1495 **generator.Generator.PROPERTIES_LOCATION, 1496 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1497 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1498 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1499 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1500 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1501 } 1502 1503 UNSUPPORTED_VALUES_EXPRESSIONS = { 1504 exp.Map, 1505 exp.StarMap, 1506 exp.Struct, 1507 exp.VarMap, 1508 } 1509 1510 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1511 1512 def with_properties(self, properties: exp.Properties) -> str: 1513 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1514 1515 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1516 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1517 values_as_table = False 1518 1519 return super().values_sql(expression, values_as_table=values_as_table) 1520 1521 def datatype_sql(self, expression: exp.DataType) -> str: 1522 expressions = expression.expressions 1523 if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES): 1524 for field_type in expressions: 1525 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1526 if isinstance(field_type, exp.DataType): 1527 return "OBJECT" 1528 if ( 1529 isinstance(field_type, exp.ColumnDef) 1530 and field_type.this 1531 and field_type.this.is_string 1532 ): 1533 # Doing OBJECT('foo' VARCHAR) is invalid snowflake Syntax. Moreover, besides 1534 # converting 'foo' into an identifier, we also need to quote it because these 1535 # keys are case-sensitive. 
For example: 1536 # 1537 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct 1538 # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL 1539 field_type.this.replace(exp.to_identifier(field_type.name, quoted=True)) 1540 1541 return super().datatype_sql(expression) 1542 1543 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1544 return self.func( 1545 "TO_NUMBER", 1546 expression.this, 1547 expression.args.get("format"), 1548 expression.args.get("precision"), 1549 expression.args.get("scale"), 1550 ) 1551 1552 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1553 milli = expression.args.get("milli") 1554 if milli is not None: 1555 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1556 expression.set("nano", milli_to_nano) 1557 1558 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1559 1560 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1561 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1562 return self.func("TO_GEOGRAPHY", expression.this) 1563 if expression.is_type(exp.DataType.Type.GEOMETRY): 1564 return self.func("TO_GEOMETRY", expression.this) 1565 1566 return super().cast_sql(expression, safe_prefix=safe_prefix) 1567 1568 def trycast_sql(self, expression: exp.TryCast) -> str: 1569 value = expression.this 1570 1571 if value.type is None: 1572 from sqlglot.optimizer.annotate_types import annotate_types 1573 1574 value = annotate_types(value, dialect=self.dialect) 1575 1576 # Snowflake requires that TRY_CAST's value be a string 1577 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1578 # if we can deduce that the value is a string, then we can generate TRY_CAST 1579 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1580 return super().trycast_sql(expression) 1581 1582 return self.cast_sql(expression) 1583 1584 def log_sql(self, expression: exp.Log) -> str: 1585 if not expression.expression: 1586 return self.func("LN", expression.this) 1587 1588 return super().log_sql(expression) 1589 1590 def unnest_sql(self, expression: exp.Unnest) -> str: 1591 unnest_alias = expression.args.get("alias") 1592 offset = expression.args.get("offset") 1593 1594 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1595 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1596 1597 columns = [ 1598 exp.to_identifier("seq"), 1599 exp.to_identifier("key"), 1600 exp.to_identifier("path"), 1601 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1602 value, 1603 exp.to_identifier("this"), 1604 ] 1605 1606 if unnest_alias: 1607 unnest_alias.set("columns", columns) 1608 else: 1609 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1610 1611 table_input = self.sql(expression.expressions[0]) 1612 if not table_input.startswith("INPUT =>"): 1613 table_input = f"INPUT => {table_input}" 1614 1615 expression_parent = expression.parent 1616 1617 explode = ( 1618 f"FLATTEN({table_input})" 1619 if isinstance(expression_parent, exp.Lateral) 1620 else f"TABLE(FLATTEN({table_input}))" 1621 ) 1622 alias = self.sql(unnest_alias) 1623 alias = f" AS {alias}" if alias else "" 1624 value = ( 1625 "" 1626 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1627 else f"{value} FROM " 1628 ) 1629 1630 return f"{value}{explode}{alias}" 1631 1632 def show_sql(self, expression: exp.Show) -> 
str: 1633 terse = "TERSE " if expression.args.get("terse") else "" 1634 history = " HISTORY" if expression.args.get("history") else "" 1635 like = self.sql(expression, "like") 1636 like = f" LIKE {like}" if like else "" 1637 1638 scope = self.sql(expression, "scope") 1639 scope = f" {scope}" if scope else "" 1640 1641 scope_kind = self.sql(expression, "scope_kind") 1642 if scope_kind: 1643 scope_kind = f" IN {scope_kind}" 1644 1645 starts_with = self.sql(expression, "starts_with") 1646 if starts_with: 1647 starts_with = f" STARTS WITH {starts_with}" 1648 1649 limit = self.sql(expression, "limit") 1650 1651 from_ = self.sql(expression, "from") 1652 if from_: 1653 from_ = f" FROM {from_}" 1654 1655 privileges = self.expressions(expression, key="privileges", flat=True) 1656 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1657 1658 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1659 1660 def describe_sql(self, expression: exp.Describe) -> str: 1661 # Default to table if kind is unknown 1662 kind_value = expression.args.get("kind") or "TABLE" 1663 kind = f" {kind_value}" if kind_value else "" 1664 this = f" {self.sql(expression, 'this')}" 1665 expressions = self.expressions(expression, flat=True) 1666 expressions = f" {expressions}" if expressions else "" 1667 return f"DESCRIBE{kind}{this}{expressions}" 1668 1669 def generatedasidentitycolumnconstraint_sql( 1670 self, expression: exp.GeneratedAsIdentityColumnConstraint 1671 ) -> str: 1672 start = expression.args.get("start") 1673 start = f" START {start}" if start else "" 1674 increment = expression.args.get("increment") 1675 increment = f" INCREMENT {increment}" if increment else "" 1676 1677 order = expression.args.get("order") 1678 if order is not None: 1679 order_clause = " ORDER" if order else " NOORDER" 1680 else: 1681 order_clause = "" 1682 1683 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1684 1685 def cluster_sql(self, expression: exp.Cluster) -> str: 1686 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1687 1688 def struct_sql(self, expression: exp.Struct) -> str: 1689 if len(expression.expressions) == 1: 1690 arg = expression.expressions[0] 1691 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1692 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1693 return f"{{{self.sql(expression.expressions[0])}}}" 1694 1695 keys = [] 1696 values = [] 1697 1698 for i, e in enumerate(expression.expressions): 1699 if isinstance(e, exp.PropertyEQ): 1700 keys.append( 1701 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1702 ) 1703 values.append(e.expression) 1704 else: 1705 keys.append(exp.Literal.string(f"_{i}")) 1706 values.append(e) 1707 1708 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1709 1710 @unsupported_args("weight", "accuracy") 1711 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1712 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1713 1714 def alterset_sql(self, expression: exp.AlterSet) -> str: 1715 exprs = self.expressions(expression, flat=True) 1716 exprs = f" {exprs}" if exprs else "" 1717 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1718 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1719 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1720 
copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1721 tag = self.expressions(expression, key="tag", flat=True) 1722 tag = f" TAG {tag}" if tag else "" 1723 1724 return f"SET{exprs}{file_format}{copy_options}{tag}" 1725 1726 def strtotime_sql(self, expression: exp.StrToTime): 1727 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1728 return self.func( 1729 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1730 ) 1731 1732 def timestampsub_sql(self, expression: exp.TimestampSub): 1733 return self.sql( 1734 exp.TimestampAdd( 1735 this=expression.this, 1736 expression=expression.expression * -1, 1737 unit=expression.unit, 1738 ) 1739 ) 1740 1741 def jsonextract_sql(self, expression: exp.JSONExtract): 1742 this = expression.this 1743 1744 # JSON strings are valid coming from other dialects such as BQ so 1745 # for these cases we PARSE_JSON preemptively 1746 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1747 "requires_json" 1748 ): 1749 this = exp.ParseJSON(this=this) 1750 1751 return self.func( 1752 "GET_PATH", 1753 this, 1754 expression.expression, 1755 ) 1756 1757 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1758 this = expression.this 1759 if this.is_string: 1760 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1761 1762 return self.func("TO_CHAR", this, self.format_time(expression)) 1763 1764 def datesub_sql(self, expression: exp.DateSub) -> str: 1765 value = expression.expression 1766 if value: 1767 value.replace(value * (-1)) 1768 else: 1769 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1770 1771 return date_delta_sql("DATEADD")(self, expression) 1772 1773 def select_sql(self, expression: exp.Select) -> str: 1774 limit = expression.args.get("limit") 1775 offset = expression.args.get("offset") 1776 if offset and not limit: 1777 expression.limit(exp.Null(), copy=False) 1778 return super().select_sql(expression) 1779 1780 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1781 is_materialized = expression.find(exp.MaterializedProperty) 1782 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1783 1784 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1785 # For materialized views, COPY GRANTS is located *before* the columns list 1786 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1787 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1788 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1789 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1790 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1791 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1792 1793 this_name = self.sql(expression.this, "this") 1794 copy_grants = self.sql(copy_grants_property) 1795 this_schema = self.schema_columns_sql(expression.this) 1796 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1797 1798 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1799 1800 return super().createable_sql(expression, locations) 1801 1802 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1803 this = expression.this 1804 1805 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1806 # and add it later as part of the WITHIN GROUP 
clause 1807 order = this if isinstance(this, exp.Order) else None 1808 if order: 1809 expression.set("this", order.this.pop()) 1810 1811 expr_sql = super().arrayagg_sql(expression) 1812 1813 if order: 1814 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1815 1816 return expr_sql 1817 1818 def array_sql(self, expression: exp.Array) -> str: 1819 expressions = expression.expressions 1820 1821 first_expr = seq_get(expressions, 0) 1822 if isinstance(first_expr, exp.Select): 1823 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1824 if first_expr.text("kind").upper() == "STRUCT": 1825 object_construct_args = [] 1826 for expr in first_expr.expressions: 1827 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1828 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1829 name = expr.this if isinstance(expr, exp.Alias) else expr 1830 1831 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1832 1833 array_agg = exp.ArrayAgg( 1834 this=_build_object_construct(args=object_construct_args) 1835 ) 1836 1837 first_expr.set("kind", None) 1838 first_expr.set("expressions", [array_agg]) 1839 1840 return self.sql(first_expr.subquery()) 1841 1842 return inline_array_sql(self, expression) 1843 1844 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1845 zone = self.sql(expression, "this") 1846 if not zone: 1847 return super().currentdate_sql(expression) 1848 1849 expr = exp.Cast( 1850 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1851 to=exp.DataType(this=exp.DataType.Type.DATE), 1852 ) 1853 return self.sql(expr) 1854 1855 def dot_sql(self, expression: exp.Dot) -> str: 1856 this = expression.this 1857 1858 if not this.type: 1859 from sqlglot.optimizer.annotate_types import annotate_types 1860 1861 this = annotate_types(this, dialect=self.dialect) 1862 1863 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1864 # Generate colon notation for the top level STRUCT 1865 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1866 1867 return super().dot_sql(expression) 1868 1869 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1870 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1871 1872 def format_sql(self, expression: exp.Format) -> str: 1873 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1874 return self.func("TO_CHAR", expression.expressions[0]) 1875 1876 return self.function_fallback_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
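An illustrative sketch of how these options flow through the top-level API (the input SQL is arbitrary); sqlglot.transpile() returns one string per input statement:

import sqlglot

# Pretty-print with 4-space indents and always-quoted identifiers.
print(
    sqlglot.transpile(
        "select a, b from t where x = 1",
        read="snowflake",
        write="snowflake",
        pretty=True,
        identify=True,
        indent=4,
    )[0]
)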
def datatype_sql(self, expression: exp.DataType) -> str:
    expressions = expression.expressions
    if expressions and expression.is_type(*exp.DataType.STRUCT_TYPES):
        for field_type in expressions:
            # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
            if isinstance(field_type, exp.DataType):
                return "OBJECT"
            if (
                isinstance(field_type, exp.ColumnDef)
                and field_type.this
                and field_type.this.is_string
            ):
                # Doing OBJECT('foo' VARCHAR) is invalid Snowflake syntax. Moreover, besides
                # converting 'foo' into an identifier, we also need to quote it because these
                # keys are case-sensitive. For example:
                #
                # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:x FROM t -- correct
                # WITH t AS (SELECT OBJECT_CONSTRUCT('x', 'y') AS c) SELECT c:X FROM t -- incorrect, returns NULL
                field_type.this.replace(exp.to_identifier(field_type.name, quoted=True))

    return super().datatype_sql(expression)
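An indicative sketch of the OBJECT fallback above; exact output may vary across sqlglot versions:

import sqlglot

# A BigQuery STRUCT whose field is a bare type can't be written as
# OBJECT(<key> <type>), so the type collapses to plain OBJECT.
print(sqlglot.transpile("SELECT CAST(x AS STRUCT<INT64>)", read="bigquery", write="snowflake")[0])
# Expected shape: SELECT CAST(x AS OBJECT)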
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
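A hedged sketch, assuming T-SQL's DATETIMEFROMPARTS is parsed into exp.TimestampFromParts with a milli argument:

import sqlglot

# Snowflake's TIMESTAMP_FROM_PARTS takes nanoseconds rather than
# milliseconds, hence the * 1000000 conversion above.
print(
    sqlglot.transpile(
        "SELECT DATETIMEFROMPARTS(2024, 1, 2, 3, 4, 5, 6)",
        read="tsql",
        write="snowflake",
    )[0]
)
# Expected shape: SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5, 6 * 1000000)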
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    if expression.is_type(exp.DataType.Type.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(exp.DataType.Type.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
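For example, round-tripping a spatial cast through the Snowflake dialect should yield the conversion function (indicative output):

import sqlglot

# Casts to GEOGRAPHY/GEOMETRY are emitted as conversion functions.
print(sqlglot.transpile("SELECT CAST(x AS GEOGRAPHY)", read="snowflake", write="snowflake")[0])
# Expected: SELECT TO_GEOGRAPHY(x)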
def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value, dialect=self.dialect)

    # Snowflake requires that TRY_CAST's value be a string.
    # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets
    # "requires_string"), or if we can deduce that the value is a string, then we can
    # generate TRY_CAST.
    if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
        return super().trycast_sql(expression)

    return self.cast_sql(expression)
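A sketch of both branches; the expected outputs are indicative, not guaranteed:

import sqlglot

# x has no known type and isn't provably a string, so TRY_CAST degrades to CAST...
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="duckdb", write="snowflake")[0])
# Expected: SELECT CAST(x AS INT)

# ...while a string literal is provably text, so TRY_CAST survives.
print(sqlglot.transpile("SELECT TRY_CAST('5' AS INT)", read="duckdb", write="snowflake")[0])
# Expected: SELECT TRY_CAST('5' AS INT)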
def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    unnest_alias_columns = unnest_alias.columns if unnest_alias else []
    value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        value,
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    expression_parent = expression.parent

    explode = (
        f"FLATTEN({table_input})"
        if isinstance(expression_parent, exp.Lateral)
        else f"TABLE(FLATTEN({table_input}))"
    )
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    value = (
        ""
        if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
        else f"{value} FROM "
    )

    return f"{value}{explode}{alias}"
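An indicative transpilation sketch (Presto input; table and column names are arbitrary):

import sqlglot

# UNNEST in a FROM clause becomes TABLE(FLATTEN(INPUT => ...)), with the
# alias widened to FLATTEN's full output column list.
print(
    sqlglot.transpile(
        "SELECT x FROM UNNEST(ARRAY[1, 2]) AS t(x)",
        read="presto",
        write="snowflake",
    )[0]
)
# Roughly: SELECT x FROM TABLE(FLATTEN(INPUT => [1, 2])) AS t(seq, key, path, index, x, this)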
def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    privileges = self.expressions(expression, key="privileges", flat=True)
    privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
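A round-trip sketch, assuming the Snowflake parser accepts this SHOW variant:

import sqlglot

# SHOW is Snowflake-specific; the generator reassembles its optional clauses.
ast = sqlglot.parse_one("SHOW TERSE TABLES LIKE 'foo%' IN SCHEMA db.s", read="snowflake")
print(ast.sql(dialect="snowflake"))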
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""

    order = expression.args.get("order")
    if order is not None:
        order_clause = " ORDER" if order else " NOORDER"
    else:
        order_clause = ""

    return f"AUTOINCREMENT{start}{increment}{order_clause}"
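An indicative sketch transpiling a Postgres identity column into Snowflake's AUTOINCREMENT shorthand:

import sqlglot

# START WITH / INCREMENT BY map onto the START / INCREMENT modifiers above.
print(
    sqlglot.transpile(
        "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))",
        read="postgres",
        write="snowflake",
    )[0]
)
# Expected shape: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)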
def struct_sql(self, expression: exp.Struct) -> str:
    if len(expression.expressions) == 1:
        arg = expression.expressions[0]
        if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
            # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
            return f"{{{self.sql(expression.expressions[0])}}}"

    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
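For instance, a DuckDB struct literal maps to OBJECT_CONSTRUCT with alternating keys and values (indicative output):

import sqlglot

print(sqlglot.transpile("SELECT {'a': 1, 'b': 2}", read="duckdb", write="snowflake")[0])
# Expected: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)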
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    this = expression.this

    # JSON strings are valid coming from other dialects such as BQ, so
    # for these cases we PARSE_JSON preemptively
    if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
        "requires_json"
    ):
        this = exp.ParseJSON(this=this)

    return self.func(
        "GET_PATH",
        this,
        expression.expression,
    )
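An indicative sketch, assuming MySQL's JSON_EXTRACT is parsed into exp.JSONExtract:

import sqlglot

# col is a plain (non-JSON-typed) value, so it's wrapped in PARSE_JSON
# before GET_PATH extracts from it.
print(sqlglot.transpile("SELECT JSON_EXTRACT(col, '$.a.b')", read="mysql", write="snowflake")[0])
# Expected shape: SELECT GET_PATH(PARSE_JSON(col), 'a.b')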
def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
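An indicative sketch with a MySQL-style DATE_SUB:

import sqlglot

# Snowflake has no DATE_SUB, so the delta is negated and emitted via DATEADD.
print(sqlglot.transpile("SELECT DATE_SUB(d, INTERVAL 3 DAY)", read="mysql", write="snowflake")[0])
# Expected shape: SELECT DATEADD(DAY, 3 * -1, d)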
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    is_materialized = expression.find(exp.MaterializedProperty)
    copy_grants_property = expression.find(exp.CopyGrantsProperty)

    if expression.kind == "VIEW" and is_materialized and copy_grants_property:
        # For materialized views, COPY GRANTS is located *before* the columns list.
        # This is in contrast to normal views, where COPY GRANTS is located *after* the
        # columns list. We default CopyGrantsProperty to POST_SCHEMA, which means we need
        # to output it POST_NAME if a materialized view is detected.
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
        post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
        post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

        this_name = self.sql(expression.this, "this")
        copy_grants = self.sql(copy_grants_property)
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

    return super().createable_sql(expression, locations)
def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    this = expression.this

    # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
    # and add it later as part of the WITHIN GROUP clause
    order = this if isinstance(this, exp.Order) else None
    if order:
        expression.set("this", order.this.pop())

    expr_sql = super().arrayagg_sql(expression)

    if order:
        expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

    return expr_sql
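For example (indicative output):

import sqlglot

# The ORDER BY is lifted out of ARRAY_AGG into a WITHIN GROUP clause.
print(sqlglot.transpile("SELECT ARRAY_AGG(x ORDER BY y)", read="duckdb", write="snowflake")[0])
# Expected: SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY y)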
def array_sql(self, expression: exp.Array) -> str:
    expressions = expression.expressions

    first_expr = seq_get(expressions, 0)
    if isinstance(first_expr, exp.Select):
        # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
        if first_expr.text("kind").upper() == "STRUCT":
            object_construct_args = []
            for expr in first_expr.expressions:
                # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                name = expr.this if isinstance(expr, exp.Alias) else expr

                object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

            array_agg = exp.ArrayAgg(this=_build_object_construct(args=object_construct_args))

            first_expr.set("kind", None)
            first_expr.set("expressions", [array_agg])

            return self.sql(first_expr.subquery())

    return inline_array_sql(self, expression)
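An indicative sketch of the BigQuery pattern described in the comments above:

import sqlglot

# ARRAY(SELECT AS STRUCT ...) becomes a scalar subquery that ARRAY_AGGs
# OBJECT_CONSTRUCT rows.
print(
    sqlglot.transpile(
        "SELECT ARRAY(SELECT AS STRUCT a, b FROM t)",
        read="bigquery",
        write="snowflake",
    )[0]
)
# Expected shape: SELECT (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('a', a, 'b', b)) FROM t)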
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    zone = self.sql(expression, "this")
    if not zone:
        return super().currentdate_sql(expression)

    expr = exp.Cast(
        this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
        to=exp.DataType(this=exp.DataType.Type.DATE),
    )
    return self.sql(expr)
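An indicative sketch with BigQuery's zone-aware CURRENT_DATE:

import sqlglot

# A CURRENT_DATE with a timezone argument is emulated by converting
# CURRENT_TIMESTAMP() to the target zone and casting to DATE.
print(
    sqlglot.transpile(
        "SELECT CURRENT_DATE('America/New_York')",
        read="bigquery",
        write="snowflake",
    )[0]
)
# Expected shape: SELECT CAST(CONVERT_TIMEZONE('America/New_York', CURRENT_TIMESTAMP()) AS DATE)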
def dot_sql(self, expression: exp.Dot) -> str:
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this, dialect=self.dialect)

    if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
        # Generate colon notation for the top level STRUCT
        return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

    return super().dot_sql(expression)
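A minimal sketch that types the left-hand side by hand (the STRUCT type here is a made-up assumption) so the colon-notation branch is taken:

from sqlglot import exp

# Manually annotate a column as a STRUCT, then access a field via Dot.
col = exp.column("c")
col.type = exp.DataType.build("STRUCT<x INT>")
dot = exp.Dot(this=col, expression=exp.to_identifier("x"))
print(dot.sql(dialect="snowflake"))
# Expected: c:x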
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql