sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.annotate_types import TypeAnnotator
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

            if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
                klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
                formatted_exp = build_formatted_time(klass, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder
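
# Illustrative sketch, not part of the module: how the `_build_datetime` builders
# behave for common TO_TIMESTAMP calls. The ASTs shown are paraphrased; exact
# output may differ slightly across sqlglot versions.
#
#     >>> import sqlglot
#     >>> # A lone string literal becomes a plain cast
#     >>> sqlglot.parse_one("TO_TIMESTAMP('2020-01-01')", read="snowflake")
#     ... # -> Cast(this='2020-01-01', to=TIMESTAMP)
#     >>> # An integer argument is treated as a UNIX epoch value instead
#     >>> sqlglot.parse_one("TO_TIMESTAMP(1659981729)", read="snowflake")
#     ... # -> UnixToTime(this=1659981729)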


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc
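
# Illustrative sketch, not part of the module: DIV0, ZEROIFNULL and NULLIFZERO have
# no dedicated exp.* nodes; the builders above lower them to exp.If so that other
# dialects can render them as CASE/IFF. Roughly:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")[0]
#     ... # -> approximately "SELECT CASE WHEN b = 0 AND NOT a IS NULL THEN 0 ELSE a / b END"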


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )
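
# Illustrative sketch, not part of the module: Snowflake lacks GENERATE_DATE_ARRAY,
# so the rewrite above expresses a BigQuery-style date range via ARRAY_GENERATE_RANGE
# plus a DATEADD projection. Paraphrased, with details elided:
#
#     >>> import sqlglot
#     >>> sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-01-05'))"
#     >>> sqlglot.transpile(sql, read="bigquery", write="snowflake")[0]
#     ... # -> roughly: SELECT DATEADD(DAY, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value
#     ... #    FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(DAY, ...) + 1))) ...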


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll
            # be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _build_like(expr_type: t.Type[E]) -> t.Callable[[t.List], E | exp.Escape]:
    def _builder(args: t.List) -> E | exp.Escape:
        like_expr = expr_type(this=args[0], expression=args[1])
        escape = seq_get(args, 2)
        return exp.Escape(this=like_expr, expression=escape) if escape else like_expr

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
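
# Illustrative sketch, not part of the module: when a capture group is requested,
# `_regexpextract_sql` back-fills the optional REGEXP_SUBSTR arguments (position,
# occurrence, parameters) that precede it. Approximate shape:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT REGEXP_EXTRACT(col, 'a(b)')", read="bigquery", write="snowflake")[0]
#     ... # -> roughly "SELECT REGEXP_SUBSTR(col, 'a(b)', 1, 1, 'c', 1)"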


def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                if (
                    isinstance(unnest_ancestor, exp.Unnest)
                    and ancestor_identifier
                    and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                ):
                    continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression
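
# Illustrative sketch, not part of the module: `_qualify_unnested_columns` gives
# BigQuery-style UNNEST columns an explicit table qualifier so Snowflake can
# resolve them once the query is rendered with FLATTEN. Paraphrased:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT x FROM UNNEST([STRUCT(1 AS x)]) AS t", read="bigquery", write="snowflake"
#     ... )[0]
#     ... # -> the bare column x is rewritten as t.x before SQL generation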


def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
    expression = self._annotate_by_args(expression, "this")
    if expression.is_type(exp.DataType.Type.NULL):
        # Snowflake treats REVERSE(NULL) as a VARCHAR
        self._set_type(expression, exp.DataType.Type.VARCHAR)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.INT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT],
            exp.Ascii,
            exp.Length,
            exp.BitLength,
            exp.Levenshtein,
        },
        exp.DataType.Type.VARCHAR: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
            exp.Base64DecodeString,
            exp.Base64Encode,
            exp.MD5,
            exp.AIAgg,
            exp.AIClassify,
            exp.AISummarizeAgg,
            exp.Chr,
            exp.Collate,
            exp.HexDecodeString,
            exp.HexEncode,
            exp.Initcap,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.SHA,
            exp.SHA2,
            exp.Space,
            exp.Uuid,
        },
        exp.DataType.Type.BINARY: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
            exp.Base64DecodeBinary,
            exp.Compress,
            exp.MD5Digest,
            exp.SHA1Digest,
            exp.SHA2Digest,
            exp.Unhex,
        },
        exp.DataType.Type.BIGINT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT],
            exp.MD5NumberLower64,
            exp.MD5NumberUpper64,
        },
        exp.DataType.Type.ARRAY: {
            exp.Split,
        },
    }

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Substring,
            )
        },
        exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Reverse: _annotate_reverse,
    }
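
    # Illustrative sketch, not part of the module: ANNOTATORS drives sqlglot's type
    # annotation pass. For example, REVERSE(NULL) is typed VARCHAR by
    # _annotate_reverse above. Paraphrased:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.optimizer.annotate_types import annotate_types
    #     >>> ast = sqlglot.parse_one("SELECT REVERSE(NULL) AS r", read="snowflake")
    #     >>> annotate_types(ast, dialect="snowflake").selects[0].type
    #     ... # -> expected: DataType(this=Type.VARCHAR)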
"hh24": "%H", 616 "HH12": "%I", 617 "hh12": "%I", 618 "MI": "%M", 619 "mi": "%M", 620 "SS": "%S", 621 "ss": "%S", 622 "FF6": "%f", 623 "ff6": "%f", 624 } 625 626 DATE_PART_MAPPING = { 627 **Dialect.DATE_PART_MAPPING, 628 "ISOWEEK": "WEEKISO", 629 } 630 631 def quote_identifier(self, expression: E, identify: bool = True) -> E: 632 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 633 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 634 if ( 635 isinstance(expression, exp.Identifier) 636 and isinstance(expression.parent, exp.Table) 637 and expression.name.lower() == "dual" 638 ): 639 return expression # type: ignore 640 641 return super().quote_identifier(expression, identify=identify) 642 643 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 644 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 645 SINGLE_TOKENS.pop("$") 646 647 class Parser(parser.Parser): 648 IDENTIFY_PIVOT_STRINGS = True 649 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 650 COLON_IS_VARIANT_EXTRACT = True 651 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 652 653 ID_VAR_TOKENS = { 654 *parser.Parser.ID_VAR_TOKENS, 655 TokenType.EXCEPT, 656 TokenType.MATCH_CONDITION, 657 } 658 659 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 660 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 661 662 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 663 664 FUNCTIONS = { 665 **parser.Parser.FUNCTIONS, 666 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 667 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 668 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 669 this=seq_get(args, 1), expression=seq_get(args, 0) 670 ), 671 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 672 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 673 start=seq_get(args, 0), 674 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 675 step=seq_get(args, 2), 676 ), 677 "ARRAY_SORT": exp.SortArray.from_arg_list, 678 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 679 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 680 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 681 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 682 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 683 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 684 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 685 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 686 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 687 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 688 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 689 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 690 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 691 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 692 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 693 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 694 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 695 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 696 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 697 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 698 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 699 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 700 "DATE_TRUNC": _date_trunc_to_time, 701 "DATEADD": _build_date_time_add(exp.DateAdd), 702 "DATEDIFF": _build_datediff, 703 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 704 "DIV0": _build_if_from_div0, 705 "EDITDISTANCE": lambda args: 

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": build_timetostr_or_tochar,
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
            "LIKE": _build_like(exp.Like),
            "ILIKE": _build_like(exp.ILike),
        }
        FUNCTIONS.pop("PREDICT")
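
        # Illustrative sketch, not part of the module: two of the mappings above,
        # paraphrased. ARRAY_CONTAINS swaps its arguments into the canonical
        # exp.ArrayContains order, and SQUARE(x) becomes a generic power expression:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("ARRAY_CONTAINS(v, arr)", read="snowflake")
        #     ... # -> ArrayContains(this=arr, expression=v)
        #     >>> sqlglot.transpile("SELECT SQUARE(x)", read="snowflake", write="duckdb")[0]
        #     ... # -> roughly "SELECT POWER(x, 2)"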

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression
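
        # Illustrative sketch, not part of the module: the effect of _negate_range,
        # paraphrased. A negated IN over a subquery round-trips as an inequality
        # against ALL instead of NOT (value IN ...):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)", read="snowflake"
        #     ... ).sql("snowflake")
        #     ... # -> roughly 'SELECT * FROM t WHERE x <> ALL (SELECT y FROM u)'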

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table
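
        # Illustrative sketch, not part of the module: DATE_PART with an EPOCH_* part
        # is lowered by _parse_date_part to a UNIX-time expression scaled to the
        # requested unit, since EXTRACT alone cannot express it. Paraphrased:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "SELECT DATE_PART(EPOCH_MILLISECOND, col)", read="snowflake"
        #     ... ).sql("snowflake")
        #     ... # -> roughly 'SELECT EXTRACT(epoch_second FROM CAST(col AS TIMESTAMP)) * 1000'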

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # Inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # Out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)
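
        # Illustrative sketch, not part of the module: staged-file references parse into
        # an exp.Table whose name is an exp.Var, optionally carrying FILE_FORMAT/PATTERN
        # arguments. Paraphrased:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "SELECT * FROM @mystage/path (PATTERN => '.*[.]csv')", read="snowflake"
        #     ... )
        #     ... # -> FROM clause holds Table(this=Var(this='@mystage/path'), pattern='.*[.]csv')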

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self, e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
        }
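
        # Illustrative sketch, not part of the module: most TRANSFORMS entries simply
        # rename functions into their Snowflake spellings at generation time. Roughly:
        #
        #     >>> from sqlglot import exp, parse_one
        #     >>> exp.Uuid().sql("snowflake")
        #     ... # -> 'UUID_STRING()'
        #     >>> parse_one("SELECT ARG_MAX(a, b)", read="duckdb").sql("snowflake")
        #     ... # -> roughly 'SELECT MAX_BY(a, b)'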
rename_func("TO_TIMESTAMP"), 1452 exp.Uuid: rename_func("UUID_STRING"), 1453 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1454 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1455 exp.Xor: rename_func("BOOLXOR"), 1456 exp.ByteLength: rename_func("OCTET_LENGTH"), 1457 } 1458 1459 SUPPORTED_JSON_PATH_PARTS = { 1460 exp.JSONPathKey, 1461 exp.JSONPathRoot, 1462 exp.JSONPathSubscript, 1463 } 1464 1465 TYPE_MAPPING = { 1466 **generator.Generator.TYPE_MAPPING, 1467 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1468 exp.DataType.Type.NESTED: "OBJECT", 1469 exp.DataType.Type.STRUCT: "OBJECT", 1470 exp.DataType.Type.TEXT: "VARCHAR", 1471 } 1472 1473 TOKEN_MAPPING = { 1474 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1475 } 1476 1477 PROPERTIES_LOCATION = { 1478 **generator.Generator.PROPERTIES_LOCATION, 1479 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1480 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1481 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1482 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1483 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1484 } 1485 1486 UNSUPPORTED_VALUES_EXPRESSIONS = { 1487 exp.Map, 1488 exp.StarMap, 1489 exp.Struct, 1490 exp.VarMap, 1491 } 1492 1493 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1494 1495 def with_properties(self, properties: exp.Properties) -> str: 1496 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1497 1498 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1499 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1500 values_as_table = False 1501 1502 return super().values_sql(expression, values_as_table=values_as_table) 1503 1504 def datatype_sql(self, expression: exp.DataType) -> str: 1505 expressions = expression.expressions 1506 if ( 1507 expressions 1508 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1509 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1510 ): 1511 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1512 return "OBJECT" 1513 1514 return super().datatype_sql(expression) 1515 1516 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1517 return self.func( 1518 "TO_NUMBER", 1519 expression.this, 1520 expression.args.get("format"), 1521 expression.args.get("precision"), 1522 expression.args.get("scale"), 1523 ) 1524 1525 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1526 milli = expression.args.get("milli") 1527 if milli is not None: 1528 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1529 expression.set("nano", milli_to_nano) 1530 1531 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1532 1533 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1534 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1535 return self.func("TO_GEOGRAPHY", expression.this) 1536 if expression.is_type(exp.DataType.Type.GEOMETRY): 1537 return self.func("TO_GEOMETRY", expression.this) 1538 1539 return super().cast_sql(expression, safe_prefix=safe_prefix) 1540 1541 def trycast_sql(self, expression: exp.TryCast) -> str: 1542 value = expression.this 1543 1544 if value.type is None: 1545 from sqlglot.optimizer.annotate_types import annotate_types 1546 1547 value = annotate_types(value, dialect=self.dialect) 1548 1549 # Snowflake requires that TRY_CAST's value be a string 1550 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect 

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string. If TRY_CAST is being
            # roundtripped (since Snowflake is the only dialect that sets "requires_string"),
            # or if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"
INCREMENT {increment}" if increment else "" 1649 1650 order = expression.args.get("order") 1651 if order is not None: 1652 order_clause = " ORDER" if order else " NOORDER" 1653 else: 1654 order_clause = "" 1655 1656 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1657 1658 def cluster_sql(self, expression: exp.Cluster) -> str: 1659 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1660 1661 def struct_sql(self, expression: exp.Struct) -> str: 1662 if len(expression.expressions) == 1: 1663 arg = expression.expressions[0] 1664 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1665 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1666 return f"{{{self.sql(expression.expressions[0])}}}" 1667 1668 keys = [] 1669 values = [] 1670 1671 for i, e in enumerate(expression.expressions): 1672 if isinstance(e, exp.PropertyEQ): 1673 keys.append( 1674 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1675 ) 1676 values.append(e.expression) 1677 else: 1678 keys.append(exp.Literal.string(f"_{i}")) 1679 values.append(e) 1680 1681 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1682 1683 @unsupported_args("weight", "accuracy") 1684 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1685 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1686 1687 def alterset_sql(self, expression: exp.AlterSet) -> str: 1688 exprs = self.expressions(expression, flat=True) 1689 exprs = f" {exprs}" if exprs else "" 1690 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1691 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1692 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1693 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1694 tag = self.expressions(expression, key="tag", flat=True) 1695 tag = f" TAG {tag}" if tag else "" 1696 1697 return f"SET{exprs}{file_format}{copy_options}{tag}" 1698 1699 def strtotime_sql(self, expression: exp.StrToTime): 1700 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1701 return self.func( 1702 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1703 ) 1704 1705 def timestampsub_sql(self, expression: exp.TimestampSub): 1706 return self.sql( 1707 exp.TimestampAdd( 1708 this=expression.this, 1709 expression=expression.expression * -1, 1710 unit=expression.unit, 1711 ) 1712 ) 1713 1714 def jsonextract_sql(self, expression: exp.JSONExtract): 1715 this = expression.this 1716 1717 # JSON strings are valid coming from other dialects such as BQ so 1718 # for these cases we PARSE_JSON preemptively 1719 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1720 "requires_json" 1721 ): 1722 this = exp.ParseJSON(this=this) 1723 1724 return self.func( 1725 "GET_PATH", 1726 this, 1727 expression.expression, 1728 ) 1729 1730 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1731 this = expression.this 1732 if this.is_string: 1733 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1734 1735 return self.func("TO_CHAR", this, self.format_time(expression)) 1736 1737 def datesub_sql(self, expression: exp.DateSub) -> str: 1738 value = expression.expression 1739 if value: 1740 value.replace(value * (-1)) 1741 else: 1742 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1743 1744 return 
date_delta_sql("DATEADD")(self, expression) 1745 1746 def select_sql(self, expression: exp.Select) -> str: 1747 limit = expression.args.get("limit") 1748 offset = expression.args.get("offset") 1749 if offset and not limit: 1750 expression.limit(exp.Null(), copy=False) 1751 return super().select_sql(expression) 1752 1753 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1754 is_materialized = expression.find(exp.MaterializedProperty) 1755 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1756 1757 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1758 # For materialized views, COPY GRANTS is located *before* the columns list 1759 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1760 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1761 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1762 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1763 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1764 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1765 1766 this_name = self.sql(expression.this, "this") 1767 copy_grants = self.sql(copy_grants_property) 1768 this_schema = self.schema_columns_sql(expression.this) 1769 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1770 1771 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1772 1773 return super().createable_sql(expression, locations) 1774 1775 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1776 this = expression.this 1777 1778 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1779 # and add it later as part of the WITHIN GROUP clause 1780 order = this if isinstance(this, exp.Order) else None 1781 if order: 1782 expression.set("this", order.this.pop()) 1783 1784 expr_sql = super().arrayagg_sql(expression) 1785 1786 if order: 1787 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1788 1789 return expr_sql 1790 1791 def array_sql(self, expression: exp.Array) -> str: 1792 expressions = expression.expressions 1793 1794 first_expr = seq_get(expressions, 0) 1795 if isinstance(first_expr, exp.Select): 1796 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1797 if first_expr.text("kind").upper() == "STRUCT": 1798 object_construct_args = [] 1799 for expr in first_expr.expressions: 1800 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1801 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1802 name = expr.this if isinstance(expr, exp.Alias) else expr 1803 1804 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1805 1806 array_agg = exp.ArrayAgg( 1807 this=_build_object_construct(args=object_construct_args) 1808 ) 1809 1810 first_expr.set("kind", None) 1811 first_expr.set("expressions", [array_agg]) 1812 1813 return self.sql(first_expr.subquery()) 1814 1815 return inline_array_sql(self, expression) 1816 1817 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1818 zone = self.sql(expression, "this") 1819 if not zone: 1820 return super().currentdate_sql(expression) 1821 1822 expr = exp.Cast( 1823 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1824 to=exp.DataType(this=exp.DataType.Type.DATE), 1825 ) 1826 return self.sql(expr) 
1827 1828 def dot_sql(self, expression: exp.Dot) -> str: 1829 this = expression.this 1830 1831 if not this.type: 1832 from sqlglot.optimizer.annotate_types import annotate_types 1833 1834 this = annotate_types(this, dialect=self.dialect) 1835 1836 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1837 # Generate colon notation for the top level STRUCT 1838 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1839 1840 return super().dot_sql(expression) 1841 1842 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1843 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1844 1845 def format_sql(self, expression: exp.Format) -> str: 1846 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1847 return self.func("TO_CHAR", expression.expressions[0]) 1848 1849 return self.function_fallback_sql(expression)
517class Snowflake(Dialect): 518 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 519 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 520 NULL_ORDERING = "nulls_are_large" 521 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 522 SUPPORTS_USER_DEFINED_TYPES = False 523 SUPPORTS_SEMI_ANTI_JOIN = False 524 PREFER_CTE_ALIAS_COLUMN = True 525 TABLESAMPLE_SIZE_IS_PERCENT = True 526 COPY_PARAMS_ARE_CSV = False 527 ARRAY_AGG_INCLUDES_NULLS = None 528 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 529 TRY_CAST_REQUIRES_STRING = True 530 531 TYPE_TO_EXPRESSIONS = { 532 **Dialect.TYPE_TO_EXPRESSIONS, 533 exp.DataType.Type.INT: { 534 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT], 535 exp.Ascii, 536 exp.Length, 537 exp.BitLength, 538 exp.Levenshtein, 539 }, 540 exp.DataType.Type.VARCHAR: { 541 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR], 542 exp.Base64DecodeString, 543 exp.Base64Encode, 544 exp.MD5, 545 exp.AIAgg, 546 exp.AIClassify, 547 exp.AISummarizeAgg, 548 exp.Chr, 549 exp.Collate, 550 exp.HexDecodeString, 551 exp.HexEncode, 552 exp.Initcap, 553 exp.RegexpExtract, 554 exp.RegexpReplace, 555 exp.Repeat, 556 exp.Replace, 557 exp.SHA, 558 exp.SHA2, 559 exp.Space, 560 exp.Uuid, 561 }, 562 exp.DataType.Type.BINARY: { 563 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY], 564 exp.Base64DecodeBinary, 565 exp.Compress, 566 exp.MD5Digest, 567 exp.SHA1Digest, 568 exp.SHA2Digest, 569 exp.Unhex, 570 }, 571 exp.DataType.Type.BIGINT: { 572 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT], 573 exp.MD5NumberLower64, 574 exp.MD5NumberUpper64, 575 }, 576 exp.DataType.Type.ARRAY: { 577 exp.Split, 578 }, 579 } 580 581 ANNOTATORS = { 582 **Dialect.ANNOTATORS, 583 **{ 584 expr_type: annotate_with_type_lambda(data_type) 585 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 586 for expr_type in expressions 587 }, 588 **{ 589 expr_type: lambda self, e: self._annotate_by_args(e, "this") 590 for expr_type in ( 591 exp.Left, 592 exp.Right, 593 exp.Substring, 594 ) 595 }, 596 exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"), 597 exp.Reverse: _annotate_reverse, 598 } 599 600 TIME_MAPPING = { 601 "YYYY": "%Y", 602 "yyyy": "%Y", 603 "YY": "%y", 604 "yy": "%y", 605 "MMMM": "%B", 606 "mmmm": "%B", 607 "MON": "%b", 608 "mon": "%b", 609 "MM": "%m", 610 "mm": "%m", 611 "DD": "%d", 612 "dd": "%-d", 613 "DY": "%a", 614 "dy": "%w", 615 "HH24": "%H", 616 "hh24": "%H", 617 "HH12": "%I", 618 "hh12": "%I", 619 "MI": "%M", 620 "mi": "%M", 621 "SS": "%S", 622 "ss": "%S", 623 "FF6": "%f", 624 "ff6": "%f", 625 } 626 627 DATE_PART_MAPPING = { 628 **Dialect.DATE_PART_MAPPING, 629 "ISOWEEK": "WEEKISO", 630 } 631 632 def quote_identifier(self, expression: E, identify: bool = True) -> E: 633 # This disables quoting DUAL in SELECT ... 
FROM DUAL, because Snowflake treats an 634 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 635 if ( 636 isinstance(expression, exp.Identifier) 637 and isinstance(expression.parent, exp.Table) 638 and expression.name.lower() == "dual" 639 ): 640 return expression # type: ignore 641 642 return super().quote_identifier(expression, identify=identify) 643 644 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 645 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 646 SINGLE_TOKENS.pop("$") 647 648 class Parser(parser.Parser): 649 IDENTIFY_PIVOT_STRINGS = True 650 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 651 COLON_IS_VARIANT_EXTRACT = True 652 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 653 654 ID_VAR_TOKENS = { 655 *parser.Parser.ID_VAR_TOKENS, 656 TokenType.EXCEPT, 657 TokenType.MATCH_CONDITION, 658 } 659 660 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 661 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 662 663 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 664 665 FUNCTIONS = { 666 **parser.Parser.FUNCTIONS, 667 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 668 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 669 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 670 this=seq_get(args, 1), expression=seq_get(args, 0) 671 ), 672 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 673 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 674 start=seq_get(args, 0), 675 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 676 step=seq_get(args, 2), 677 ), 678 "ARRAY_SORT": exp.SortArray.from_arg_list, 679 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 680 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 681 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 682 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 683 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 684 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 685 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 686 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 687 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 688 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 689 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 690 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 691 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 692 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 693 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 694 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 695 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 696 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 697 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 698 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 699 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 700 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 701 "DATE_TRUNC": _date_trunc_to_time, 702 "DATEADD": _build_date_time_add(exp.DateAdd), 703 "DATEDIFF": _build_datediff, 704 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 705 "DIV0": _build_if_from_div0, 706 "EDITDISTANCE": lambda args: exp.Levenshtein( 707 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 708 ), 709 "FLATTEN": exp.Explode.from_arg_list, 710 "GET": exp.GetExtract.from_arg_list, 711 "GET_PATH": lambda args, dialect: exp.JSONExtract( 712 this=seq_get(args, 0), 713 expression=dialect.to_json_path(seq_get(args, 1)), 714 requires_json=True, 715 ), 716 "HEX_DECODE_BINARY":
exp.Unhex.from_arg_list, 717 "IFF": exp.If.from_arg_list, 718 "MD5_HEX": exp.MD5.from_arg_list, 719 "MD5_BINARY": exp.MD5Digest.from_arg_list, 720 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 721 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 722 "LAST_DAY": lambda args: exp.LastDay( 723 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 724 ), 725 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 726 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 727 "NULLIFZERO": _build_if_from_nullifzero, 728 "OBJECT_CONSTRUCT": _build_object_construct, 729 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 730 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 731 "REGEXP_REPLACE": _build_regexp_replace, 732 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 733 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 734 "REPLACE": build_replace_with_optional_replacement, 735 "RLIKE": exp.RegexpLike.from_arg_list, 736 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 737 "SHA1_HEX": exp.SHA.from_arg_list, 738 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 739 "SHA2_HEX": exp.SHA2.from_arg_list, 740 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 741 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 742 "TIMEADD": _build_date_time_add(exp.TimeAdd), 743 "TIMEDIFF": _build_datediff, 744 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 745 "TIMESTAMPDIFF": _build_datediff, 746 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 747 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 748 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 749 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 750 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 751 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 752 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 753 "TRY_TO_TIMESTAMP": _build_datetime( 754 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 755 ), 756 "TO_CHAR": build_timetostr_or_tochar, 757 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 758 "TO_NUMBER": lambda args: exp.ToNumber( 759 this=seq_get(args, 0), 760 format=seq_get(args, 1), 761 precision=seq_get(args, 2), 762 scale=seq_get(args, 3), 763 ), 764 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 765 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 766 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 767 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 768 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 769 "TO_VARCHAR": build_timetostr_or_tochar, 770 "TO_JSON": exp.JSONFormat.from_arg_list, 771 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 772 "ZEROIFNULL": _build_if_from_zeroifnull, 773 "LIKE": _build_like(exp.Like), 774 "ILIKE": _build_like(exp.ILike), 775 } 776 FUNCTIONS.pop("PREDICT") 777 778 FUNCTION_PARSERS = { 779 **parser.Parser.FUNCTION_PARSERS, 780 "DATE_PART": lambda self: self._parse_date_part(), 781 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 782 "LISTAGG": lambda self: self._parse_string_agg(), 783 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 784 } 785 FUNCTION_PARSERS.pop("TRIM") 786 787 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 788 789 
ALTER_PARSERS = { 790 **parser.Parser.ALTER_PARSERS, 791 "SESSION": lambda self: self._parse_alter_session(), 792 "UNSET": lambda self: self.expression( 793 exp.Set, 794 tag=self._match_text_seq("TAG"), 795 expressions=self._parse_csv(self._parse_id_var), 796 unset=True, 797 ), 798 } 799 800 STATEMENT_PARSERS = { 801 **parser.Parser.STATEMENT_PARSERS, 802 TokenType.GET: lambda self: self._parse_get(), 803 TokenType.PUT: lambda self: self._parse_put(), 804 TokenType.SHOW: lambda self: self._parse_show(), 805 } 806 807 PROPERTY_PARSERS = { 808 **parser.Parser.PROPERTY_PARSERS, 809 "CREDENTIALS": lambda self: self._parse_credentials_property(), 810 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 811 "LOCATION": lambda self: self._parse_location_property(), 812 "TAG": lambda self: self._parse_tag(), 813 "USING": lambda self: self._match_text_seq("TEMPLATE") 814 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 815 } 816 817 TYPE_CONVERTERS = { 818 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 819 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 820 } 821 822 SHOW_PARSERS = { 823 "DATABASES": _show_parser("DATABASES"), 824 "TERSE DATABASES": _show_parser("DATABASES"), 825 "SCHEMAS": _show_parser("SCHEMAS"), 826 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 827 "OBJECTS": _show_parser("OBJECTS"), 828 "TERSE OBJECTS": _show_parser("OBJECTS"), 829 "TABLES": _show_parser("TABLES"), 830 "TERSE TABLES": _show_parser("TABLES"), 831 "VIEWS": _show_parser("VIEWS"), 832 "TERSE VIEWS": _show_parser("VIEWS"), 833 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 834 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 835 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 836 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 837 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 838 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 839 "SEQUENCES": _show_parser("SEQUENCES"), 840 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 841 "STAGES": _show_parser("STAGES"), 842 "COLUMNS": _show_parser("COLUMNS"), 843 "USERS": _show_parser("USERS"), 844 "TERSE USERS": _show_parser("USERS"), 845 "FILE FORMATS": _show_parser("FILE FORMATS"), 846 "FUNCTIONS": _show_parser("FUNCTIONS"), 847 "PROCEDURES": _show_parser("PROCEDURES"), 848 "WAREHOUSES": _show_parser("WAREHOUSES"), 849 } 850 851 CONSTRAINT_PARSERS = { 852 **parser.Parser.CONSTRAINT_PARSERS, 853 "WITH": lambda self: self._parse_with_constraint(), 854 "MASKING": lambda self: self._parse_with_constraint(), 855 "PROJECTION": lambda self: self._parse_with_constraint(), 856 "TAG": lambda self: self._parse_with_constraint(), 857 } 858 859 STAGED_FILE_SINGLE_TOKENS = { 860 TokenType.DOT, 861 TokenType.MOD, 862 TokenType.SLASH, 863 } 864 865 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 866 867 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 868 869 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 870 871 LAMBDAS = { 872 **parser.Parser.LAMBDAS, 873 TokenType.ARROW: lambda self, expressions: self.expression( 874 exp.Lambda, 875 this=self._replace_lambda( 876 self._parse_assignment(), 877 expressions, 878 ), 879 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 880 ), 881 } 882 883 COLUMN_OPERATORS = { 884 **parser.Parser.COLUMN_OPERATORS, 885 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 886 exp.ModelAttribute, this=this, expression=attr 887 ), 888 } 
889 890 def _parse_use(self) -> exp.Use: 891 if self._match_text_seq("SECONDARY", "ROLES"): 892 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 893 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 894 return self.expression( 895 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 896 ) 897 898 return super()._parse_use() 899 900 def _negate_range( 901 self, this: t.Optional[exp.Expression] = None 902 ) -> t.Optional[exp.Expression]: 903 if not this: 904 return this 905 906 query = this.args.get("query") 907 if isinstance(this, exp.In) and isinstance(query, exp.Query): 908 # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so 909 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 910 # which can produce different results (most likely a Snowflake bug). 911 # 912 # https://docs.snowflake.com/en/sql-reference/functions/in 913 # Context: https://github.com/tobymao/sqlglot/issues/3890 914 return self.expression( 915 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 916 ) 917 918 return self.expression(exp.Not, this=this) 919 920 def _parse_tag(self) -> exp.Tags: 921 return self.expression( 922 exp.Tags, 923 expressions=self._parse_wrapped_csv(self._parse_property), 924 ) 925 926 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 927 if self._prev.token_type != TokenType.WITH: 928 self._retreat(self._index - 1) 929 930 if self._match_text_seq("MASKING", "POLICY"): 931 policy = self._parse_column() 932 return self.expression( 933 exp.MaskingPolicyColumnConstraint, 934 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 935 expressions=self._match(TokenType.USING) 936 and self._parse_wrapped_csv(self._parse_id_var), 937 ) 938 if self._match_text_seq("PROJECTION", "POLICY"): 939 policy = self._parse_column() 940 return self.expression( 941 exp.ProjectionPolicyColumnConstraint, 942 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 943 ) 944 if self._match(TokenType.TAG): 945 return self._parse_tag() 946 947 return None 948 949 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 950 if self._match(TokenType.TAG): 951 return self._parse_tag() 952 953 return super()._parse_with_property() 954 955 def _parse_create(self) -> exp.Create | exp.Command: 956 expression = super()._parse_create() 957 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 958 # Replace the Table node with the enclosed Identifier 959 expression.this.replace(expression.this.this) 960 961 return expression 962 963 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 964 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 965 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 966 this = self._parse_var() or self._parse_type() 967 968 if not this: 969 return None 970 971 self._match(TokenType.COMMA) 972 expression = self._parse_bitwise() 973 this = map_date_part(this) 974 name = this.name.upper() 975 976 if name.startswith("EPOCH"): 977 if name == "EPOCH_MILLISECOND": 978 scale = 10**3 979 elif name == "EPOCH_MICROSECOND": 980 scale = 10**6 981 elif name == "EPOCH_NANOSECOND": 982 scale = 10**9 983 else: 984 scale = None 985 986 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 987 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 988 989
if scale: 990 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 991 992 return to_unix 993 994 return self.expression(exp.Extract, this=this, expression=expression) 995 996 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 997 if is_map: 998 # Keys are strings in Snowflake's objects, see also: 999 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1000 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1001 return self._parse_slice(self._parse_string()) or self._parse_assignment() 1002 1003 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1004 1005 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1006 lateral = super()._parse_lateral() 1007 if not lateral: 1008 return lateral 1009 1010 if isinstance(lateral.this, exp.Explode): 1011 table_alias = lateral.args.get("alias") 1012 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1013 if table_alias and not table_alias.args.get("columns"): 1014 table_alias.set("columns", columns) 1015 elif not table_alias: 1016 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1017 1018 return lateral 1019 1020 def _parse_table_parts( 1021 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1022 ) -> exp.Table: 1023 # https://docs.snowflake.com/en/user-guide/querying-stage 1024 if self._match(TokenType.STRING, advance=False): 1025 table = self._parse_string() 1026 elif self._match_text_seq("@", advance=False): 1027 table = self._parse_location_path() 1028 else: 1029 table = None 1030 1031 if table: 1032 file_format = None 1033 pattern = None 1034 1035 wrapped = self._match(TokenType.L_PAREN) 1036 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1037 if self._match_text_seq("FILE_FORMAT", "=>"): 1038 file_format = self._parse_string() or super()._parse_table_parts( 1039 is_db_reference=is_db_reference 1040 ) 1041 elif self._match_text_seq("PATTERN", "=>"): 1042 pattern = self._parse_string() 1043 else: 1044 break 1045 1046 self._match(TokenType.COMMA) 1047 1048 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1049 else: 1050 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1051 1052 return table 1053 1054 def _parse_table( 1055 self, 1056 schema: bool = False, 1057 joins: bool = False, 1058 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1059 parse_bracket: bool = False, 1060 is_db_reference: bool = False, 1061 parse_partition: bool = False, 1062 consume_pipe: bool = False, 1063 ) -> t.Optional[exp.Expression]: 1064 table = super()._parse_table( 1065 schema=schema, 1066 joins=joins, 1067 alias_tokens=alias_tokens, 1068 parse_bracket=parse_bracket, 1069 is_db_reference=is_db_reference, 1070 parse_partition=parse_partition, 1071 ) 1072 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1073 table_from_rows = table.this 1074 for arg in exp.TableFromRows.arg_types: 1075 if arg != "this": 1076 table_from_rows.set(arg, table.args.get(arg)) 1077 1078 table = table_from_rows 1079 1080 return table 1081 1082 def _parse_id_var( 1083 self, 1084 any_token: bool = True, 1085 tokens: t.Optional[t.Collection[TokenType]] = None, 1086 ) -> t.Optional[exp.Expression]: 1087 if self._match_text_seq("IDENTIFIER", "("): 1088 identifier = ( 1089 super()._parse_id_var(any_token=any_token, tokens=tokens) 1090 or self._parse_string() 1091 ) 1092 
self._match_r_paren() 1093 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1094 1095 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1096 1097 def _parse_show_snowflake(self, this: str) -> exp.Show: 1098 scope = None 1099 scope_kind = None 1100 1101 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1102 # which is syntactically valid but has no effect on the output 1103 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1104 1105 history = self._match_text_seq("HISTORY") 1106 1107 like = self._parse_string() if self._match(TokenType.LIKE) else None 1108 1109 if self._match(TokenType.IN): 1110 if self._match_text_seq("ACCOUNT"): 1111 scope_kind = "ACCOUNT" 1112 elif self._match_text_seq("CLASS"): 1113 scope_kind = "CLASS" 1114 scope = self._parse_table_parts() 1115 elif self._match_text_seq("APPLICATION"): 1116 scope_kind = "APPLICATION" 1117 if self._match_text_seq("PACKAGE"): 1118 scope_kind += " PACKAGE" 1119 scope = self._parse_table_parts() 1120 elif self._match_set(self.DB_CREATABLES): 1121 scope_kind = self._prev.text.upper() 1122 if self._curr: 1123 scope = self._parse_table_parts() 1124 elif self._curr: 1125 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1126 scope = self._parse_table_parts() 1127 1128 return self.expression( 1129 exp.Show, 1130 **{ 1131 "terse": terse, 1132 "this": this, 1133 "history": history, 1134 "like": like, 1135 "scope": scope, 1136 "scope_kind": scope_kind, 1137 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1138 "limit": self._parse_limit(), 1139 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1140 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1141 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1142 }, 1143 ) 1144 1145 def _parse_put(self) -> exp.Put | exp.Command: 1146 if self._curr.token_type != TokenType.STRING: 1147 return self._parse_as_command(self._prev) 1148 1149 return self.expression( 1150 exp.Put, 1151 this=self._parse_string(), 1152 target=self._parse_location_path(), 1153 properties=self._parse_properties(), 1154 ) 1155 1156 def _parse_get(self) -> t.Optional[exp.Expression]: 1157 start = self._prev 1158 1159 # If we detect GET( then we need to parse a function, not a statement 1160 if self._match(TokenType.L_PAREN): 1161 self._retreat(self._index - 2) 1162 return self._parse_expression() 1163 1164 target = self._parse_location_path() 1165 1166 # Parse as command if unquoted file path 1167 if self._curr.token_type == TokenType.URI_START: 1168 return self._parse_as_command(start) 1169 1170 return self.expression( 1171 exp.Get, 1172 this=self._parse_string(), 1173 target=target, 1174 properties=self._parse_properties(), 1175 ) 1176 1177 def _parse_location_property(self) -> exp.LocationProperty: 1178 self._match(TokenType.EQ) 1179 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1180 1181 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1182 # Parse either a subquery or a staged file 1183 return ( 1184 self._parse_select(table=True, parse_subquery_alias=False) 1185 if self._match(TokenType.L_PAREN, advance=False) 1186 else self._parse_table_parts() 1187 ) 1188 1189 def _parse_location_path(self) -> exp.Var: 1190 start = self._curr 1191 self._advance_any(ignore_reserved=True) 1192 1193 # We avoid consuming a comma token because external tables like @foo and @bar 1194 # can be joined in a query with a comma
separator, as well as closing paren 1195 # in case of subqueries 1196 while self._is_connected() and not self._match_set( 1197 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1198 ): 1199 self._advance_any(ignore_reserved=True) 1200 1201 return exp.var(self._find_sql(start, self._prev)) 1202 1203 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1204 this = super()._parse_lambda_arg() 1205 1206 if not this: 1207 return this 1208 1209 typ = self._parse_types() 1210 1211 if typ: 1212 return self.expression(exp.Cast, this=this, to=typ) 1213 1214 return this 1215 1216 def _parse_foreign_key(self) -> exp.ForeignKey: 1217 # inline FK: the REFERENCES columns are implied 1218 if self._match(TokenType.REFERENCES, advance=False): 1219 return self.expression(exp.ForeignKey) 1220 1221 # out-of-line FK: explicitly names the columns 1222 return super()._parse_foreign_key() 1223 1224 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1225 self._match(TokenType.EQ) 1226 if self._match(TokenType.L_PAREN, advance=False): 1227 expressions = self._parse_wrapped_options() 1228 else: 1229 expressions = [self._parse_format_name()] 1230 1231 return self.expression( 1232 exp.FileFormatProperty, 1233 expressions=expressions, 1234 ) 1235 1236 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1237 return self.expression( 1238 exp.CredentialsProperty, 1239 expressions=self._parse_wrapped_options(), 1240 ) 1241 1242 def _parse_semantic_view(self) -> exp.SemanticView: 1243 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1244 1245 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1246 if self._match_text_seq("DIMENSIONS"): 1247 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1248 if self._match_text_seq("METRICS"): 1249 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1250 if self._match_text_seq("WHERE"): 1251 kwargs["where"] = self._parse_expression() 1252 1253 return self.expression(exp.SemanticView, **kwargs) 1254 1255 class Tokenizer(tokens.Tokenizer): 1256 STRING_ESCAPES = ["\\", "'"] 1257 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1258 RAW_STRINGS = ["$$"] 1259 COMMENTS = ["--", "//", ("/*", "*/")] 1260 NESTED_COMMENTS = False 1261 1262 KEYWORDS = { 1263 **tokens.Tokenizer.KEYWORDS, 1264 "BYTEINT": TokenType.INT, 1265 "FILE://": TokenType.URI_START, 1266 "FILE FORMAT": TokenType.FILE_FORMAT, 1267 "GET": TokenType.GET, 1268 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1269 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1270 "MINUS": TokenType.EXCEPT, 1271 "NCHAR VARYING": TokenType.VARCHAR, 1272 "PUT": TokenType.PUT, 1273 "REMOVE": TokenType.COMMAND, 1274 "RM": TokenType.COMMAND, 1275 "SAMPLE": TokenType.TABLE_SAMPLE, 1276 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1277 "SQL_DOUBLE": TokenType.DOUBLE, 1278 "SQL_VARCHAR": TokenType.VARCHAR, 1279 "STAGE": TokenType.STAGE, 1280 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1281 "STREAMLIT": TokenType.STREAMLIT, 1282 "TAG": TokenType.TAG, 1283 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1284 "TOP": TokenType.TOP, 1285 "WAREHOUSE": TokenType.WAREHOUSE, 1286 } 1287 KEYWORDS.pop("/*+") 1288 1289 SINGLE_TOKENS = { 1290 **tokens.Tokenizer.SINGLE_TOKENS, 1291 "$": TokenType.PARAMETER, 1292 "!": TokenType.EXCLAMATION, 1293 } 1294 1295 VAR_SINGLE_TOKENS = {"$"} 1296 1297 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1298 1299 class Generator(generator.Generator): 1300 PARAMETER_TOKEN = "$" 1301 MATCHED_BY_SOURCE = False 1302
SINGLE_STRING_INTERVAL = True 1303 JOIN_HINTS = False 1304 TABLE_HINTS = False 1305 QUERY_HINTS = False 1306 AGGREGATE_FILTER_SUPPORTED = False 1307 SUPPORTS_TABLE_COPY = False 1308 COLLATE_IS_FUNC = True 1309 LIMIT_ONLY_LITERALS = True 1310 JSON_KEY_VALUE_PAIR_SEP = "," 1311 INSERT_OVERWRITE = " OVERWRITE INTO" 1312 STRUCT_DELIMITER = ("(", ")") 1313 COPY_PARAMS_ARE_WRAPPED = False 1314 COPY_PARAMS_EQ_REQUIRED = True 1315 STAR_EXCEPT = "EXCLUDE" 1316 SUPPORTS_EXPLODING_PROJECTIONS = False 1317 ARRAY_CONCAT_IS_VAR_LEN = False 1318 SUPPORTS_CONVERT_TIMEZONE = True 1319 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1320 SUPPORTS_MEDIAN = True 1321 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1322 SUPPORTS_DECODE_CASE = True 1323 IS_BOOL_ALLOWED = False 1324 1325 TRANSFORMS = { 1326 **generator.Generator.TRANSFORMS, 1327 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1328 exp.ArgMax: rename_func("MAX_BY"), 1329 exp.ArgMin: rename_func("MIN_BY"), 1330 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1331 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1332 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1333 exp.AtTimeZone: lambda self, e: self.func( 1334 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1335 ), 1336 exp.BitwiseOr: rename_func("BITOR"), 1337 exp.BitwiseXor: rename_func("BITXOR"), 1338 exp.BitwiseAnd: rename_func("BITAND"), 1339 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1340 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1341 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1342 exp.BitwiseNot: rename_func("BITNOT"), 1343 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1344 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1345 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1346 exp.DateAdd: date_delta_sql("DATEADD"), 1347 exp.DateDiff: date_delta_sql("DATEDIFF"), 1348 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1349 exp.DatetimeDiff: timestampdiff_sql, 1350 exp.DateStrToDate: datestrtodate_sql, 1351 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1352 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1353 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1354 exp.DayOfYear: rename_func("DAYOFYEAR"), 1355 exp.Explode: rename_func("FLATTEN"), 1356 exp.Extract: lambda self, e: self.func( 1357 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1358 ), 1359 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1360 exp.FileFormatProperty: lambda self, 1361 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1362 exp.FromTimeZone: lambda self, e: self.func( 1363 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1364 ), 1365 exp.GenerateSeries: lambda self, e: self.func( 1366 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1367 ), 1368 exp.GetExtract: rename_func("GET"), 1369 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1370 exp.If: if_sql(name="IFF", false_value="NULL"), 1371 exp.JSONExtractArray: _json_extract_value_array_sql, 1372 exp.JSONExtractScalar: lambda self, e: self.func( 1373 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1374 ), 1375 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1376 exp.JSONPathRoot: lambda *_: "", 1377 exp.JSONValueArray: _json_extract_value_array_sql, 1378 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1379 rename_func("EDITDISTANCE") 1380 ), 1381 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 
'this')}", 1382 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1383 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1384 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1385 exp.MakeInterval: no_make_interval_sql, 1386 exp.Max: max_or_greatest, 1387 exp.Min: min_or_least, 1388 exp.ParseJSON: lambda self, e: self.func( 1389 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1390 ), 1391 exp.JSONFormat: rename_func("TO_JSON"), 1392 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1393 exp.PercentileCont: transforms.preprocess( 1394 [transforms.add_within_group_for_percentiles] 1395 ), 1396 exp.PercentileDisc: transforms.preprocess( 1397 [transforms.add_within_group_for_percentiles] 1398 ), 1399 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1400 exp.RegexpExtract: _regexpextract_sql, 1401 exp.RegexpExtractAll: _regexpextract_sql, 1402 exp.RegexpILike: _regexpilike_sql, 1403 exp.Rand: rename_func("RANDOM"), 1404 exp.Select: transforms.preprocess( 1405 [ 1406 transforms.eliminate_window_clause, 1407 transforms.eliminate_distinct_on, 1408 transforms.explode_projection_to_unnest(), 1409 transforms.eliminate_semi_and_anti_joins, 1410 _transform_generate_date_array, 1411 _qualify_unnested_columns, 1412 _eliminate_dot_variant_lookup, 1413 ] 1414 ), 1415 exp.SHA: rename_func("SHA1"), 1416 exp.MD5Digest: rename_func("MD5_BINARY"), 1417 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1418 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1419 exp.LowerHex: rename_func("TO_CHAR"), 1420 exp.SortArray: rename_func("ARRAY_SORT"), 1421 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1422 exp.StartsWith: rename_func("STARTSWITH"), 1423 exp.EndsWith: rename_func("ENDSWITH"), 1424 exp.StrPosition: lambda self, e: strposition_sql( 1425 self, e, func_name="CHARINDEX", supports_position=True 1426 ), 1427 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1428 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1429 exp.Stuff: rename_func("INSERT"), 1430 exp.StPoint: rename_func("ST_MAKEPOINT"), 1431 exp.TimeAdd: date_delta_sql("TIMEADD"), 1432 exp.Timestamp: no_timestamp_sql, 1433 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1434 exp.TimestampDiff: lambda self, e: self.func( 1435 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1436 ), 1437 exp.TimestampTrunc: timestamptrunc_sql(), 1438 exp.TimeStrToTime: timestrtotime_sql, 1439 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1440 exp.ToArray: rename_func("TO_ARRAY"), 1441 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1442 exp.ToDouble: rename_func("TO_DOUBLE"), 1443 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1444 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1445 exp.TsOrDsToDate: lambda self, e: self.func( 1446 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1447 ), 1448 exp.TsOrDsToTime: lambda self, e: self.func( 1449 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1450 ), 1451 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1452 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1453 exp.Uuid: rename_func("UUID_STRING"), 1454 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1455 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1456 exp.Xor: rename_func("BOOLXOR"), 1457 exp.ByteLength: rename_func("OCTET_LENGTH"), 1458 } 1459 1460 SUPPORTED_JSON_PATH_PARTS = { 1461 exp.JSONPathKey, 1462 exp.JSONPathRoot, 1463 
exp.JSONPathSubscript, 1464 } 1465 1466 TYPE_MAPPING = { 1467 **generator.Generator.TYPE_MAPPING, 1468 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1469 exp.DataType.Type.NESTED: "OBJECT", 1470 exp.DataType.Type.STRUCT: "OBJECT", 1471 exp.DataType.Type.TEXT: "VARCHAR", 1472 } 1473 1474 TOKEN_MAPPING = { 1475 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1476 } 1477 1478 PROPERTIES_LOCATION = { 1479 **generator.Generator.PROPERTIES_LOCATION, 1480 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1481 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1482 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1483 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1484 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1485 } 1486 1487 UNSUPPORTED_VALUES_EXPRESSIONS = { 1488 exp.Map, 1489 exp.StarMap, 1490 exp.Struct, 1491 exp.VarMap, 1492 } 1493 1494 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1495 1496 def with_properties(self, properties: exp.Properties) -> str: 1497 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1498 1499 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1500 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1501 values_as_table = False 1502 1503 return super().values_sql(expression, values_as_table=values_as_table) 1504 1505 def datatype_sql(self, expression: exp.DataType) -> str: 1506 expressions = expression.expressions 1507 if ( 1508 expressions 1509 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1510 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1511 ): 1512 # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ] 1513 return "OBJECT" 1514 1515 return super().datatype_sql(expression) 1516 1517 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1518 return self.func( 1519 "TO_NUMBER", 1520 expression.this, 1521 expression.args.get("format"), 1522 expression.args.get("precision"), 1523 expression.args.get("scale"), 1524 ) 1525 1526 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1527 milli = expression.args.get("milli") 1528 if milli is not None: 1529 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1530 expression.set("nano", milli_to_nano) 1531 1532 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1533 1534 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1535 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1536 return self.func("TO_GEOGRAPHY", expression.this) 1537 if expression.is_type(exp.DataType.Type.GEOMETRY): 1538 return self.func("TO_GEOMETRY", expression.this) 1539 1540 return super().cast_sql(expression, safe_prefix=safe_prefix) 1541 1542 def trycast_sql(self, expression: exp.TryCast) -> str: 1543 value = expression.this 1544 1545 if value.type is None: 1546 from sqlglot.optimizer.annotate_types import annotate_types 1547 1548 value = annotate_types(value, dialect=self.dialect) 1549 1550 # Snowflake requires that TRY_CAST's value be a string 1551 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1552 # if we can deduce that the value is a string, then we can generate TRY_CAST 1553 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1554 return super().trycast_sql(expression) 1555 1556 return self.cast_sql(expression) 1557 1558 def log_sql(self, expression: exp.Log) -> str: 1559 if not
expression.expression: 1560 return self.func("LN", expression.this) 1561 1562 return super().log_sql(expression) 1563 1564 def unnest_sql(self, expression: exp.Unnest) -> str: 1565 unnest_alias = expression.args.get("alias") 1566 offset = expression.args.get("offset") 1567 1568 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1569 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1570 1571 columns = [ 1572 exp.to_identifier("seq"), 1573 exp.to_identifier("key"), 1574 exp.to_identifier("path"), 1575 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1576 value, 1577 exp.to_identifier("this"), 1578 ] 1579 1580 if unnest_alias: 1581 unnest_alias.set("columns", columns) 1582 else: 1583 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1584 1585 table_input = self.sql(expression.expressions[0]) 1586 if not table_input.startswith("INPUT =>"): 1587 table_input = f"INPUT => {table_input}" 1588 1589 expression_parent = expression.parent 1590 1591 explode = ( 1592 f"FLATTEN({table_input})" 1593 if isinstance(expression_parent, exp.Lateral) 1594 else f"TABLE(FLATTEN({table_input}))" 1595 ) 1596 alias = self.sql(unnest_alias) 1597 alias = f" AS {alias}" if alias else "" 1598 value = ( 1599 "" 1600 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1601 else f"{value} FROM " 1602 ) 1603 1604 return f"{value}{explode}{alias}" 1605 1606 def show_sql(self, expression: exp.Show) -> str: 1607 terse = "TERSE " if expression.args.get("terse") else "" 1608 history = " HISTORY" if expression.args.get("history") else "" 1609 like = self.sql(expression, "like") 1610 like = f" LIKE {like}" if like else "" 1611 1612 scope = self.sql(expression, "scope") 1613 scope = f" {scope}" if scope else "" 1614 1615 scope_kind = self.sql(expression, "scope_kind") 1616 if scope_kind: 1617 scope_kind = f" IN {scope_kind}" 1618 1619 starts_with = self.sql(expression, "starts_with") 1620 if starts_with: 1621 starts_with = f" STARTS WITH {starts_with}" 1622 1623 limit = self.sql(expression, "limit") 1624 1625 from_ = self.sql(expression, "from") 1626 if from_: 1627 from_ = f" FROM {from_}" 1628 1629 privileges = self.expressions(expression, key="privileges", flat=True) 1630 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1631 1632 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1633 1634 def describe_sql(self, expression: exp.Describe) -> str: 1635 # Default to table if kind is unknown 1636 kind_value = expression.args.get("kind") or "TABLE" 1637 kind = f" {kind_value}" if kind_value else "" 1638 this = f" {self.sql(expression, 'this')}" 1639 expressions = self.expressions(expression, flat=True) 1640 expressions = f" {expressions}" if expressions else "" 1641 return f"DESCRIBE{kind}{this}{expressions}" 1642 1643 def generatedasidentitycolumnconstraint_sql( 1644 self, expression: exp.GeneratedAsIdentityColumnConstraint 1645 ) -> str: 1646 start = expression.args.get("start") 1647 start = f" START {start}" if start else "" 1648 increment = expression.args.get("increment") 1649 increment = f" INCREMENT {increment}" if increment else "" 1650 1651 order = expression.args.get("order") 1652 if order is not None: 1653 order_clause = " ORDER" if order else " NOORDER" 1654 else: 1655 order_clause = "" 1656 1657 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1658 1659 def cluster_sql(self, expression: exp.Cluster) -> str: 1660 return f"CLUSTER BY 
({self.expressions(expression, flat=True)})" 1661 1662 def struct_sql(self, expression: exp.Struct) -> str: 1663 if len(expression.expressions) == 1: 1664 arg = expression.expressions[0] 1665 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1666 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1667 return f"{{{self.sql(expression.expressions[0])}}}" 1668 1669 keys = [] 1670 values = [] 1671 1672 for i, e in enumerate(expression.expressions): 1673 if isinstance(e, exp.PropertyEQ): 1674 keys.append( 1675 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1676 ) 1677 values.append(e.expression) 1678 else: 1679 keys.append(exp.Literal.string(f"_{i}")) 1680 values.append(e) 1681 1682 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1683 1684 @unsupported_args("weight", "accuracy") 1685 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1686 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1687 1688 def alterset_sql(self, expression: exp.AlterSet) -> str: 1689 exprs = self.expressions(expression, flat=True) 1690 exprs = f" {exprs}" if exprs else "" 1691 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1692 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1693 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1694 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1695 tag = self.expressions(expression, key="tag", flat=True) 1696 tag = f" TAG {tag}" if tag else "" 1697 1698 return f"SET{exprs}{file_format}{copy_options}{tag}" 1699 1700 def strtotime_sql(self, expression: exp.StrToTime): 1701 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1702 return self.func( 1703 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1704 ) 1705 1706 def timestampsub_sql(self, expression: exp.TimestampSub): 1707 return self.sql( 1708 exp.TimestampAdd( 1709 this=expression.this, 1710 expression=expression.expression * -1, 1711 unit=expression.unit, 1712 ) 1713 ) 1714 1715 def jsonextract_sql(self, expression: exp.JSONExtract): 1716 this = expression.this 1717 1718 # JSON strings are valid coming from other dialects such as BQ so 1719 # for these cases we PARSE_JSON preemptively 1720 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1721 "requires_json" 1722 ): 1723 this = exp.ParseJSON(this=this) 1724 1725 return self.func( 1726 "GET_PATH", 1727 this, 1728 expression.expression, 1729 ) 1730 1731 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1732 this = expression.this 1733 if this.is_string: 1734 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1735 1736 return self.func("TO_CHAR", this, self.format_time(expression)) 1737 1738 def datesub_sql(self, expression: exp.DateSub) -> str: 1739 value = expression.expression 1740 if value: 1741 value.replace(value * (-1)) 1742 else: 1743 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1744 1745 return date_delta_sql("DATEADD")(self, expression) 1746 1747 def select_sql(self, expression: exp.Select) -> str: 1748 limit = expression.args.get("limit") 1749 offset = expression.args.get("offset") 1750 if offset and not limit: 1751 expression.limit(exp.Null(), copy=False) 1752 return super().select_sql(expression) 1753 1754 def createable_sql(self, expression: exp.Create, 
locations: t.DefaultDict) -> str: 1755 is_materialized = expression.find(exp.MaterializedProperty) 1756 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1757 1758 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1759 # For materialized views, COPY GRANTS is located *before* the columns list 1760 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1761 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1762 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1763 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1764 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1765 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1766 1767 this_name = self.sql(expression.this, "this") 1768 copy_grants = self.sql(copy_grants_property) 1769 this_schema = self.schema_columns_sql(expression.this) 1770 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1771 1772 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1773 1774 return super().createable_sql(expression, locations) 1775 1776 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1777 this = expression.this 1778 1779 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1780 # and add it later as part of the WITHIN GROUP clause 1781 order = this if isinstance(this, exp.Order) else None 1782 if order: 1783 expression.set("this", order.this.pop()) 1784 1785 expr_sql = super().arrayagg_sql(expression) 1786 1787 if order: 1788 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1789 1790 return expr_sql 1791 1792 def array_sql(self, expression: exp.Array) -> str: 1793 expressions = expression.expressions 1794 1795 first_expr = seq_get(expressions, 0) 1796 if isinstance(first_expr, exp.Select): 1797 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1798 if first_expr.text("kind").upper() == "STRUCT": 1799 object_construct_args = [] 1800 for expr in first_expr.expressions: 1801 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1802 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1803 name = expr.this if isinstance(expr, exp.Alias) else expr 1804 1805 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1806 1807 array_agg = exp.ArrayAgg( 1808 this=_build_object_construct(args=object_construct_args) 1809 ) 1810 1811 first_expr.set("kind", None) 1812 first_expr.set("expressions", [array_agg]) 1813 1814 return self.sql(first_expr.subquery()) 1815 1816 return inline_array_sql(self, expression) 1817 1818 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1819 zone = self.sql(expression, "this") 1820 if not zone: 1821 return super().currentdate_sql(expression) 1822 1823 expr = exp.Cast( 1824 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1825 to=exp.DataType(this=exp.DataType.Type.DATE), 1826 ) 1827 return self.sql(expr) 1828 1829 def dot_sql(self, expression: exp.Dot) -> str: 1830 this = expression.this 1831 1832 if not this.type: 1833 from sqlglot.optimizer.annotate_types import annotate_types 1834 1835 this = annotate_types(this, dialect=self.dialect) 1836 1837 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1838 # Generate colon notation for the top 
level STRUCT 1839 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1840 1841 return super().dot_sql(expression) 1842 1843 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1844 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1845 1846 def format_sql(self, expression: exp.Format) -> str: 1847 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1848 return self.func("TO_CHAR", expression.expressions[0]) 1849 1850 return self.function_fallback_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,
WITH y(c) AS (
SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
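A minimal sketch of exercising this flag through sqlglot's optimizer (the query and dialect choice are illustrative, and the exact optimized output can vary by version):

    import sqlglot
    from sqlglot.optimizer import optimize

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 AS a) AS x HAVING c > 0) SELECT c FROM y"

    # Under the Snowflake dialect, HAVING's `c` resolves against the CTE's column
    # alias list, so the projection gains an explicit `AS c` instead of erroring.
    print(optimize(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))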
Associates this dialect's time formats with their equivalent Python strftime formats.
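As a hedged illustration, a Snowflake format string such as 'YYYY-MM-DD' is translated through this mapping when transpiling to a strftime-based dialect (DuckDB here; the exact output shape can vary by version):

    import sqlglot

    # 'YYYY-MM-DD' is mapped to '%Y-%m-%d' on the way out.
    print(sqlglot.transpile("SELECT TO_VARCHAR(d, 'YYYY-MM-DD')", read="snowflake", write="duckdb")[0])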
632 def quote_identifier(self, expression: E, identify: bool = True) -> E: 633 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 634 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 635 if ( 636 isinstance(expression, exp.Identifier) 637 and isinstance(expression.parent, exp.Table) 638 and expression.name.lower() == "dual" 639 ): 640 return expression # type: ignore 641 642 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
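A minimal sketch of the DUAL carve-out (the identifier names are illustrative):

    from sqlglot import exp, parse_one
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()
    ast = parse_one("SELECT col FROM dual", read="snowflake")

    # Ask for every identifier to be quoted; the table reference `dual`
    # is deliberately left untouched by this override.
    for node in ast.find_all(exp.Identifier):
        dialect.quote_identifier(node)

    print(ast.sql(dialect="snowflake"))  # SELECT "col" FROM dual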
Mapping of an escaped sequence (e.g. the two characters `\n`) to its unescaped version (the literal newline character).
644 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 645 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 646 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
648 class Parser(parser.Parser): 649 IDENTIFY_PIVOT_STRINGS = True 650 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 651 COLON_IS_VARIANT_EXTRACT = True 652 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 653 654 ID_VAR_TOKENS = { 655 *parser.Parser.ID_VAR_TOKENS, 656 TokenType.EXCEPT, 657 TokenType.MATCH_CONDITION, 658 } 659 660 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 661 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 662 663 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 664 665 FUNCTIONS = { 666 **parser.Parser.FUNCTIONS, 667 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 668 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 669 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 670 this=seq_get(args, 1), expression=seq_get(args, 0) 671 ), 672 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 673 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 674 start=seq_get(args, 0), 675 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 676 step=seq_get(args, 2), 677 ), 678 "ARRAY_SORT": exp.SortArray.from_arg_list, 679 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 680 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 681 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 682 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 683 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 684 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 685 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 686 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 687 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 688 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 689 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 690 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 691 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 692 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 693 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 694 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 695 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 696 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 697 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 698 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 699 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 700 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 701 "DATE_TRUNC": _date_trunc_to_time, 702 "DATEADD": _build_date_time_add(exp.DateAdd), 703 "DATEDIFF": _build_datediff, 704 "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, 705 "DIV0": _build_if_from_div0, 706 "EDITDISTANCE": lambda args: exp.Levenshtein( 707 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 708 ), 709 "FLATTEN": exp.Explode.from_arg_list, 710 "GET": exp.GetExtract.from_arg_list, 711 "GET_PATH": lambda args, dialect: exp.JSONExtract( 712 this=seq_get(args, 0), 713 expression=dialect.to_json_path(seq_get(args, 1)), 714 requires_json=True, 715 ), 716 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 717 "IFF": exp.If.from_arg_list, 718 "MD5_HEX": exp.MD5.from_arg_list, 719 "MD5_BINARY": exp.MD5Digest.from_arg_list, 720 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 721 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 722 "LAST_DAY": lambda args: exp.LastDay( 723 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 724 ), 725 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 726 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 727 "NULLIFZERO": 
_build_if_from_nullifzero, 728 "OBJECT_CONSTRUCT": _build_object_construct, 729 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 730 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 731 "REGEXP_REPLACE": _build_regexp_replace, 732 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 733 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 734 "REPLACE": build_replace_with_optional_replacement, 735 "RLIKE": exp.RegexpLike.from_arg_list, 736 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 737 "SHA1_HEX": exp.SHA.from_arg_list, 738 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 739 "SHA2_HEX": exp.SHA2.from_arg_list, 740 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 741 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 742 "TIMEADD": _build_date_time_add(exp.TimeAdd), 743 "TIMEDIFF": _build_datediff, 744 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 745 "TIMESTAMPDIFF": _build_datediff, 746 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 747 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 748 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 749 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 750 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 751 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 752 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 753 "TRY_TO_TIMESTAMP": _build_datetime( 754 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 755 ), 756 "TO_CHAR": build_timetostr_or_tochar, 757 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 758 "TO_NUMBER": lambda args: exp.ToNumber( 759 this=seq_get(args, 0), 760 format=seq_get(args, 1), 761 precision=seq_get(args, 2), 762 scale=seq_get(args, 3), 763 ), 764 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 765 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 766 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 767 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 768 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 769 "TO_VARCHAR": build_timetostr_or_tochar, 770 "TO_JSON": exp.JSONFormat.from_arg_list, 771 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 772 "ZEROIFNULL": _build_if_from_zeroifnull, 773 "LIKE": _build_like(exp.Like), 774 "ILIKE": _build_like(exp.ILike), 775 } 776 FUNCTIONS.pop("PREDICT") 777 778 FUNCTION_PARSERS = { 779 **parser.Parser.FUNCTION_PARSERS, 780 "DATE_PART": lambda self: self._parse_date_part(), 781 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 782 "LISTAGG": lambda self: self._parse_string_agg(), 783 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 784 } 785 FUNCTION_PARSERS.pop("TRIM") 786 787 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 788 789 ALTER_PARSERS = { 790 **parser.Parser.ALTER_PARSERS, 791 "SESSION": lambda self: self._parse_alter_session(), 792 "UNSET": lambda self: self.expression( 793 exp.Set, 794 tag=self._match_text_seq("TAG"), 795 expressions=self._parse_csv(self._parse_id_var), 796 unset=True, 797 ), 798 } 799 800 STATEMENT_PARSERS = { 801 **parser.Parser.STATEMENT_PARSERS, 802 TokenType.GET: lambda self: self._parse_get(), 803 TokenType.PUT: lambda self: self._parse_put(), 804 TokenType.SHOW: lambda self: self._parse_show(), 805 } 806 807 PROPERTY_PARSERS = { 
808 **parser.Parser.PROPERTY_PARSERS, 809 "CREDENTIALS": lambda self: self._parse_credentials_property(), 810 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 811 "LOCATION": lambda self: self._parse_location_property(), 812 "TAG": lambda self: self._parse_tag(), 813 "USING": lambda self: self._match_text_seq("TEMPLATE") 814 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 815 } 816 817 TYPE_CONVERTERS = { 818 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 819 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 820 } 821 822 SHOW_PARSERS = { 823 "DATABASES": _show_parser("DATABASES"), 824 "TERSE DATABASES": _show_parser("DATABASES"), 825 "SCHEMAS": _show_parser("SCHEMAS"), 826 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 827 "OBJECTS": _show_parser("OBJECTS"), 828 "TERSE OBJECTS": _show_parser("OBJECTS"), 829 "TABLES": _show_parser("TABLES"), 830 "TERSE TABLES": _show_parser("TABLES"), 831 "VIEWS": _show_parser("VIEWS"), 832 "TERSE VIEWS": _show_parser("VIEWS"), 833 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 834 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 835 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 836 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 837 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 838 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 839 "SEQUENCES": _show_parser("SEQUENCES"), 840 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 841 "STAGES": _show_parser("STAGES"), 842 "COLUMNS": _show_parser("COLUMNS"), 843 "USERS": _show_parser("USERS"), 844 "TERSE USERS": _show_parser("USERS"), 845 "FILE FORMATS": _show_parser("FILE FORMATS"), 846 "FUNCTIONS": _show_parser("FUNCTIONS"), 847 "PROCEDURES": _show_parser("PROCEDURES"), 848 "WAREHOUSES": _show_parser("WAREHOUSES"), 849 } 850 851 CONSTRAINT_PARSERS = { 852 **parser.Parser.CONSTRAINT_PARSERS, 853 "WITH": lambda self: self._parse_with_constraint(), 854 "MASKING": lambda self: self._parse_with_constraint(), 855 "PROJECTION": lambda self: self._parse_with_constraint(), 856 "TAG": lambda self: self._parse_with_constraint(), 857 } 858 859 STAGED_FILE_SINGLE_TOKENS = { 860 TokenType.DOT, 861 TokenType.MOD, 862 TokenType.SLASH, 863 } 864 865 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 866 867 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 868 869 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 870 871 LAMBDAS = { 872 **parser.Parser.LAMBDAS, 873 TokenType.ARROW: lambda self, expressions: self.expression( 874 exp.Lambda, 875 this=self._replace_lambda( 876 self._parse_assignment(), 877 expressions, 878 ), 879 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 880 ), 881 } 882 883 COLUMN_OPERATORS = { 884 **parser.Parser.COLUMN_OPERATORS, 885 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 886 exp.ModelAttribute, this=this, expression=attr 887 ), 888 } 889 890 def _parse_use(self) -> exp.Use: 891 if self._match_text_seq("SECONDARY", "ROLES"): 892 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 893 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 894 return self.expression( 895 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 896 ) 897 898 return super()._parse_use() 899 900 def _negate_range( 901 self, this: t.Optional[exp.Expression] = None 902 ) -> t.Optional[exp.Expression]: 903 if not this: 904 return 
this 905 906 query = this.args.get("query") 907 if isinstance(this, exp.In) and isinstance(query, exp.Query): 908 # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so 909 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 910 # which can produce different results (most likely a Snowflake bug). 911 # 912 # https://docs.snowflake.com/en/sql-reference/functions/in 913 # Context: https://github.com/tobymao/sqlglot/issues/3890 914 return self.expression( 915 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 916 ) 917 918 return self.expression(exp.Not, this=this) 919 920 def _parse_tag(self) -> exp.Tags: 921 return self.expression( 922 exp.Tags, 923 expressions=self._parse_wrapped_csv(self._parse_property), 924 ) 925 926 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 927 if self._prev.token_type != TokenType.WITH: 928 self._retreat(self._index - 1) 929 930 if self._match_text_seq("MASKING", "POLICY"): 931 policy = self._parse_column() 932 return self.expression( 933 exp.MaskingPolicyColumnConstraint, 934 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 935 expressions=self._match(TokenType.USING) 936 and self._parse_wrapped_csv(self._parse_id_var), 937 ) 938 if self._match_text_seq("PROJECTION", "POLICY"): 939 policy = self._parse_column() 940 return self.expression( 941 exp.ProjectionPolicyColumnConstraint, 942 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 943 ) 944 if self._match(TokenType.TAG): 945 return self._parse_tag() 946 947 return None 948 949 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 950 if self._match(TokenType.TAG): 951 return self._parse_tag() 952 953 return super()._parse_with_property() 954 955 def _parse_create(self) -> exp.Create | exp.Command: 956 expression = super()._parse_create() 957 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 958 # Replace the Table node with the enclosed Identifier 959 expression.this.replace(expression.this.this) 960 961 return expression 962 963 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 964 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 965 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 966 this = self._parse_var() or self._parse_type() 967 968 if not this: 969 return None 970 971 self._match(TokenType.COMMA) 972 expression = self._parse_bitwise() 973 this = map_date_part(this) 974 name = this.name.upper() 975 976 if name.startswith("EPOCH"): 977 if name == "EPOCH_MILLISECOND": 978 scale = 10**3 979 elif name == "EPOCH_MICROSECOND": 980 scale = 10**6 981 elif name == "EPOCH_NANOSECOND": 982 scale = 10**9 983 else: 984 scale = None 985 986 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 987 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 988 989 if scale: 990 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 991 992 return to_unix 993 994 return self.expression(exp.Extract, this=this, expression=expression) 995 996 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 997 if is_map: 998 # Keys are strings in Snowflake's objects, see also: 999 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 1000 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 1001 return 
self._parse_slice(self._parse_string()) or self._parse_assignment() 1002 1003 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 1004 1005 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 1006 lateral = super()._parse_lateral() 1007 if not lateral: 1008 return lateral 1009 1010 if isinstance(lateral.this, exp.Explode): 1011 table_alias = lateral.args.get("alias") 1012 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 1013 if table_alias and not table_alias.args.get("columns"): 1014 table_alias.set("columns", columns) 1015 elif not table_alias: 1016 exp.alias_(lateral, "_flattened", table=columns, copy=False) 1017 1018 return lateral 1019 1020 def _parse_table_parts( 1021 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 1022 ) -> exp.Table: 1023 # https://docs.snowflake.com/en/user-guide/querying-stage 1024 if self._match(TokenType.STRING, advance=False): 1025 table = self._parse_string() 1026 elif self._match_text_seq("@", advance=False): 1027 table = self._parse_location_path() 1028 else: 1029 table = None 1030 1031 if table: 1032 file_format = None 1033 pattern = None 1034 1035 wrapped = self._match(TokenType.L_PAREN) 1036 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1037 if self._match_text_seq("FILE_FORMAT", "=>"): 1038 file_format = self._parse_string() or super()._parse_table_parts( 1039 is_db_reference=is_db_reference 1040 ) 1041 elif self._match_text_seq("PATTERN", "=>"): 1042 pattern = self._parse_string() 1043 else: 1044 break 1045 1046 self._match(TokenType.COMMA) 1047 1048 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1049 else: 1050 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1051 1052 return table 1053 1054 def _parse_table( 1055 self, 1056 schema: bool = False, 1057 joins: bool = False, 1058 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1059 parse_bracket: bool = False, 1060 is_db_reference: bool = False, 1061 parse_partition: bool = False, 1062 consume_pipe: bool = False, 1063 ) -> t.Optional[exp.Expression]: 1064 table = super()._parse_table( 1065 schema=schema, 1066 joins=joins, 1067 alias_tokens=alias_tokens, 1068 parse_bracket=parse_bracket, 1069 is_db_reference=is_db_reference, 1070 parse_partition=parse_partition, 1071 ) 1072 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1073 table_from_rows = table.this 1074 for arg in exp.TableFromRows.arg_types: 1075 if arg != "this": 1076 table_from_rows.set(arg, table.args.get(arg)) 1077 1078 table = table_from_rows 1079 1080 return table 1081 1082 def _parse_id_var( 1083 self, 1084 any_token: bool = True, 1085 tokens: t.Optional[t.Collection[TokenType]] = None, 1086 ) -> t.Optional[exp.Expression]: 1087 if self._match_text_seq("IDENTIFIER", "("): 1088 identifier = ( 1089 super()._parse_id_var(any_token=any_token, tokens=tokens) 1090 or self._parse_string() 1091 ) 1092 self._match_r_paren() 1093 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1094 1095 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1096 1097 def _parse_show_snowflake(self, this: str) -> exp.Show: 1098 scope = None 1099 scope_kind = None 1100 1101 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1102 # which is syntactically valid but has no effect on the output 1103 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1104 1105 history = 
self._match_text_seq("HISTORY") 1106 1107 like = self._parse_string() if self._match(TokenType.LIKE) else None 1108 1109 if self._match(TokenType.IN): 1110 if self._match_text_seq("ACCOUNT"): 1111 scope_kind = "ACCOUNT" 1112 elif self._match_text_seq("CLASS"): 1113 scope_kind = "CLASS" 1114 scope = self._parse_table_parts() 1115 elif self._match_text_seq("APPLICATION"): 1116 scope_kind = "APPLICATION" 1117 if self._match_text_seq("PACKAGE"): 1118 scope_kind += " PACKAGE" 1119 scope = self._parse_table_parts() 1120 elif self._match_set(self.DB_CREATABLES): 1121 scope_kind = self._prev.text.upper() 1122 if self._curr: 1123 scope = self._parse_table_parts() 1124 elif self._curr: 1125 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1126 scope = self._parse_table_parts() 1127 1128 return self.expression( 1129 exp.Show, 1130 **{ 1131 "terse": terse, 1132 "this": this, 1133 "history": history, 1134 "like": like, 1135 "scope": scope, 1136 "scope_kind": scope_kind, 1137 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1138 "limit": self._parse_limit(), 1139 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1140 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1141 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1142 }, 1143 ) 1144 1145 def _parse_put(self) -> exp.Put | exp.Command: 1146 if self._curr.token_type != TokenType.STRING: 1147 return self._parse_as_command(self._prev) 1148 1149 return self.expression( 1150 exp.Put, 1151 this=self._parse_string(), 1152 target=self._parse_location_path(), 1153 properties=self._parse_properties(), 1154 ) 1155 1156 def _parse_get(self) -> t.Optional[exp.Expression]: 1157 start = self._prev 1158 1159 # If we detect GET( then we need to parse a function, not a statement 1160 if self._match(TokenType.L_PAREN): 1161 self._retreat(self._index - 2) 1162 return self._parse_expression() 1163 1164 target = self._parse_location_path() 1165 1166 # Parse as command if unquoted file path 1167 if self._curr.token_type == TokenType.URI_START: 1168 return self._parse_as_command(start) 1169 1170 return self.expression( 1171 exp.Get, 1172 this=self._parse_string(), 1173 target=target, 1174 properties=self._parse_properties(), 1175 ) 1176 1177 def _parse_location_property(self) -> exp.LocationProperty: 1178 self._match(TokenType.EQ) 1179 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1180 1181 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1182 # Parse either a subquery or a staged file 1183 return ( 1184 self._parse_select(table=True, parse_subquery_alias=False) 1185 if self._match(TokenType.L_PAREN, advance=False) 1186 else self._parse_table_parts() 1187 ) 1188 1189 def _parse_location_path(self) -> exp.Var: 1190 start = self._curr 1191 self._advance_any(ignore_reserved=True) 1192 1193 # We avoid consuming a comma token because external tables like @foo and @bar 1194 # can be joined in a query with a comma separator, as well as closing paren 1195 # in case of subqueries 1196 while self._is_connected() and not self._match_set( 1197 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1198 ): 1199 self._advance_any(ignore_reserved=True) 1200 1201 return exp.var(self._find_sql(start, self._prev)) 1202 1203 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1204 this = super()._parse_lambda_arg() 1205 1206 if not this: 1207 return this 1208 1209 typ = self._parse_types() 1210 1211 if typ: 1212 return 
self.expression(exp.Cast, this=this, to=typ) 1213 1214 return this 1215 1216 def _parse_foreign_key(self) -> exp.ForeignKey: 1217 # inlineFK, the REFERENCES columns are implied 1218 if self._match(TokenType.REFERENCES, advance=False): 1219 return self.expression(exp.ForeignKey) 1220 1221 # outoflineFK, explicitly names the columns 1222 return super()._parse_foreign_key() 1223 1224 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1225 self._match(TokenType.EQ) 1226 if self._match(TokenType.L_PAREN, advance=False): 1227 expressions = self._parse_wrapped_options() 1228 else: 1229 expressions = [self._parse_format_name()] 1230 1231 return self.expression( 1232 exp.FileFormatProperty, 1233 expressions=expressions, 1234 ) 1235 1236 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1237 return self.expression( 1238 exp.CredentialsProperty, 1239 expressions=self._parse_wrapped_options(), 1240 ) 1241 1242 def _parse_semantic_view(self) -> exp.SemanticView: 1243 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1244 1245 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1246 if self._match_text_seq("DIMENSIONS"): 1247 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1248 if self._match_text_seq("METRICS"): 1249 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1250 if self._match_text_seq("WHERE"): 1251 kwargs["where"] = self._parse_expression() 1252 1253 return self.expression(exp.SemanticView, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
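A small usage sketch: Snowflake-specific function names are parsed into sqlglot's canonical expression types via the FUNCTIONS mapping above, which is what makes cross-dialect transpilation possible (the column names x and d are illustrative):

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("SELECT IFF(x > 0, 1, 2), DATEADD(day, 1, d)", read="snowflake")
    print(isinstance(ast.selects[0], exp.If))       # True, via FUNCTIONS["IFF"]
    print(isinstance(ast.selects[1], exp.DateAdd))  # True, via FUNCTIONS["DATEADD"]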
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1255 class Tokenizer(tokens.Tokenizer): 1256 STRING_ESCAPES = ["\\", "'"] 1257 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1258 RAW_STRINGS = ["$$"] 1259 COMMENTS = ["--", "//", ("/*", "*/")] 1260 NESTED_COMMENTS = False 1261 1262 KEYWORDS = { 1263 **tokens.Tokenizer.KEYWORDS, 1264 "BYTEINT": TokenType.INT, 1265 "FILE://": TokenType.URI_START, 1266 "FILE FORMAT": TokenType.FILE_FORMAT, 1267 "GET": TokenType.GET, 1268 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1269 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1270 "MINUS": TokenType.EXCEPT, 1271 "NCHAR VARYING": TokenType.VARCHAR, 1272 "PUT": TokenType.PUT, 1273 "REMOVE": TokenType.COMMAND, 1274 "RM": TokenType.COMMAND, 1275 "SAMPLE": TokenType.TABLE_SAMPLE, 1276 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1277 "SQL_DOUBLE": TokenType.DOUBLE, 1278 "SQL_VARCHAR": TokenType.VARCHAR, 1279 "STAGE": TokenType.STAGE, 1280 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1281 "STREAMLIT": TokenType.STREAMLIT, 1282 "TAG": TokenType.TAG, 1283 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1284 "TOP": TokenType.TOP, 1285 "WAREHOUSE": TokenType.WAREHOUSE, 1286 } 1287 KEYWORDS.pop("/*+") 1288 1289 SINGLE_TOKENS = { 1290 **tokens.Tokenizer.SINGLE_TOKENS, 1291 "$": TokenType.PARAMETER, 1292 "!": TokenType.EXCLAMATION, 1293 } 1294 1295 VAR_SINGLE_TOKENS = {"$"} 1296 1297 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
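A quick, hedged check of the overrides above ($$ raw strings and // line comments; the exact token stream can differ between sqlglot versions):

    from sqlglot.dialects.snowflake import Snowflake

    tokens = Snowflake().tokenize("SELECT $$O'Reilly$$ // trailing comment")
    # Roughly [SELECT, RAW_STRING]; the // comment is attached to a
    # neighboring token rather than emitted as a token of its own.
    print([token.token_type.name for token in tokens])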
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1299 class Generator(generator.Generator): 1300 PARAMETER_TOKEN = "$" 1301 MATCHED_BY_SOURCE = False 1302 SINGLE_STRING_INTERVAL = True 1303 JOIN_HINTS = False 1304 TABLE_HINTS = False 1305 QUERY_HINTS = False 1306 AGGREGATE_FILTER_SUPPORTED = False 1307 SUPPORTS_TABLE_COPY = False 1308 COLLATE_IS_FUNC = True 1309 LIMIT_ONLY_LITERALS = True 1310 JSON_KEY_VALUE_PAIR_SEP = "," 1311 INSERT_OVERWRITE = " OVERWRITE INTO" 1312 STRUCT_DELIMITER = ("(", ")") 1313 COPY_PARAMS_ARE_WRAPPED = False 1314 COPY_PARAMS_EQ_REQUIRED = True 1315 STAR_EXCEPT = "EXCLUDE" 1316 SUPPORTS_EXPLODING_PROJECTIONS = False 1317 ARRAY_CONCAT_IS_VAR_LEN = False 1318 SUPPORTS_CONVERT_TIMEZONE = True 1319 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1320 SUPPORTS_MEDIAN = True 1321 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1322 SUPPORTS_DECODE_CASE = True 1323 IS_BOOL_ALLOWED = False 1324 1325 TRANSFORMS = { 1326 **generator.Generator.TRANSFORMS, 1327 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1328 exp.ArgMax: rename_func("MAX_BY"), 1329 exp.ArgMin: rename_func("MIN_BY"), 1330 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1331 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1332 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1333 exp.AtTimeZone: lambda self, e: self.func( 1334 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1335 ), 1336 exp.BitwiseOr: rename_func("BITOR"), 1337 exp.BitwiseXor: rename_func("BITXOR"), 1338 exp.BitwiseAnd: rename_func("BITAND"), 1339 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1340 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1341 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1342 exp.BitwiseNot: rename_func("BITNOT"), 1343 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1344 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1345 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1346 exp.DateAdd: date_delta_sql("DATEADD"), 1347 exp.DateDiff: date_delta_sql("DATEDIFF"), 1348 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1349 exp.DatetimeDiff: timestampdiff_sql, 1350 exp.DateStrToDate: datestrtodate_sql, 1351 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1352 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1353 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1354 exp.DayOfYear: rename_func("DAYOFYEAR"), 1355 exp.Explode: rename_func("FLATTEN"), 1356 exp.Extract: lambda self, e: self.func( 1357 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1358 ), 1359 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1360 exp.FileFormatProperty: lambda self, 1361 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1362 exp.FromTimeZone: lambda self, e: self.func( 1363 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1364 ), 1365 exp.GenerateSeries: lambda self, e: self.func( 1366 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1367 ), 1368 exp.GetExtract: rename_func("GET"), 1369 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1370 exp.If: if_sql(name="IFF", false_value="NULL"), 1371 exp.JSONExtractArray: _json_extract_value_array_sql, 1372 exp.JSONExtractScalar: lambda self, e: self.func( 1373 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1374 ), 1375 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1376 exp.JSONPathRoot: lambda *_: "", 1377 exp.JSONValueArray: _json_extract_value_array_sql, 1378 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1379 
rename_func("EDITDISTANCE") 1380 ), 1381 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1382 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1383 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1384 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1385 exp.MakeInterval: no_make_interval_sql, 1386 exp.Max: max_or_greatest, 1387 exp.Min: min_or_least, 1388 exp.ParseJSON: lambda self, e: self.func( 1389 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1390 ), 1391 exp.JSONFormat: rename_func("TO_JSON"), 1392 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1393 exp.PercentileCont: transforms.preprocess( 1394 [transforms.add_within_group_for_percentiles] 1395 ), 1396 exp.PercentileDisc: transforms.preprocess( 1397 [transforms.add_within_group_for_percentiles] 1398 ), 1399 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1400 exp.RegexpExtract: _regexpextract_sql, 1401 exp.RegexpExtractAll: _regexpextract_sql, 1402 exp.RegexpILike: _regexpilike_sql, 1403 exp.Rand: rename_func("RANDOM"), 1404 exp.Select: transforms.preprocess( 1405 [ 1406 transforms.eliminate_window_clause, 1407 transforms.eliminate_distinct_on, 1408 transforms.explode_projection_to_unnest(), 1409 transforms.eliminate_semi_and_anti_joins, 1410 _transform_generate_date_array, 1411 _qualify_unnested_columns, 1412 _eliminate_dot_variant_lookup, 1413 ] 1414 ), 1415 exp.SHA: rename_func("SHA1"), 1416 exp.MD5Digest: rename_func("MD5_BINARY"), 1417 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1418 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1419 exp.LowerHex: rename_func("TO_CHAR"), 1420 exp.SortArray: rename_func("ARRAY_SORT"), 1421 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1422 exp.StartsWith: rename_func("STARTSWITH"), 1423 exp.EndsWith: rename_func("ENDSWITH"), 1424 exp.StrPosition: lambda self, e: strposition_sql( 1425 self, e, func_name="CHARINDEX", supports_position=True 1426 ), 1427 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1428 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1429 exp.Stuff: rename_func("INSERT"), 1430 exp.StPoint: rename_func("ST_MAKEPOINT"), 1431 exp.TimeAdd: date_delta_sql("TIMEADD"), 1432 exp.Timestamp: no_timestamp_sql, 1433 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1434 exp.TimestampDiff: lambda self, e: self.func( 1435 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1436 ), 1437 exp.TimestampTrunc: timestamptrunc_sql(), 1438 exp.TimeStrToTime: timestrtotime_sql, 1439 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1440 exp.ToArray: rename_func("TO_ARRAY"), 1441 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1442 exp.ToDouble: rename_func("TO_DOUBLE"), 1443 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1444 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1445 exp.TsOrDsToDate: lambda self, e: self.func( 1446 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1447 ), 1448 exp.TsOrDsToTime: lambda self, e: self.func( 1449 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1450 ), 1451 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1452 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1453 exp.Uuid: rename_func("UUID_STRING"), 1454 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1455 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1456 exp.Xor: rename_func("BOOLXOR"), 1457 exp.ByteLength: rename_func("OCTET_LENGTH"), 1458 
} 1459 1460 SUPPORTED_JSON_PATH_PARTS = { 1461 exp.JSONPathKey, 1462 exp.JSONPathRoot, 1463 exp.JSONPathSubscript, 1464 } 1465 1466 TYPE_MAPPING = { 1467 **generator.Generator.TYPE_MAPPING, 1468 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1469 exp.DataType.Type.NESTED: "OBJECT", 1470 exp.DataType.Type.STRUCT: "OBJECT", 1471 exp.DataType.Type.TEXT: "VARCHAR", 1472 } 1473 1474 TOKEN_MAPPING = { 1475 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1476 } 1477 1478 PROPERTIES_LOCATION = { 1479 **generator.Generator.PROPERTIES_LOCATION, 1480 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1481 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1482 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1483 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1484 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1485 } 1486 1487 UNSUPPORTED_VALUES_EXPRESSIONS = { 1488 exp.Map, 1489 exp.StarMap, 1490 exp.Struct, 1491 exp.VarMap, 1492 } 1493 1494 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1495 1496 def with_properties(self, properties: exp.Properties) -> str: 1497 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1498 1499 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1500 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1501 values_as_table = False 1502 1503 return super().values_sql(expression, values_as_table=values_as_table) 1504 1505 def datatype_sql(self, expression: exp.DataType) -> str: 1506 expressions = expression.expressions 1507 if ( 1508 expressions 1509 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1510 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1511 ): 1512 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1513 return "OBJECT" 1514 1515 return super().datatype_sql(expression) 1516 1517 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1518 return self.func( 1519 "TO_NUMBER", 1520 expression.this, 1521 expression.args.get("format"), 1522 expression.args.get("precision"), 1523 expression.args.get("scale"), 1524 ) 1525 1526 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1527 milli = expression.args.get("milli") 1528 if milli is not None: 1529 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1530 expression.set("nano", milli_to_nano) 1531 1532 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1533 1534 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1535 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1536 return self.func("TO_GEOGRAPHY", expression.this) 1537 if expression.is_type(exp.DataType.Type.GEOMETRY): 1538 return self.func("TO_GEOMETRY", expression.this) 1539 1540 return super().cast_sql(expression, safe_prefix=safe_prefix) 1541 1542 def trycast_sql(self, expression: exp.TryCast) -> str: 1543 value = expression.this 1544 1545 if value.type is None: 1546 from sqlglot.optimizer.annotate_types import annotate_types 1547 1548 value = annotate_types(value, dialect=self.dialect) 1549 1550 # Snowflake requires that TRY_CAST's value be a string 1551 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1552 # if we can deduce that the value is a string, then we can generate TRY_CAST 1553 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1554 return super().trycast_sql(expression) 1555 1556 return 
self.cast_sql(expression) 1557 1558 def log_sql(self, expression: exp.Log) -> str: 1559 if not expression.expression: 1560 return self.func("LN", expression.this) 1561 1562 return super().log_sql(expression) 1563 1564 def unnest_sql(self, expression: exp.Unnest) -> str: 1565 unnest_alias = expression.args.get("alias") 1566 offset = expression.args.get("offset") 1567 1568 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1569 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1570 1571 columns = [ 1572 exp.to_identifier("seq"), 1573 exp.to_identifier("key"), 1574 exp.to_identifier("path"), 1575 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1576 value, 1577 exp.to_identifier("this"), 1578 ] 1579 1580 if unnest_alias: 1581 unnest_alias.set("columns", columns) 1582 else: 1583 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1584 1585 table_input = self.sql(expression.expressions[0]) 1586 if not table_input.startswith("INPUT =>"): 1587 table_input = f"INPUT => {table_input}" 1588 1589 expression_parent = expression.parent 1590 1591 explode = ( 1592 f"FLATTEN({table_input})" 1593 if isinstance(expression_parent, exp.Lateral) 1594 else f"TABLE(FLATTEN({table_input}))" 1595 ) 1596 alias = self.sql(unnest_alias) 1597 alias = f" AS {alias}" if alias else "" 1598 value = ( 1599 "" 1600 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1601 else f"{value} FROM " 1602 ) 1603 1604 return f"{value}{explode}{alias}" 1605 1606 def show_sql(self, expression: exp.Show) -> str: 1607 terse = "TERSE " if expression.args.get("terse") else "" 1608 history = " HISTORY" if expression.args.get("history") else "" 1609 like = self.sql(expression, "like") 1610 like = f" LIKE {like}" if like else "" 1611 1612 scope = self.sql(expression, "scope") 1613 scope = f" {scope}" if scope else "" 1614 1615 scope_kind = self.sql(expression, "scope_kind") 1616 if scope_kind: 1617 scope_kind = f" IN {scope_kind}" 1618 1619 starts_with = self.sql(expression, "starts_with") 1620 if starts_with: 1621 starts_with = f" STARTS WITH {starts_with}" 1622 1623 limit = self.sql(expression, "limit") 1624 1625 from_ = self.sql(expression, "from") 1626 if from_: 1627 from_ = f" FROM {from_}" 1628 1629 privileges = self.expressions(expression, key="privileges", flat=True) 1630 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1631 1632 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1633 1634 def describe_sql(self, expression: exp.Describe) -> str: 1635 # Default to table if kind is unknown 1636 kind_value = expression.args.get("kind") or "TABLE" 1637 kind = f" {kind_value}" if kind_value else "" 1638 this = f" {self.sql(expression, 'this')}" 1639 expressions = self.expressions(expression, flat=True) 1640 expressions = f" {expressions}" if expressions else "" 1641 return f"DESCRIBE{kind}{this}{expressions}" 1642 1643 def generatedasidentitycolumnconstraint_sql( 1644 self, expression: exp.GeneratedAsIdentityColumnConstraint 1645 ) -> str: 1646 start = expression.args.get("start") 1647 start = f" START {start}" if start else "" 1648 increment = expression.args.get("increment") 1649 increment = f" INCREMENT {increment}" if increment else "" 1650 1651 order = expression.args.get("order") 1652 if order is not None: 1653 order_clause = " ORDER" if order else " NOORDER" 1654 else: 1655 order_clause = "" 1656 1657 return f"AUTOINCREMENT{start}{increment}{order_clause}" 
1658 1659 def cluster_sql(self, expression: exp.Cluster) -> str: 1660 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1661 1662 def struct_sql(self, expression: exp.Struct) -> str: 1663 if len(expression.expressions) == 1: 1664 arg = expression.expressions[0] 1665 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1666 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1667 return f"{{{self.sql(expression.expressions[0])}}}" 1668 1669 keys = [] 1670 values = [] 1671 1672 for i, e in enumerate(expression.expressions): 1673 if isinstance(e, exp.PropertyEQ): 1674 keys.append( 1675 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1676 ) 1677 values.append(e.expression) 1678 else: 1679 keys.append(exp.Literal.string(f"_{i}")) 1680 values.append(e) 1681 1682 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1683 1684 @unsupported_args("weight", "accuracy") 1685 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1686 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1687 1688 def alterset_sql(self, expression: exp.AlterSet) -> str: 1689 exprs = self.expressions(expression, flat=True) 1690 exprs = f" {exprs}" if exprs else "" 1691 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1692 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1693 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1694 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1695 tag = self.expressions(expression, key="tag", flat=True) 1696 tag = f" TAG {tag}" if tag else "" 1697 1698 return f"SET{exprs}{file_format}{copy_options}{tag}" 1699 1700 def strtotime_sql(self, expression: exp.StrToTime): 1701 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1702 return self.func( 1703 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1704 ) 1705 1706 def timestampsub_sql(self, expression: exp.TimestampSub): 1707 return self.sql( 1708 exp.TimestampAdd( 1709 this=expression.this, 1710 expression=expression.expression * -1, 1711 unit=expression.unit, 1712 ) 1713 ) 1714 1715 def jsonextract_sql(self, expression: exp.JSONExtract): 1716 this = expression.this 1717 1718 # JSON strings are valid coming from other dialects such as BQ so 1719 # for these cases we PARSE_JSON preemptively 1720 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1721 "requires_json" 1722 ): 1723 this = exp.ParseJSON(this=this) 1724 1725 return self.func( 1726 "GET_PATH", 1727 this, 1728 expression.expression, 1729 ) 1730 1731 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1732 this = expression.this 1733 if this.is_string: 1734 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1735 1736 return self.func("TO_CHAR", this, self.format_time(expression)) 1737 1738 def datesub_sql(self, expression: exp.DateSub) -> str: 1739 value = expression.expression 1740 if value: 1741 value.replace(value * (-1)) 1742 else: 1743 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1744 1745 return date_delta_sql("DATEADD")(self, expression) 1746 1747 def select_sql(self, expression: exp.Select) -> str: 1748 limit = expression.args.get("limit") 1749 offset = expression.args.get("offset") 1750 if offset and not limit: 1751 expression.limit(exp.Null(), copy=False) 1752 return 
super().select_sql(expression) 1753 1754 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1755 is_materialized = expression.find(exp.MaterializedProperty) 1756 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1757 1758 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1759 # For materialized views, COPY GRANTS is located *before* the columns list 1760 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1761 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1762 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1763 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1764 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1765 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1766 1767 this_name = self.sql(expression.this, "this") 1768 copy_grants = self.sql(copy_grants_property) 1769 this_schema = self.schema_columns_sql(expression.this) 1770 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1771 1772 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1773 1774 return super().createable_sql(expression, locations) 1775 1776 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1777 this = expression.this 1778 1779 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1780 # and add it later as part of the WITHIN GROUP clause 1781 order = this if isinstance(this, exp.Order) else None 1782 if order: 1783 expression.set("this", order.this.pop()) 1784 1785 expr_sql = super().arrayagg_sql(expression) 1786 1787 if order: 1788 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1789 1790 return expr_sql 1791 1792 def array_sql(self, expression: exp.Array) -> str: 1793 expressions = expression.expressions 1794 1795 first_expr = seq_get(expressions, 0) 1796 if isinstance(first_expr, exp.Select): 1797 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1798 if first_expr.text("kind").upper() == "STRUCT": 1799 object_construct_args = [] 1800 for expr in first_expr.expressions: 1801 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1802 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1803 name = expr.this if isinstance(expr, exp.Alias) else expr 1804 1805 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1806 1807 array_agg = exp.ArrayAgg( 1808 this=_build_object_construct(args=object_construct_args) 1809 ) 1810 1811 first_expr.set("kind", None) 1812 first_expr.set("expressions", [array_agg]) 1813 1814 return self.sql(first_expr.subquery()) 1815 1816 return inline_array_sql(self, expression) 1817 1818 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1819 zone = self.sql(expression, "this") 1820 if not zone: 1821 return super().currentdate_sql(expression) 1822 1823 expr = exp.Cast( 1824 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1825 to=exp.DataType(this=exp.DataType.Type.DATE), 1826 ) 1827 return self.sql(expr) 1828 1829 def dot_sql(self, expression: exp.Dot) -> str: 1830 this = expression.this 1831 1832 if not this.type: 1833 from sqlglot.optimizer.annotate_types import annotate_types 1834 1835 this = annotate_types(this, dialect=self.dialect) 1836 1837 if not isinstance(this, exp.Dot) 
and this.is_type(exp.DataType.Type.STRUCT): 1838 # Generate colon notation for the top level STRUCT 1839 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1840 1841 return super().dot_sql(expression) 1842 1843 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1844 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}" 1845 1846 def format_sql(self, expression: exp.Format) -> str: 1847 if expression.name.lower() == "%s" and len(expression.expressions) == 1: 1848 return self.func("TO_CHAR", expression.expressions[0]) 1849 1850 return self.function_fallback_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  False (default): Never quote, except in cases where it's mandatory by the dialect.
  True or 'always': Always quote.
  'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  "upper" or True (default): Convert names to uppercase.
  "lower": Convert names to lowercase.
  False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
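For instance, the STAR_EXCEPT = "EXCLUDE" setting above is what turns BigQuery's SELECT * EXCEPT (...) into Snowflake's EXCLUDE form:

    import sqlglot

    print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0])
    # SELECT * EXCLUDE (a) FROM t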
1505 def datatype_sql(self, expression: exp.DataType) -> str: 1506 expressions = expression.expressions 1507 if ( 1508 expressions 1509 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1510 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1511 ): 1512 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1513 return "OBJECT" 1514 1515 return super().datatype_sql(expression)
1526 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1527 milli = expression.args.get("milli") 1528 if milli is not None: 1529 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1530 expression.set("nano", milli_to_nano) 1531 1532 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
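A hedged sketch of the millisecond-to-nanosecond rewrite above, built directly from an expression node (the keyword names follow exp.TimestampFromParts; the exact argument order in the output may vary by version):

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    node = exp.TimestampFromParts(
        year=exp.Literal.number(2024), month=exp.Literal.number(1), day=exp.Literal.number(2),
        hour=exp.Literal.number(3), min=exp.Literal.number(4), sec=exp.Literal.number(5),
        milli=exp.Literal.number(6),
    )
    # The milli arg is popped and re-attached as nano = milli * 1000000.
    print(Snowflake().generate(node))
    # roughly: TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5, 6 * 1000000)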
1534 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1535 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1536 return self.func("TO_GEOGRAPHY", expression.this) 1537 if expression.is_type(exp.DataType.Type.GEOMETRY): 1538 return self.func("TO_GEOMETRY", expression.this) 1539 1540 return super().cast_sql(expression, safe_prefix=safe_prefix)
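For example, casts to the geospatial types are rendered through the conversion functions:

    import sqlglot

    print(sqlglot.transpile("SELECT CAST(wkt AS GEOGRAPHY)", read="snowflake", write="snowflake")[0])
    # SELECT TO_GEOGRAPHY(wkt)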
1542 def trycast_sql(self, expression: exp.TryCast) -> str: 1543 value = expression.this 1544 1545 if value.type is None: 1546 from sqlglot.optimizer.annotate_types import annotate_types 1547 1548 value = annotate_types(value, dialect=self.dialect) 1549 1550 # Snowflake requires that TRY_CAST's value be a string 1551 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1552 # if we can deduce that the value is a string, then we can generate TRY_CAST 1553 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1554 return super().trycast_sql(expression) 1555 1556 return self.cast_sql(expression)
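A hedged pair of examples for the string-only TRY_CAST rule (reading from DuckDB so "requires_string" is not set; exact behavior may vary by version):

    import sqlglot

    # A string literal can be type-annotated as TEXT, so TRY_CAST survives.
    print(sqlglot.transpile("SELECT TRY_CAST('1' AS INT)", read="duckdb", write="snowflake")[0]))
    # An operand of unknown type falls back to a plain CAST.
    print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="duckdb", write="snowflake")[0])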
1564 def unnest_sql(self, expression: exp.Unnest) -> str: 1565 unnest_alias = expression.args.get("alias") 1566 offset = expression.args.get("offset") 1567 1568 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1569 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1570 1571 columns = [ 1572 exp.to_identifier("seq"), 1573 exp.to_identifier("key"), 1574 exp.to_identifier("path"), 1575 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1576 value, 1577 exp.to_identifier("this"), 1578 ] 1579 1580 if unnest_alias: 1581 unnest_alias.set("columns", columns) 1582 else: 1583 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1584 1585 table_input = self.sql(expression.expressions[0]) 1586 if not table_input.startswith("INPUT =>"): 1587 table_input = f"INPUT => {table_input}" 1588 1589 expression_parent = expression.parent 1590 1591 explode = ( 1592 f"FLATTEN({table_input})" 1593 if isinstance(expression_parent, exp.Lateral) 1594 else f"TABLE(FLATTEN({table_input}))" 1595 ) 1596 alias = self.sql(unnest_alias) 1597 alias = f" AS {alias}" if alias else "" 1598 value = ( 1599 "" 1600 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1601 else f"{value} FROM " 1602 ) 1603 1604 return f"{value}{explode}{alias}"
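A rough illustration of the FLATTEN rewrite (reading a BigQuery UNNEST; the alias columns come from the list built above, and the exact output can vary by version):

    import sqlglot

    print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2]) AS t(x)", read="bigquery", write="snowflake")[0])
    # roughly: SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS t(seq, key, path, index, x, this)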
    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"
    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""

        order = expression.args.get("order")
        if order is not None:
            order_clause = " ORDER" if order else " NOORDER"
        else:
            order_clause = ""

        return f"AUTOINCREMENT{start}{increment}{order_clause}"
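As a rough example, a Postgres identity column with an explicit start and increment should map onto this clause (the source dialect and exact output here are illustrative assumptions):

    import sqlglot

    # Approximate expected output:
    #   CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)
    print(
        sqlglot.transpile(
            "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))",
            read="postgres",
            write="snowflake",
        )[0]
    )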
    def struct_sql(self, expression: exp.Struct) -> str:
        if len(expression.expressions) == 1:
            arg = expression.expressions[0]
            if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
                # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
                return f"{{{self.sql(expression.expressions[0])}}}"

        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
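A minimal sketch using a BigQuery STRUCT literal (field names are made up; the output is the expected shape):

    import sqlglot

    # Approximate expected output:
    #   SELECT OBJECT_CONSTRUCT('a', 1, 'b', 'x')
    print(sqlglot.transpile("SELECT STRUCT(1 AS a, 'x' AS b)", read="bigquery", write="snowflake")[0])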
    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"
    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        this = expression.this

        # JSON strings are valid coming from other dialects such as BigQuery, so
        # for these cases we wrap the value in PARSE_JSON preemptively
        if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
            "requires_json"
        ):
            this = exp.ParseJSON(this=this)

        return self.func("GET_PATH", this, expression.expression)
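For example, extracting from a BigQuery JSON string should wrap the value in PARSE_JSON before GET_PATH (the column name and exact path rendering are illustrative):

    import sqlglot

    # Approximate expected output:
    #   SELECT GET_PATH(PARSE_JSON(col), 'name')
    print(sqlglot.transpile("SELECT JSON_EXTRACT(col, '$.name')", read="bigquery", write="snowflake")[0])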
    def datesub_sql(self, expression: exp.DateSub) -> str:
        # Snowflake has no DATE_SUB equivalent, so negate the amount and emit DATEADD
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)
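A quick sketch with a MySQL DATE_SUB (the column name is made up; the output is the expected shape):

    import sqlglot

    # Approximate expected output:
    #   SELECT DATEADD(DAY, -7, col)
    print(sqlglot.transpile("SELECT DATE_SUB(col, INTERVAL 7 DAY)", read="mysql", write="snowflake")[0])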
    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        is_materialized = expression.find(exp.MaterializedProperty)
        copy_grants_property = expression.find(exp.CopyGrantsProperty)

        if expression.kind == "VIEW" and is_materialized and copy_grants_property:
            # For materialized views, COPY GRANTS is located *before* the columns list.
            # This is in contrast to normal views, where COPY GRANTS is located *after* it.
            # We default CopyGrantsProperty to POST_SCHEMA, which means we need to output
            # it POST_NAME if a materialized view is detected.
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
            post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
            post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

            this_name = self.sql(expression.this, "this")
            copy_grants = self.sql(copy_grants_property)
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

        return super().createable_sql(expression, locations)
    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        this = expression.this

        # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
        # and add it later as part of the WITHIN GROUP clause
        order = this if isinstance(this, exp.Order) else None
        if order:
            expression.set("this", order.this.pop())

        expr_sql = super().arrayagg_sql(expression)

        if order:
            expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

        return expr_sql
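For instance (identifiers made up; output approximate):

    import sqlglot

    # Approximate expected output:
    #   SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) FROM t
    print(sqlglot.transpile("SELECT ARRAY_AGG(x ORDER BY y) FROM t", write="snowflake")[0])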
    def array_sql(self, expression: exp.Array) -> str:
        expressions = expression.expressions

        first_expr = seq_get(expressions, 0)
        if isinstance(first_expr, exp.Select):
            # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
            if first_expr.text("kind").upper() == "STRUCT":
                object_construct_args = []
                for expr in first_expr.expressions:
                    # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                    # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                    name = expr.this if isinstance(expr, exp.Alias) else expr

                    object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                array_agg = exp.ArrayAgg(this=_build_object_construct(args=object_construct_args))

                first_expr.set("kind", None)
                first_expr.set("expressions", [array_agg])

                return self.sql(first_expr.subquery())

        return inline_array_sql(self, expression)
    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        zone = self.sql(expression, "this")
        if not zone:
            return super().currentdate_sql(expression)

        expr = exp.Cast(
            this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
            to=exp.DataType(this=exp.DataType.Type.DATE),
        )
        return self.sql(expr)
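A hedged sketch, assuming a BigQuery CURRENT_DATE with a time zone argument (exact output may vary by version):

    import sqlglot

    # Approximate expected output:
    #   SELECT CAST(CONVERT_TIMEZONE('America/New_York', CURRENT_TIMESTAMP()) AS DATE)
    print(sqlglot.transpile("SELECT CURRENT_DATE('America/New_York')", read="bigquery", write="snowflake")[0])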
    def dot_sql(self, expression: exp.Dot) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
            # Generate colon notation for the top level STRUCT
            return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

        return super().dot_sql(expression)